Implement complete device selection in the backend.

2025-08-09 07:45:01 +02:00 · 2022-10-29 17:33:44 -04:00
parent 840348b4eb
commit b7a663ed20
4 changed files with 68 additions and 30 deletions
--- a/ui/sd_internal/init.py
+++ b/ui/sd_internal/init.py
@ -18,7 +18,6 @@ class Request:
    precision: str = "autocast" # or "full"
    save_to_disk_path: str = None
    turbo: bool = True
-    use_cpu: bool = False
    use_full_precision: bool = False
    use_face_correction: str = None # or "GFPGANv1.3"
    use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B"
@ -50,7 +49,7 @@ class Request:
            "output_format": self.output_format,
        }

-    def to_string(self):
+    def __str__(self):
        return f'''
    session_id: {self.session_id}
    prompt: {self.prompt}
@ -64,7 +63,6 @@ class Request:
    precision: {self.precision}
    save_to_disk_path: {self.save_to_disk_path}
    turbo: {self.turbo}
-    use_cpu: {self.use_cpu}
    use_full_precision: {self.use_full_precision}
    use_face_correction: {self.use_face_correction}
    use_upscale: {self.use_upscale}
--- a/ui/sd_internal/runtime.py
+++ b/ui/sd_internal/runtime.py
@ -45,6 +45,25 @@ from io import BytesIO
 from threading import local as LocalThreadVars
 thread_data = LocalThreadVars()

+def get_processor_name():
+    """Return a human-readable name for the host CPU.
+
+    Used to label the 'cpu' render device. The lookup is best effort and
+    never raises: any failure is logged and the generic name is returned.
+
+    Returns:
+        str: the processor name reported by the operating system, or "cpu"
+        when it cannot be determined (unsupported platform, no usable data,
+        or an error while querying).
+    """
+    try:
+        import platform
+        import subprocess
+        system = platform.system()
+        if system == "Windows":
+            # platform.processor() may be an empty string; fall back below.
+            name = platform.processor()
+            if name:
+                return name
+        elif system == "Darwin":
+            # Look for sysctl in /usr/sbin too, but only for the child process:
+            # extending os.environ here would grow PATH on every call.
+            env = dict(os.environ)
+            env['PATH'] = env.get('PATH', '') + os.pathsep + '/usr/sbin'
+            # Pass an argument list: without shell=True a single string is
+            # treated as the executable name and the call always fails.
+            command = ["sysctl", "-n", "machdep.cpu.brand_string"]
+            name = subprocess.check_output(command, env=env).decode().strip()
+            if name:
+                return name
+        elif system == "Linux":
+            # Read the file directly instead of spawning a shell to `cat` it.
+            with open("/proc/cpuinfo", encoding="utf-8", errors="replace") as cpuinfo:
+                for line in cpuinfo:
+                    if "model name" in line:
+                        name = re.sub(r".*model name.*:", "", line, count=1).strip()
+                        if name:
+                            return name
+    except Exception:
+        print(traceback.format_exc())
+    # Unknown platform, nothing usable found, or the lookup failed.
+    return "cpu"
+
 def device_would_fail(device):
    if device == 'cpu': return None
    # Returns None when no issues found, otherwise returns the detected error str.
@ -68,17 +87,17 @@ def device_select(device):
        print(failure_msg)
        return False

-    device_name = torch.cuda.get_device_name(device)
+    thread_data.device_name = torch.cuda.get_device_name(device)
+    thread_data.device = device

-    # otherwise these NVIDIA cards create green images
-    thread_data.force_full_precision = ('nvidia' in device_name.lower() or 'geforce' in device_name.lower()) and (' 1660' in device_name or ' 1650' in device_name)
+    # Force full precision on 1660 and 1650 NVIDIA cards to avoid creating green images
+    device_name = thread_data.device_name.lower()
+    thread_data.force_full_precision = ('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)
    if thread_data.force_full_precision:
-        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', device_name)
+        print('forcing full precision on NVIDIA 16xx cards, to avoid green images. GPU detected: ', thread_data.device_name)
        # Apply force_full_precision now before models are loaded.
        thread_data.precision = 'full'

-    thread_data.device = device
-    thread_data.has_valid_gpu = True
    return True

 def device_init(device_selection=None):
@ -100,24 +119,26 @@ def device_init(device_selection=None):
    thread_data.model_is_half = False
    thread_data.model_fs_is_half = False
    thread_data.device = None
+    thread_data.device_name = None
    thread_data.unet_bs = 1
    thread_data.precision = 'autocast'
    thread_data.sampler_plms = None
    thread_data.sampler_ddim = None

    thread_data.turbo = False
-    thread_data.has_valid_gpu = False
    thread_data.force_full_precision = False
    thread_data.reduced_memory = True

    if device_selection.lower() == 'cpu':
-        print('CPU requested, skipping gpu init.')
        thread_data.device = 'cpu'
+        thread_data.device_name = get_processor_name()
+        print('Render device CPU available as', thread_data.device_name)
        return
    if not torch.cuda.is_available():
        if device_selection == 'auto' or device_selection == 'current':
            print('WARNING: torch.cuda is not available. Using the CPU, but this will be very slow!')
            thread_data.device = 'cpu'
+            thread_data.device_name = get_processor_name()
            return
        else:
            raise EnvironmentError('torch.cuda is not available.')
@ -475,7 +496,7 @@ def do_mk_img(req: Request):
        thread_data.vae_file = req.use_vae_model
        needs_model_reload = True

-    if thread_data.has_valid_gpu:
+    if thread_data.device != 'cpu':
        if (thread_data.precision == 'autocast' and (req.use_full_precision or not thread_data.model_is_half)) or \
            (thread_data.precision == 'full' and not req.use_full_precision and not thread_data.force_full_precision):
            thread_data.precision = 'full' if req.use_full_precision else 'autocast'
@ -500,7 +521,7 @@ def do_mk_img(req: Request):
    opt_f = 8
    opt_ddim_eta = 0.0

-    print(req.to_string(), '\n    device', thread_data.device)
+    print(req, '\n    device', torch.device(thread_data.device), "as", thread_data.device_name)
    print('\n\n    Using precision:', thread_data.precision)

    seed_everything(opt_seed)
--- a/ui/sd_internal/task_manager.py
+++ b/ui/sd_internal/task_manager.py
@ -38,6 +38,7 @@ class RenderTask(): # Task with output queue and completion lock.
    def __init__(self, req: Request):
        self.request: Request = req # Initial Request
        self.response: Any = None # Copy of the last reponse
+        self.render_device = None
        self.temp_images:list = [None] * req.num_outputs * (1 if req.show_only_filtered_image else 2)
        self.error: Exception = None
        self.lock: threading.Lock = threading.Lock() # Locks at task start and unlocks when task is completed
@ -68,7 +69,8 @@ class ImageRequest(BaseModel):
    # allow_nsfw: bool = False
    save_to_disk_path: str = None
    turbo: bool = True
-    use_cpu: bool = False
+    use_cpu: bool = False ##TODO Remove after UI and plugins transition.
+    render_device: str = None
    use_full_precision: bool = False
    use_face_correction: str = None # or "GFPGANv1.3"
    use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B"
@ -89,7 +91,7 @@ class FilterRequest(BaseModel):
    height: int = 512
    save_to_disk_path: str = None
    turbo: bool = True
-    use_cpu: bool = False
+    render_device: str = None
    use_full_precision: bool = False
    output_format: str = "jpeg" # or "png"

@ -219,26 +221,24 @@ def thread_get_next_task():
                    queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
                    task = queued_task
                    break
-                if queued_task.request.use_cpu:
+                if queued_task.render_device == 'cpu':
                    queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
                    task = queued_task
                    break
                if not runtime.is_first_cuda_device(runtime.thread_data.device):
                    continue  # Wait for cuda:0
-            if queued_task.request.use_cpu and runtime.thread_data.device != 'cpu':
-                if is_alive('cpu') > 0:
-                    continue  # CPU Tasks, Skip GPU device
+            if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
+                # Is asking for a specific render device.
+                if is_alive(queued_task.render_device) > 0:
+                    continue  # requested device alive, skip current one.
                else:
-                    queued_task.error = Exception('Cpu is not enabled in render_devices.')
-                    task = queued_task
-                    break
-            if not queued_task.request.use_cpu and runtime.thread_data.device == 'cpu':
-                if is_alive() > 1:  # cpu is alive, so need more than one.
-                    continue  # GPU Tasks, don't run on CPU unless there is nothing else.
-                else:
-                    queued_task.error = Exception('No active gpu found. Please check the error message in the command-line window at startup.')
+                    # Requested device is not active, return error to UI.
+                    queued_task.error = Exception(str(queued_task.render_device) + ' is not currently active.')
                    task = queued_task
                    break
+            if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1:
+                 # not asking for any specific devices, cpu want to grab task but other render devices are alive.
+                    continue  # Skip Tasks, don't run on CPU unless there is nothing else or user asked for it.
            task = queued_task
            break
        if task is not None:
@ -256,7 +256,8 @@ def thread_render(device):
        print(traceback.format_exc())
        return
    weak_thread_data[threading.current_thread()] = {
-        'device': runtime.thread_data.device
+        'device': runtime.thread_data.device,
+        'device_name': runtime.thread_data.device_name
    }
    if runtime.thread_data.device != 'cpu' or is_alive() == 1:
        preload_model()
@ -341,6 +342,17 @@ def get_cached_task(session_id:str, update_ttl:bool=False):
        return None
    return task_cache.tryGet(session_id)

+def get_devices():
+    """Return the render devices known to the task manager.
+
+    Builds a dict mapping each render thread's 'device' to its 'device_name',
+    read from weak_thread_data while holding manager_lock.
+
+    Returns:
+        dict: {device: device_name} for every render thread that has
+        registered its device data.
+
+    Raises:
+        Exception: if manager_lock cannot be acquired within LOCK_TIMEOUT.
+    """
+    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('get_devices' + ERR_LOCK_FAILED)
+    try:
+        device_dict = {}
+        for rthread in render_threads:
+            weak_data = weak_thread_data.get(rthread)
+            # thread_render only stores its entry once device init succeeded,
+            # so a thread may have no (or incomplete) data yet; skip it rather
+            # than fail the whole listing with a TypeError/KeyError.
+            if not weak_data or 'device' not in weak_data or 'device_name' not in weak_data:
+                continue
+            device_dict[weak_data['device']] = weak_data['device_name']
+        return device_dict
+    finally:
+        manager_lock.release()
+
 def is_first_cuda_device(device):
    from . import runtime # When calling runtime from outside thread_render DO NOT USE thread specific attributes or functions.
    return runtime.is_first_cuda_device(device)
@ -416,7 +428,6 @@ def render(req : ImageRequest):
    r.sampler = req.sampler
    # r.allow_nsfw = req.allow_nsfw
    r.turbo = req.turbo
-    r.use_cpu = req.use_cpu
    r.use_full_precision = req.use_full_precision
    r.save_to_disk_path = req.save_to_disk_path
    r.use_upscale: str = req.use_upscale
@ -433,6 +444,8 @@ def render(req : ImageRequest):
        r.stream_image_progress = False

    new_task = RenderTask(r)
+    new_task.render_device = req.render_device
+
    if task_cache.put(r.session_id, new_task, TASK_TTL):
        # Use twice the normal timeout for adding user requests.
        # Tries to force task_cache.put to fail before tasks_queue.put would. 
--- a/ui/server.py
+++ b/ui/server.py
@ -261,6 +261,8 @@ def read_web_data(key:str=None):
        if config is None:
            raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
        return JSONResponse(config, headers=NOCACHE_HEADERS)
+    elif key == 'devices':
+        return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
    elif key == 'models':
        return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
    elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
@ -305,7 +307,11 @@ def save_model_to_config(model_name):

@app.post('/render')
 def render(req : task_manager.ImageRequest):
-    if req.use_cpu and task_manager.is_alive('cpu') <= 0: raise HTTPException(status_code=403, detail=f'CPU rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
+    if req.use_cpu:  # TODO Remove after transition.
+        print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
+        req.render_device = 'cpu'
+        del req.use_cpu
+    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
    if req.use_face_correction and task_manager.is_alive(0) <= 0: #TODO Remove when GFPGANer is fixed upstream.
        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
    try: