Start on multiple GPUs by default (top 75 percentile by free_mem); add UI selection of 'cpu', 'auto', or a list of specific GPUs, which is now linked to the backend; dynamically start/stop render threads for the devices without requiring a full program restart

2022-11-14 11:23:22 +05:30
parent a19ba40672
commit ea03fd22db
8 changed files with 339 additions and 179 deletions
--- a/ui/server.py
+++ b/ui/server.py
@@ -224,7 +224,10 @@ def read_web_data(key:str=None):
            raise HTTPException(status_code=500, detail="Config file is missing or unreadable")
        return JSONResponse(config, headers=NOCACHE_HEADERS)
    elif key == 'devices':
-        return JSONResponse(task_manager.get_devices(), headers=NOCACHE_HEADERS)
+        config = getConfig()
+        devices = task_manager.get_devices()
+        devices['config'] = config.get('render_devices', "auto")
+        return JSONResponse(devices, headers=NOCACHE_HEADERS)
    elif key == 'models':
        return JSONResponse(getModels(), headers=NOCACHE_HEADERS)
    elif key == 'modifiers': return FileResponse(os.path.join(SD_UI_DIR, 'modifiers.json'), headers=NOCACHE_HEADERS)
@@ -272,17 +275,41 @@ def save_model_to_config(ckpt_model_name, vae_model_name):

    setConfig(config)

-@app.post('/render')
-def render(req : task_manager.ImageRequest):
+def save_render_devices_to_config(render_devices):
+    config = getConfig()
+    if 'render_devices' not in config:
+        config['render_devices'] = {}
+
+    config['render_devices'] = render_devices
+    if render_devices is None or len(render_devices) == 0:
+        del config['render_devices']
+
+    setConfig(config)
+
+def update_render_threads_on_request(req : task_manager.ImageRequest):
    if req.use_cpu:  # TODO Remove after transition.
        print('WARNING Replace {use_cpu: true} by {render_device: "cpu"}')
        req.render_device = 'cpu'
        del req.use_cpu
-    if req.render_device != 'cpu':
-        req.render_device = 'cuda:0' # temp hack to get beta working
-    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
+
+    if req.render_device not in ('cpu', 'auto') and not req.render_device.startswith('cuda:'):
+        raise HTTPException(status_code=400, detail=f'Invalid render device requested: {req.render_device}')
+
+    if req.render_device.startswith('cuda:'):
+        req.render_device = req.render_device.split(',')
+
+    save_render_devices_to_config(req.render_device)
+    del req.render_device
+
+    update_render_threads()
+
+@app.post('/render')
+def render(req : task_manager.ImageRequest):
+    update_render_threads_on_request(req)
+
    if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
-        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
+        raise HTTPException(status_code=412, detail=f'The "Fix incorrect faces" feature works only on cuda:0. Disable "Fix incorrect faces" (in Image Settings), or use the CUDA_VISIBLE_DEVICES environment variable.')
+
    try:
        save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
        req.use_stable_diffusion_model = resolve_ckpt_to_use(req.use_stable_diffusion_model)
@@ -359,44 +386,19 @@ class LogSuppressFilter(logging.Filter):
        return True
 logging.getLogger('uvicorn.access').addFilter(LogSuppressFilter())

-config = getConfig()
-
 # Start the task_manager
 task_manager.default_model_to_load = resolve_ckpt_to_use()
 task_manager.default_vae_to_load = resolve_vae_to_use()
-if 'render_devices' in config:  # Start a new thread for each device.
-    if not isinstance(config['render_devices'], list):
-        raise Exception('Invalid render_devices value in config. Should be a list')
-    config['render_devices'] = set(config['render_devices']) # de-duplicate
-    for device in config['render_devices']:
-        if task_manager.is_alive(device) >= 1:
-            print(device, 'already registered.')
-            continue
-        if not task_manager.start_render_thread(device):
-            print(device, 'failed to start.')
-    if task_manager.is_alive() <= 0: # No running devices, probably invalid user config.
-        print('WARNING: No active render devices after loading config. Validate "render_devices" in config.json')
-        print('Loading default render devices to replace invalid render_devices field from config', config['render_devices'])

-if task_manager.is_alive() <= 0: # Either no defaults or no devices after loading config.
-    # Select best GPU device using free memory, if more than one device.
-    if task_manager.start_render_thread('auto'): # Detect best device for renders
-        # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
-            print('Failed to start GPU:0...')
-    else:
-        print('Failed to start gpu device.')
-    if task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cpu'): # Allow CPU to be used for renders
-        print('Failed to start CPU render device...')
+def update_render_threads():
+    config = getConfig()
+    render_devices = config.get('render_devices', "auto")
+    active_devices = task_manager.get_devices()['active'].keys()

-is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
-if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
-    print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
-    print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
-    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
-    print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
+    print('requesting for render_devices', render_devices)
+    task_manager.update_render_threads(render_devices, active_devices)

-print('active devices', task_manager.get_devices()['active'])
+update_render_threads()

 # start the browser ui
 import webbrowser; webbrowser.open('http://localhost:9000')