diff --git a/ui/media/js/auto-save.js b/ui/media/js/auto-save.js
index 1677c6c1..91a2d267 100644
--- a/ui/media/js/auto-save.js
+++ b/ui/media/js/auto-save.js
@@ -36,7 +36,7 @@ const SETTINGS_IDS_LIST = [
     "save_to_disk",
     "diskPath",
     "sound_toggle",
-    "turbo",
+    "performance_level",
     "confirm_dangerous_actions",
     "metadata_output_format",
     "auto_save_settings",
diff --git a/ui/media/js/main.js b/ui/media/js/main.js
index c2dc32f3..3a669b67 100644
--- a/ui/media/js/main.js
+++ b/ui/media/js/main.js
@@ -602,7 +602,7 @@ function onTaskCompleted(task, reqBody, instance, outputContainer, stepUpdate) {
                             Suggestions:
                             <br/>
                             1. If you have set an initial image, please try reducing its dimension to ${MAX_INIT_IMAGE_DIMENSION}x${MAX_INIT_IMAGE_DIMENSION} or smaller.<br/>
-                            2. Try disabling the 'Turbo mode' under 'Advanced Settings'.<br/>
+                            2. Try picking a lower performance level in the 'Performance Level' setting (in the 'Settings' tab).<br/>
                             3. Try generating a smaller image.<br/>`
                 }
             } else {
@@ -887,7 +887,7 @@ function getCurrentUserRequest() {
             width: parseInt(widthField.value),
             height: parseInt(heightField.value),
             // allow_nsfw: allowNSFWField.checked,
-            turbo: turboField.checked,
+            performance_level: perfLevelField.value,
             //render_device: undefined, // Set device affinity. Prefer this device, but wont activate.
             use_stable_diffusion_model: stableDiffusionModelField.value,
             use_vae_model: vaeModelField.value,
diff --git a/ui/media/js/parameters.js b/ui/media/js/parameters.js
index 52a4b67c..865f7bb5 100644
--- a/ui/media/js/parameters.js
+++ b/ui/media/js/parameters.js
@@ -94,12 +94,20 @@ var PARAMETERS = [
         default: true,
     },
     {
-        id: "turbo",
-        type: ParameterType.checkbox,
-        label: "Turbo Mode",
-        note: "generates images faster, but uses an additional 1 GB of GPU memory",
+        id: "performance_level",
+        type: ParameterType.select,
+        label: "Performance Level",
+        note: "Faster performance requires more GPU memory<br/><br/>" +
+              "High: fastest, maximum GPU memory usage<br/>" +
+              "Medium: decent speed, uses 1 GB more memory than Low<br/>" +
+              "Low: slowest, for GPUs with 4 GB (or less) memory",
         icon: "fa-forward",
-        default: true,
+        default: "high",
+        options: [
+            {value: "high", label: "High"},
+            {value: "medium", label: "Medium"},
+            {value: "low", label: "Low"}
+        ],
     },
     {
         id: "use_cpu",
@@ -219,7 +227,7 @@ function initParameters()
 
 initParameters()
 
-let turboField = document.querySelector('#turbo')
+let perfLevelField = document.querySelector('#performance_level')
 let useCPUField = document.querySelector('#use_cpu')
 let autoPickGPUsField = document.querySelector('#auto_pick_gpus')
 let useGPUsField = document.querySelector('#use_gpus')
diff --git a/ui/sd_internal/__init__.py b/ui/sd_internal/__init__.py
index 71073216..5475c3a6 100644
--- a/ui/sd_internal/__init__.py
+++ b/ui/sd_internal/__init__.py
@@ -6,7 +6,7 @@ class TaskData(BaseModel):
     request_id: str = None
     session_id: str = "session"
     save_to_disk_path: str = None
-    turbo: bool = True
+    performance_level: str = "high" # or "low" or "medium"
     use_face_correction: str = None # or "GFPGANv1.3"
     use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B"
diff --git a/ui/sd_internal/app.py b/ui/sd_internal/app.py
index 47a2d610..7171e6c1 100644
--- a/ui/sd_internal/app.py
+++ b/ui/sd_internal/app.py
@@ -110,7 +110,7 @@ def setConfig(config):
     except:
         log.error(traceback.format_exc())
 
-def save_model_to_config(ckpt_model_name, vae_model_name, hypernetwork_model_name):
+def save_to_config(ckpt_model_name, vae_model_name, hypernetwork_model_name, performance_level):
     config = getConfig()
     if 'model' not in config:
         config['model'] = {}
@@ -124,6 +124,8 @@ def save_model_to_config(ckpt_model_name, vae_model_name, hypernetwork_model_nam
     if hypernetwork_model_name is None or hypernetwork_model_name == "":
         del config['model']['hypernetwork']
 
+    config['performance_level'] = performance_level
+
     setConfig(config)
 
 def update_render_threads():
diff --git a/ui/sd_internal/device_manager.py b/ui/sd_internal/device_manager.py
index 733bab50..8b5c49be 100644
--- a/ui/sd_internal/device_manager.py
+++ b/ui/sd_internal/device_manager.py
@@ -128,6 +128,18 @@ def needs_to_force_full_precision(context):
     device_name = context.device_name.lower()
     return (('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)) or ('Quadro T2000' in device_name)
 
+def get_max_perf_level(device):
+    if device != 'cpu':
+        _, mem_total = torch.cuda.mem_get_info(device)
+        mem_total /= float(10**9)
+
+        if mem_total < 4.5:
+            return 'low'
+        elif mem_total < 6.5:
+            return 'medium'
+
+    return 'high'
+
 def validate_device_id(device, log_prefix=''):
     def is_valid():
         if not isinstance(device, str):
diff --git a/ui/sd_internal/model_manager.py b/ui/sd_internal/model_manager.py
index b6c4c92d..9c3320ea 100644
--- a/ui/sd_internal/model_manager.py
+++ b/ui/sd_internal/model_manager.py
@@ -3,7 +3,7 @@ import logging
 import picklescan.scanner
 import rich
 
-from sd_internal import app, TaskData
+from sd_internal import app, TaskData, device_manager
 from diffusionkit import model_loader
 from diffusionkit.types import Context
 
@@ -25,6 +25,11 @@ DEFAULT_MODELS = {
     'gfpgan': ['GFPGANv1.3'],
     'realesrgan': ['RealESRGAN_x4plus'],
 }
+PERF_LEVEL_TO_VRAM_OPTIMIZATIONS = {
+    'low': {'KEEP_ENTIRE_MODEL_IN_CPU'},
+    'medium': {'KEEP_FS_AND_CS_IN_CPU', 'SET_ATTENTION_STEP_TO_4'},
+    'high': {},
+}
 
 known_models = {}
 
@@ -37,8 +42,7 @@ def load_default_models(context: Context):
     for model_type in KNOWN_MODEL_TYPES:
         context.model_paths[model_type] = resolve_model_to_use(model_type=model_type)
 
-    # disable TURBO initially (this should be read from the config eventually)
-    context.vram_optimizations -= {'TURBO'}
+    set_vram_optimizations(context)
 
     # load mandatory models
     model_loader.load_model(context, 'stable-diffusion')
@@ -94,20 +98,23 @@ def resolve_model_to_use(model_name:str=None, model_type:str=None):
     return None
 
 def reload_models_if_necessary(context: Context, task_data: TaskData):
-    model_paths_in_req = (
-        ('stable-diffusion', task_data.use_stable_diffusion_model),
-        ('vae', task_data.use_vae_model),
-        ('hypernetwork', task_data.use_hypernetwork_model),
-        ('gfpgan', task_data.use_face_correction),
-        ('realesrgan', task_data.use_upscale),
-    )
+    model_paths_in_req = {
+        'stable-diffusion': task_data.use_stable_diffusion_model,
+        'vae': task_data.use_vae_model,
+        'hypernetwork': task_data.use_hypernetwork_model,
+        'gfpgan': task_data.use_face_correction,
+        'realesrgan': task_data.use_upscale,
+    }
+    models_to_reload = {model_type: path for model_type, path in model_paths_in_req.items() if context.model_paths.get(model_type) != path}
 
-    for model_type, model_path_in_req in model_paths_in_req:
-        if context.model_paths.get(model_type) != model_path_in_req:
-            context.model_paths[model_type] = model_path_in_req
+    if set_vram_optimizations(context): # reload SD
+        models_to_reload['stable-diffusion'] = model_paths_in_req['stable-diffusion']
 
-            action_fn = model_loader.unload_model if context.model_paths[model_type] is None else model_loader.load_model
-            action_fn(context, model_type)
+    for model_type, model_path_in_req in models_to_reload.items():
+        context.model_paths[model_type] = model_path_in_req
+
+        action_fn = model_loader.unload_model if context.model_paths[model_type] is None else model_loader.load_model
+        action_fn(context, model_type)
 
 def resolve_model_paths(task_data: TaskData):
     task_data.use_stable_diffusion_model = resolve_model_to_use(task_data.use_stable_diffusion_model, model_type='stable-diffusion')
@@ -117,11 +124,16 @@ def resolve_model_paths(task_data: TaskData):
     if task_data.use_face_correction: task_data.use_face_correction = resolve_model_to_use(task_data.use_face_correction, 'gfpgan')
     if task_data.use_upscale: task_data.use_upscale = resolve_model_to_use(task_data.use_upscale, 'gfpgan')
 
-def set_vram_optimizations(context: Context, task_data: TaskData):
-    if task_data.turbo:
-        context.vram_optimizations.add('TURBO')
-    else:
-        context.vram_optimizations.remove('TURBO')
+def set_vram_optimizations(context: Context):
+    config = app.getConfig()
+    perf_level = config.get('performance_level', device_manager.get_max_perf_level(context.device))
+    vram_optimizations = PERF_LEVEL_TO_VRAM_OPTIMIZATIONS[perf_level]
+
+    if vram_optimizations != context.vram_optimizations:
+        context.vram_optimizations = vram_optimizations
+        return True
+
+    return False
 
 def make_model_folders():
     for model_type in KNOWN_MODEL_TYPES:
diff --git a/ui/sd_internal/task_manager.py b/ui/sd_internal/task_manager.py
index 3b8f6082..0780283d 100644
--- a/ui/sd_internal/task_manager.py
+++ b/ui/sd_internal/task_manager.py
@@ -281,7 +281,6 @@ def thread_render(device):
 
             current_state = ServerStates.LoadingModel
             model_manager.resolve_model_paths(task.task_data)
-            model_manager.set_vram_optimizations(renderer.context, task.task_data)
             model_manager.reload_models_if_necessary(renderer.context, task.task_data)
 
             current_state = ServerStates.Rendering
@@ -342,6 +341,7 @@ def get_devices():
                 'name': torch.cuda.get_device_name(device),
                 'mem_free': mem_free,
                 'mem_total': mem_total,
+                'max_perf_level': device_manager.get_max_perf_level(device),
             }
 
     # list the compatible devices
diff --git a/ui/server.py b/ui/server.py
index 7308dfc8..11d3731a 100644
--- a/ui/server.py
+++ b/ui/server.py
@@ -134,7 +134,7 @@ def render(req: dict):
 
         render_req.init_image_mask = req.get('mask') # hack: will rename this in the HTTP API in a future revision
 
-        app.save_model_to_config(task_data.use_stable_diffusion_model, task_data.use_vae_model, task_data.use_hypernetwork_model)
+        app.save_to_config(task_data.use_stable_diffusion_model, task_data.use_vae_model, task_data.use_hypernetwork_model, task_data.performance_level)
 
         # enqueue the task
         new_task = task_manager.render(render_req, task_data)
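
For reviewers, a small standalone sketch of how the new setting resolves to VRAM optimization flags. This is an illustration only, not part of the patch: the helper names (max_perf_level_for, vram_optimizations_for) are invented here, and the total GPU memory is passed in as a plain number in GB instead of being read via torch.cuda.mem_get_info(). The mapping and the 4.5 / 6.5 GB thresholds are the ones introduced in device_manager.py and model_manager.py above.

# Illustration only, not part of the diff above.
PERF_LEVEL_TO_VRAM_OPTIMIZATIONS = {
    'low': {'KEEP_ENTIRE_MODEL_IN_CPU'},
    'medium': {'KEEP_FS_AND_CS_IN_CPU', 'SET_ATTENTION_STEP_TO_4'},
    'high': set(),  # set() here so 'high' compares equal to "no optimizations"
}

def max_perf_level_for(mem_total_gb):
    # same thresholds as device_manager.get_max_perf_level(), but takes the
    # total GPU memory in GB directly instead of querying torch.cuda
    if mem_total_gb < 4.5:
        return 'low'
    elif mem_total_gb < 6.5:
        return 'medium'
    return 'high'

def vram_optimizations_for(configured_level, mem_total_gb):
    # the saved config value wins; otherwise fall back to the device's maximum
    # level, mirroring set_vram_optimizations() in model_manager.py
    level = configured_level or max_perf_level_for(mem_total_gb)
    return PERF_LEVEL_TO_VRAM_OPTIMIZATIONS[level]

assert vram_optimizations_for(None, 4.0) == {'KEEP_ENTIRE_MODEL_IN_CPU'}  # 4 GB card -> 'low'
assert vram_optimizations_for('medium', 8.0) == {'KEEP_FS_AND_CS_IN_CPU', 'SET_ATTENTION_STEP_TO_4'}
assert vram_optimizations_for(None, 8.0) == set()  # 8 GB card defaults to 'high'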