"""task_manager.py: manage tasks dispatching and render threads. Notes: render_threads should be the only hard reference held by the manager to the threads. Use weak_thread_data to store all other data using weak keys. This will allow for garbage collection after the thread dies. """ import json import traceback TASK_TTL = 15 * 60 # Discard last session's task timeout import queue, threading, time, weakref from typing import Any, Generator, Hashable, Optional, Union from pydantic import BaseModel from sd_internal import Request, Response THREAD_NAME_PREFIX = 'Runtime-Render/' ERR_LOCK_FAILED = ' failed to acquire lock within timeout.' LOCK_TIMEOUT = 15 # Maximum locking time in seconds before failing a task. # It's better to get an exception than a deadlock... ALWAYS use timeout in critical paths. class SymbolClass(type): # Print nicely formatted Symbol names. def __repr__(self): return self.__qualname__ def __str__(self): return self.__name__ class Symbol(metaclass=SymbolClass): pass class ServerStates: class Init(Symbol): pass class LoadingModel(Symbol): pass class Online(Symbol): pass class Rendering(Symbol): pass class Unavailable(Symbol): pass class RenderTask(): # Task with output queue and completion lock. def __init__(self, req: Request): self.request: Request = req # Initial Request self.response: Any = None # Copy of the last reponse self.temp_images:[] = [None] * req.num_outputs * (1 if req.show_only_filtered_image else 2) self.error: Exception = None self.lock: threading.Lock = threading.Lock() # Locks at task start and unlocks when task is completed self.buffer_queue: queue.Queue = queue.Queue() # Queue of JSON string segments async def read_buffer_generator(self): try: while not self.buffer_queue.empty(): res = self.buffer_queue.get(block=False) self.buffer_queue.task_done() yield res except queue.Empty as e: yield # defaults from https://huggingface.co/blog/stable_diffusion class ImageRequest(BaseModel): session_id: str = "session" prompt: str = "" negative_prompt: str = "" init_image: str = None # base64 mask: str = None # base64 num_outputs: int = 1 num_inference_steps: int = 50 guidance_scale: float = 7.5 width: int = 512 height: int = 512 seed: int = 42 prompt_strength: float = 0.8 sampler: str = None # "ddim", "plms", "heun", "euler", "euler_a", "dpm2", "dpm2_a", "lms" # allow_nsfw: bool = False save_to_disk_path: str = None turbo: bool = True use_cpu: bool = False use_full_precision: bool = False use_face_correction: str = None # or "GFPGANv1.3" use_upscale: str = None # or "RealESRGAN_x4plus" or "RealESRGAN_x4plus_anime_6B" use_stable_diffusion_model: str = "sd-v1-4" show_only_filtered_image: bool = False output_format: str = "jpeg" # or "png" stream_progress_updates: bool = False stream_image_progress: bool = False class FilterRequest(BaseModel): session_id: str = "session" model: str = None name: str = "" init_image: str = None # base64 width: int = 512 height: int = 512 save_to_disk_path: str = None turbo: bool = True use_cpu: bool = False use_full_precision: bool = False output_format: str = "jpeg" # or "png" # Temporary cache to allow to query tasks results for a short time after they are completed. class TaskCache(): def __init__(self): self._base = dict() self._lock: threading.Lock = threading.Lock() def _get_ttl_time(self, ttl: int) -> int: return int(time.time()) + ttl def _is_expired(self, timestamp: int) -> bool: return int(time.time()) >= timestamp def clean(self) -> None: if not self._lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('TaskCache.clean' + ERR_LOCK_FAILED) try: # Create a list of expired keys to delete to_delete = [] for key in self._base: ttl, _ = self._base[key] if self._is_expired(ttl): to_delete.append(key) # Remove Items for key in to_delete: del self._base[key] print(f'Session {key} expired. Data removed.') finally: self._lock.release() def clear(self) -> None: if not self._lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('TaskCache.clear' + ERR_LOCK_FAILED) try: self._base.clear() finally: self._lock.release() def delete(self, key: Hashable) -> bool: if not self._lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('TaskCache.delete' + ERR_LOCK_FAILED) try: if key not in self._base: return False del self._base[key] return True finally: self._lock.release() def keep(self, key: Hashable, ttl: int) -> bool: if not self._lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('TaskCache.keep' + ERR_LOCK_FAILED) try: if key in self._base: _, value = self._base.get(key) self._base[key] = (self._get_ttl_time(ttl), value) return True return False finally: self._lock.release() def put(self, key: Hashable, value: Any, ttl: int) -> bool: if not self._lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('TaskCache.put' + ERR_LOCK_FAILED) try: self._base[key] = ( self._get_ttl_time(ttl), value ) except Exception as e: print(str(e)) print(traceback.format_exc()) return False else: return True finally: self._lock.release() def tryGet(self, key: Hashable) -> Any: if not self._lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('TaskCache.tryGet' + ERR_LOCK_FAILED) try: ttl, value = self._base.get(key, (None, None)) if ttl is not None and self._is_expired(ttl): print(f'Session {key} expired. Discarding data.') del self._base[key] return None return value finally: self._lock.release() manager_lock = threading.RLock() render_threads = [] current_state = ServerStates.Init current_state_error:Exception = None current_model_path = None tasks_queue = [] task_cache = TaskCache() default_model_to_load = None weak_thread_data = weakref.WeakKeyDictionary() def preload_model(file_path=None): global current_state, current_state_error, current_model_path if file_path == None: file_path = default_model_to_load if file_path == current_model_path: return current_state = ServerStates.LoadingModel try: from . import runtime runtime.thread_data.ckpt_file = file_path runtime.load_model_ckpt() current_model_path = file_path current_state_error = None current_state = ServerStates.Online except Exception as e: current_model_path = None current_state_error = e current_state = ServerStates.Unavailable print(traceback.format_exc()) def thread_render(device): global current_state, current_state_error, current_model_path from . import runtime weak_thread_data[threading.current_thread()] = { 'device': device } try: runtime.device_init(device) except: print(traceback.format_exc()) return weak_thread_data[threading.current_thread()] = { 'device': runtime.thread_data.device } preload_model() current_state = ServerStates.Online while True: task_cache.clean() if isinstance(current_state_error, SystemExit): current_state = ServerStates.Unavailable return task = None if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): print('Render thread on device', runtime.thread_data.device, 'failed to acquire manager lock.') time.sleep(1) continue if len(tasks_queue) <= 0: manager_lock.release() time.sleep(1) continue try: # Select a render task. for queued_task in tasks_queue: cpu_alive = is_alive('cpu') if queued_task.request.use_face_correction: #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices. # Allows GFPGANer on cuda:0 and use cpu only when cuda:0 is not available. first_device_alive = True if is_alive(0) >= 1 else False if cpu_alive <= 0 and not first_device_alive: queued_task.request.use_face_correction = False print('cuda:0 and cpu are not available with the current config. Removed GFPGANer filter to run task.') continue if not queued_task.request.use_cpu: if first_device_alive: if not runtime.is_first_cuda_device(runtime.thread_data.device): continue # Wait for cuda:0 elif cpu_alive > 0: print('cuda:0 is not available with the current config. Forcing task requiring GFPGANer to cpu.') queued_task.request.use_cpu = True continue if queued_task.request.use_cpu and runtime.thread_data.device != 'cpu' and cpu_alive > 0: continue # CPU Tasks, Skip GPU device if not queued_task.request.use_cpu and runtime.thread_data.device == 'cpu' and is_alive() > 1: # cpu is alive, so need more than one. continue # GPU Tasks, don't run on CPU unless there is nothing else. task = queued_task break if task is not None: del tasks_queue[tasks_queue.index(task)] finally: manager_lock.release() if task is None: time.sleep(1) continue #if current_model_path != task.request.use_stable_diffusion_model: # preload_model(task.request.use_stable_diffusion_model) if current_state_error: task.error = current_state_error continue print(f'Session {task.request.session_id} starting task {id(task)}') if not task.lock.acquire(blocking=False): raise Exception('Got locked task from queue.') try: # Open data generator. res = runtime.mk_img(task.request) if current_model_path == task.request.use_stable_diffusion_model: current_state = ServerStates.Rendering else: current_state = ServerStates.LoadingModel # Start reading from generator. dataQueue = None if task.request.stream_progress_updates: dataQueue = task.buffer_queue for result in res: if current_state == ServerStates.LoadingModel: current_state = ServerStates.Rendering current_model_path = task.request.use_stable_diffusion_model if isinstance(current_state_error, SystemExit) or isinstance(current_state_error, StopAsyncIteration) or isinstance(task.error, StopAsyncIteration): runtime.stop_processing = True if isinstance(current_state_error, StopAsyncIteration): task.error = current_state_error current_state_error = None print(f'Session {task.request.session_id} sent cancel signal for task {id(task)}') if dataQueue: dataQueue.put(result) if isinstance(result, str): result = json.loads(result) task.response = result if 'output' in result: for out_obj in result['output']: if 'path' in out_obj: img_id = out_obj['path'][out_obj['path'].rindex('/') + 1:] task.temp_images[int(img_id)] = runtime.thread_data.temp_images[out_obj['path'][11:]] elif 'data' in out_obj: task.temp_images[result['output'].index(out_obj)] = out_obj['data'] # Before looping back to the generator, mark cache as still alive. task_cache.keep(task.request.session_id, TASK_TTL) except Exception as e: task.error = e print(traceback.format_exc()) continue finally: # Task completed task.lock.release() task_cache.keep(task.request.session_id, TASK_TTL) if isinstance(task.error, StopAsyncIteration): print(f'Session {task.request.session_id} task {id(task)} cancelled!') elif task.error is not None: print(f'Session {task.request.session_id} task {id(task)} failed!') else: print(f'Session {task.request.session_id} task {id(task)} completed.') current_state = ServerStates.Online def is_first_cuda_device(device): from . import runtime # When calling runtime from outside thread_render DO NOT USE thread specific attributes or functions. return runtime.is_first_cuda_device(device) def is_alive(name=None): if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('is_alive' + ERR_LOCK_FAILED) nbr_alive = 0 try: for rthread in render_threads: if name is not None: weak_data = weak_thread_data.get(rthread) if weak_data is None or weak_data['device'] is None: print('The thread', rthread.name, 'is registered but has no data store in the task manager.') continue thread_name = str(weak_data['device']).lower() if is_first_cuda_device(name): if not is_first_cuda_device(thread_name): continue elif thread_name != name: continue if rthread.is_alive(): nbr_alive += 1 return nbr_alive finally: manager_lock.release() def start_render_thread(device='auto'): if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('start_render_threads' + ERR_LOCK_FAILED) print('Start new Rendering Thread on device', device) try: rthread = threading.Thread(target=thread_render, kwargs={'device': device}) rthread.daemon = True rthread.name = THREAD_NAME_PREFIX + device rthread.start() timeout = LOCK_TIMEOUT while not rthread.is_alive(): if timeout <= 0: raise Exception('render_thread', rthread.name, 'failed to start before timeout or has crashed.') timeout -= 1 time.sleep(1) render_threads.append(rthread) finally: manager_lock.release() def shutdown_event(): # Signal render thread to close on shutdown global current_state_error current_state_error = SystemExit('Application shutting down.') def render(req : ImageRequest): if not is_alive(): # Render thread is dead raise ChildProcessError('Rendering thread has died.') # Alive, check if task in cache task = task_cache.tryGet(req.session_id) if task and not task.response and not task.error and not task.lock.locked(): # Unstarted task pending, deny queueing more than one. raise ConnectionRefusedError(f'Session {req.session_id} has an already pending task.') # from . import runtime r = Request() r.session_id = req.session_id r.prompt = req.prompt r.negative_prompt = req.negative_prompt r.init_image = req.init_image r.mask = req.mask r.num_outputs = req.num_outputs r.num_inference_steps = req.num_inference_steps r.guidance_scale = req.guidance_scale r.width = req.width r.height = req.height r.seed = req.seed r.prompt_strength = req.prompt_strength r.sampler = req.sampler # r.allow_nsfw = req.allow_nsfw r.turbo = req.turbo r.use_cpu = req.use_cpu r.use_full_precision = req.use_full_precision r.save_to_disk_path = req.save_to_disk_path r.use_upscale: str = req.use_upscale r.use_face_correction = req.use_face_correction r.use_stable_diffusion_model = req.use_stable_diffusion_model r.show_only_filtered_image = req.show_only_filtered_image r.output_format = req.output_format r.stream_progress_updates = True # the underlying implementation only supports streaming r.stream_image_progress = req.stream_image_progress if not req.stream_progress_updates: r.stream_image_progress = False new_task = RenderTask(r) if task_cache.put(r.session_id, new_task, TASK_TTL): # Use twice the normal timeout for adding user requests. # Tries to force task_cache.put to fail before tasks_queue.put would. if manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT * 2): try: tasks_queue.append(new_task) return new_task finally: manager_lock.release() raise RuntimeError('Failed to add task to cache.')