import re import os import json import subprocess from datetime import datetime, timedelta import tempfile import shutil from django.core.cache import cache from django.conf import settings from django.core.files import File from django.db.models import Q from celery import Task from celery.decorators import task from celery.utils.log import get_task_logger from celery.exceptions import SoftTimeLimitExceeded from celery.task.control import revoke from celery.signals import task_revoked from .backends import FFmpegBackend from .exceptions import VideoEncodingError from .helpers import ( calculate_seconds, rm_file, create_temp_file, get_file_name, get_file_type, media_file_info, run_command, produce_ffmpeg_commands, produce_friendly_token, ) from actions.models import MediaAction, USER_MEDIA_ACTIONS from users.models import User from .models import Encoding, EncodeProfile, Media, Category, Rating, Tag from .methods import list_tasks, pre_save_action, notify_users logger = get_task_logger(__name__) VALID_USER_ACTIONS = [action for action, name in USER_MEDIA_ACTIONS] ERRORS_LIST = [ "Output file is empty, nothing was encoded", "Invalid data found when processing input", "Unable to find a suitable output format for", ] @task(name="chunkize_media", bind=True, queue="short_tasks", soft_time_limit=60 * 30) def chunkize_media(self, friendly_token, profiles, force=True): """Break media in chunks and start encoding tasks""" profiles = [EncodeProfile.objects.get(id=profile) for profile in profiles] media = Media.objects.get(friendly_token=friendly_token) cwd = os.path.dirname(os.path.realpath(media.media_file.path)) file_name = media.media_file.path.split("/")[-1] random_prefix = produce_friendly_token() file_format = "{0}_{1}".format(random_prefix, file_name) chunks_file_name = "%02d_{0}".format(file_format) chunks_file_name += ".mkv" cmd = [ settings.FFMPEG_COMMAND, "-y", "-i", media.media_file.path, "-c", "copy", "-f", "segment", "-segment_time", str(settings.VIDEO_CHUNKS_DURATION), chunks_file_name, ] chunks = [] ret = run_command(cmd, cwd=cwd) if "out" in ret.keys(): for line in ret.get("error").split("\n"): ch = re.findall(r"Opening \'([\W\w]+)\' for writing", line) if ch: chunks.append(ch[0]) if not chunks: # command completely failed to segment file.putting to normal encode logger.info( "Failed to break file {0} in chunks." " Putting to normal encode queue".format(friendly_token) ) for profile in profiles: if media.video_height and media.video_height < profile.resolution: if profile.resolution not in settings.MINIMUM_RESOLUTIONS_TO_ENCODE: continue encoding = Encoding(media=media, profile=profile) encoding.save() enc_url = settings.SSL_FRONTEND_HOST + encoding.get_absolute_url() encode_media.delay( friendly_token, profile.id, encoding.id, enc_url, force=force ) return False chunks = [os.path.join(cwd, ch) for ch in chunks] to_profiles = [] chunks_dict = {} # calculate once md5sums for chunk in chunks: cmd = ["md5sum", chunk] stdout = run_command(cmd).get("out") md5sum = stdout.strip().split()[0] chunks_dict[chunk] = md5sum for profile in profiles: if media.video_height and media.video_height < profile.resolution: if profile.resolution not in settings.MINIMUM_RESOLUTIONS_TO_ENCODE: continue to_profiles.append(profile) for chunk in chunks: encoding = Encoding( media=media, profile=profile, chunk_file_path=chunk, chunk=True, chunks_info=json.dumps(chunks_dict), md5sum=chunks_dict[chunk], ) encoding.save() enc_url = settings.SSL_FRONTEND_HOST + encoding.get_absolute_url() if profile.resolution in settings.MINIMUM_RESOLUTIONS_TO_ENCODE: priority = 0 else: priority = 9 encode_media.apply_async( args=[friendly_token, profile.id, encoding.id, enc_url], kwargs={"force": force, "chunk": True, "chunk_file_path": chunk}, priority=priority, ) logger.info( "got {0} chunks and will encode to {1} profiles".format( len(chunks), to_profiles ) ) return True class EncodingTask(Task): def on_failure(self, exc, task_id, args, kwargs, einfo): # mainly used to run some post failure steps # we get here if a task is revoked try: if hasattr(self, "encoding"): self.encoding.status = "fail" self.encoding.save(update_fields=["status"]) kill_ffmpeg_process(self.encoding.temp_file) if hasattr(self.encoding, "media"): self.encoding.media.post_encode_actions() except BaseException: pass return False @task( name="encode_media", base=EncodingTask, bind=True, queue="long_tasks", soft_time_limit=settings.CELERY_SOFT_TIME_LIMIT, ) def encode_media( self, friendly_token, profile_id, encoding_id, encoding_url, force=True, chunk=False, chunk_file_path="", ): """Encode a media to given profile, using ffmpeg, storing progress""" logger.info( "Encode Media started, friendly token {0}, profile id {1}, force {2}".format( friendly_token, profile_id, force ) ) if self.request.id: task_id = self.request.id else: task_id = None try: media = Media.objects.get(friendly_token=friendly_token) profile = EncodeProfile.objects.get(id=profile_id) except BaseException: Encoding.objects.filter(id=encoding_id).delete() return False # break logic with chunk True/False if chunk: # TODO: in case a video is chunkized and this enters here many times # it will always run since chunk_file_path is always different # thus find a better way for this check if ( Encoding.objects.filter( media=media, profile=profile, chunk_file_path=chunk_file_path ).count() > 1 and force == False ): Encoding.objects.filter(id=encoding_id).delete() return False else: try: encoding = Encoding.objects.get(id=encoding_id) encoding.status = "running" Encoding.objects.filter( media=media, profile=profile, chunk=True, chunk_file_path=chunk_file_path, ).exclude(id=encoding_id).delete() except BaseException: encoding = Encoding( media=media, profile=profile, status="running", chunk=True, chunk_file_path=chunk_file_path, ) else: if ( Encoding.objects.filter(media=media, profile=profile).count() > 1 and force is False ): Encoding.objects.filter(id=encoding_id).delete() return False else: try: encoding = Encoding.objects.get(id=encoding_id) encoding.status = "running" Encoding.objects.filter(media=media, profile=profile).exclude( id=encoding_id ).delete() except BaseException: encoding = Encoding(media=media, profile=profile, status="running") if task_id: encoding.task_id = task_id encoding.worker = "localhost" encoding.retries = self.request.retries encoding.save() if profile.extension == "gif": tf = create_temp_file(suffix=".gif") # -ss 5 start from 5 second. -t 25 until 25 sec command = [ settings.FFMPEG_COMMAND, "-y", "-ss", "3", "-i", media.media_file.path, "-hide_banner", "-vf", "scale=344:-1:flags=lanczos,fps=1", "-t", "25", "-f", "gif", tf, ] ret = run_command(command) if os.path.exists(tf) and get_file_type(tf) == "image": with open(tf, "rb") as f: myfile = File(f) encoding.status = "success" encoding.media_file.save(content=myfile, name=tf) rm_file(tf) return True else: return False if chunk: original_media_path = chunk_file_path else: original_media_path = media.media_file.path if not media.duration: encoding.status = "fail" encoding.save(update_fields=["status"]) return False with tempfile.TemporaryDirectory(dir=settings.TEMP_DIRECTORY) as temp_dir: tf = create_temp_file(suffix=".{0}".format(profile.extension), dir=temp_dir) tfpass = create_temp_file(suffix=".{0}".format(profile.extension), dir=temp_dir) ffmpeg_commands = produce_ffmpeg_commands( original_media_path, media.media_info, resolution=profile.resolution, codec=profile.codec, output_filename=tf, pass_file=tfpass, chunk=chunk, ) if not ffmpeg_commands: encoding.status = "fail" encoding.save(update_fields=["status"]) return False encoding.temp_file = tf encoding.commands = str(ffmpeg_commands) encoding.save(update_fields=["temp_file", "commands", "task_id"]) # binding these, so they are available on on_failure self.encoding = encoding self.media = media # can be one-pass or two-pass for ffmpeg_command in ffmpeg_commands: ffmpeg_command = [str(s) for s in ffmpeg_command] encoding_backend = FFmpegBackend() try: encoding_command = encoding_backend.encode(ffmpeg_command) duration, n_times = 0, 0 output = "" while encoding_command: try: # TODO: understand an eternal loop # eg h265 with mv4 file issue, and stop with error output = next(encoding_command) duration = calculate_seconds(output) if duration: percent = duration * 100 / media.duration if n_times % 60 == 0: encoding.progress = percent try: encoding.save( update_fields=["progress", "update_date"] ) logger.info("Saved {0}".format(round(percent, 2))) except BaseException: pass n_times += 1 except StopIteration: break except VideoEncodingError: # ffmpeg error, or ffmpeg was killed raise except Exception as e: try: # output is empty, fail message is on the exception output = e.message except AttributeError: output = "" if isinstance(e, SoftTimeLimitExceeded): kill_ffmpeg_process(encoding.temp_file) encoding.logs = output encoding.status = "fail" encoding.save(update_fields=["status", "logs"]) raise_exception = True # if this is an ffmpeg's valid error # no need for the task to be re-run # otherwise rerun task... for error_msg in ERRORS_LIST: if error_msg.lower() in output.lower(): raise_exception = False if raise_exception: raise self.retry(exc=e, countdown=5, max_retries=1) encoding.logs = output encoding.progress = 100 success = False encoding.status = "fail" if os.path.exists(tf) and os.path.getsize(tf) != 0: ret = media_file_info(tf) if ret.get("is_video") or ret.get("is_audio"): encoding.status = "success" success = True with open(tf, "rb") as f: myfile = File(f) output_name = "{0}.{1}".format( get_file_name(original_media_path), profile.extension ) encoding.media_file.save(content=myfile, name=output_name) encoding.total_run_time = ( encoding.update_date - encoding.add_date ).seconds try: encoding.save( update_fields=["status", "logs", "progress", "total_run_time"] ) # this will raise a django.db.utils.DatabaseError error when task is revoked, # since we delete the encoding at that stage except BaseException: pass return success @task(name="produce_sprite_from_video", queue="long_tasks") def produce_sprite_from_video(friendly_token): """Produces a sprites file for a video, uses ffmpeg""" try: media = Media.objects.get(friendly_token=friendly_token) except BaseException: logger.info("failed to get media with friendly_token %s" % friendly_token) return False with tempfile.TemporaryDirectory(dir=settings.TEMP_DIRECTORY) as tmpdirname: try: tmpdir_image_files = tmpdirname + "/img%03d.jpg" output_name = tmpdirname + "/sprites.jpg" cmd = "{0} -i {1} -f image2 -vf 'fps=1/10, scale=160:90' {2}&&files=$(ls {3}/img*.jpg | sort -t '-' -n -k 2 | tr '\n' ' ')&&convert $files -append {4}".format( settings.FFMPEG_COMMAND, media.media_file.path, tmpdir_image_files, tmpdirname, output_name, ) ret = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True) if os.path.exists(output_name) and get_file_type(output_name) == "image": with open(output_name, "rb") as f: myfile = File(f) media.sprites.save( content=myfile, name=get_file_name(media.media_file.path) + "sprites.jpg", ) except BaseException: pass return True @task(name="create_hls", queue="long_tasks") def create_hls(friendly_token): """Creates HLS file for media, uses Bento4 mp4hls command""" if not hasattr(settings, "MP4HLS_COMMAND"): logger.info("Bento4 mp4hls command is missing from configuration") return False if not os.path.exists(settings.MP4HLS_COMMAND): logger.info("Bento4 mp4hls command is missing") return False try: media = Media.objects.get(friendly_token=friendly_token) except BaseException: logger.info("failed to get media with friendly_token %s" % friendly_token) return False p = media.uid.hex output_dir = os.path.join(settings.HLS_DIR, p) encodings = media.encodings.filter( profile__extension="mp4", status="success", chunk=False, profile__codec="h264" ) if encodings: existing_output_dir = None if os.path.exists(output_dir): existing_output_dir = output_dir output_dir = os.path.join(settings.HLS_DIR, p + produce_friendly_token()) files = " ".join([f.media_file.path for f in encodings if f.media_file]) cmd = "{0} --segment-duration=4 --output-dir={1} {2}".format( settings.MP4HLS_COMMAND, output_dir, files ) ret = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True) if existing_output_dir: # override content with -T ! cmd = "cp -rT {0} {1}".format(output_dir, existing_output_dir) ret = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True) shutil.rmtree(output_dir) output_dir = existing_output_dir pp = os.path.join(output_dir, "master.m3u8") if os.path.exists(pp): if media.hls_file != pp: media.hls_file = pp media.save(update_fields=["hls_file"]) return True @task(name="check_running_states", queue="short_tasks") def check_running_states(): # Experimental - unused """Check stale running encodings and delete/reencode media""" encodings = Encoding.objects.filter(status="running") logger.info("got {0} encodings that are in state running".format(encodings.count())) changed = 0 for encoding in encodings: now = datetime.now(encoding.update_date.tzinfo) if (now - encoding.update_date).seconds > settings.RUNNING_STATE_STALE: media = encoding.media profile = encoding.profile task_id = encoding.task_id # terminate task if task_id: revoke(task_id, terminate=True) encoding.delete() media.encode(profiles=[profile]) # TODO: allign with new code + chunksize... changed += 1 if changed: logger.info("changed from running to pending on {0} items".format(changed)) return True @task(name="check_media_states", queue="short_tasks") def check_media_states(): # Experimental - unused # check encoding status of not success media media = Media.objects.filter( Q(encoding_status="running") | Q(encoding_status="fail") | Q(encoding_status="pending") ) logger.info("got {0} media that are not in state success".format(media.count())) changed = 0 for m in media: m.set_encoding_status() m.save(update_fields=["encoding_status"]) changed += 1 if changed: logger.info("changed encoding status to {0} media items".format(changed)) return True @task(name="check_pending_states", queue="short_tasks") def check_pending_states(): # Experimental - unused # check encoding profiles that are on state pending and not on a queue encodings = Encoding.objects.filter(status="pending") if not encodings: return True changed = 0 tasks = list_tasks() task_ids = tasks["task_ids"] media_profile_pairs = tasks["media_profile_pairs"] for encoding in encodings: if encoding.task_id and encoding.task_id in task_ids: # encoding is in one of the active/reserved/scheduled tasks list continue elif ( encoding.media.friendly_token, encoding.profile.id, ) in media_profile_pairs: continue # encoding is in one of the reserved/scheduled tasks list. # has no task_id but will be run, so need to re-enter the queue else: media = encoding.media profile = encoding.profile encoding.delete() media.encode(profiles=[profile], force=False) changed += 1 if changed: logger.info( "set to the encode queue {0} encodings that were on pending state".format( changed ) ) return True @task(name="check_missing_profiles", queue="short_tasks") def check_missing_profiles(): # Experimental - unused # check if video files have missing profiles. If so, add them media = Media.objects.filter(media_type="video") profiles = list(EncodeProfile.objects.all()) changed = 0 for m in media: existing_profiles = [p.profile for p in m.encodings.all()] missing_profiles = [p for p in profiles if p not in existing_profiles] if missing_profiles: m.encode(profiles=missing_profiles, force=False) # since we call with force=False # encode_media won't delete existing profiles # if they appear on the meanwhile (eg on a big queue) changed += 1 if changed: logger.info("set to the encode queue {0} profiles".format(changed)) return True @task(name="clear_sessions", queue="short_tasks") def clear_sessions(): """Clear expired sessions""" try: from importlib import import_module from django.conf import settings engine = import_module(settings.SESSION_ENGINE) engine.SessionStore.clear_expired() except BaseException: return False return True @task(name="save_user_action", queue="short_tasks") def save_user_action( user_or_session, friendly_token=None, action="watch", extra_info=None ): """Short task that saves a user action""" if action not in VALID_USER_ACTIONS: return False try: media = Media.objects.get(friendly_token=friendly_token) except BaseException: return False user = user_or_session.get("user_id") session_key = user_or_session.get("user_session") remote_ip = user_or_session.get("remote_ip_addr") if user: try: user = User.objects.get(id=user) except BaseException: return False if not (user or session_key): return False if action in ["like", "dislike", "report"]: if not pre_save_action( media=media, user=user, session_key=session_key, action=action, remote_ip=remote_ip, ): return False if action == "watch": if user: MediaAction.objects.filter(user=user, media=media, action="watch").delete() else: MediaAction.objects.filter( session_key=session_key, media=media, action="watch" ).delete() if action == "rate": try: score = extra_info.get("score") rating_category = extra_info.get("category_id") except BaseException: # TODO: better error handling? return False try: rating = Rating.objects.filter( user=user, media=media, rating_category_id=rating_category ).first() if rating: rating.score = score rating.save(update_fields=["score"]) else: rating = Rating.objects.create( user=user, media=media, rating_category_id=rating_category, score=score, ) except Exception as exc: # TODO: more specific handling, for errors in score, or # rating_category? return False ma = MediaAction( user=user, session_key=session_key, media=media, action=action, extra_info=extra_info, remote_ip=remote_ip, ) ma.save() if action == "watch": media.views += 1 media.save(update_fields=["views"]) elif action == "report": media.reported_times += 1 if media.reported_times >= settings.REPORTED_TIMES_THRESHOLD: media.state = "private" media.save(update_fields=["reported_times", "state"]) notify_users( friendly_token=media.friendly_token, action="media_reported", extra=extra_info, ) elif action == "like": media.likes += 1 media.save(update_fields=["likes"]) elif action == "dislike": media.dislikes += 1 media.save(update_fields=["dislikes"]) return True @task(name="get_list_of_popular_media", queue="long_tasks") def get_list_of_popular_media(): """Experimental task for preparing media listing for index page / recommended section calculate and return the top 50 popular media, based on two rules X = the top 25 videos that have the most views during the last week Y = the most recent 25 videos that have been liked over the last 6 months """ valid_media_x = {} valid_media_y = {} basic_query = Q(listable=True) media_x = Media.objects.filter(basic_query).values("friendly_token") period_x = datetime.now() - timedelta(days=7) period_y = datetime.now() - timedelta(days=30 * 6) for media in media_x: ft = media["friendly_token"] num = MediaAction.objects.filter( action_date__gte=period_x, action="watch", media__friendly_token=ft ).count() if num: valid_media_x[ft] = num num = MediaAction.objects.filter( action_date__gte=period_y, action="like", media__friendly_token=ft ).count() if num: valid_media_y[ft] = num x = sorted(valid_media_x.items(), key=lambda kv: kv[1], reverse=True)[:25] y = sorted(valid_media_y.items(), key=lambda kv: kv[1], reverse=True)[:25] media_ids = [a[0] for a in x] media_ids.extend([a[0] for a in y]) media_ids = list(set(media_ids)) cache.set("popular_media_ids", media_ids, 60 * 60 * 12) logger.info("saved popular media ids") return True @task(name="update_listings_thumbnails", queue="long_tasks") def update_listings_thumbnails(): """Populate listings_thumbnail field for models""" # Categories used_media = [] saved = 0 qs = Category.objects.filter().order_by("-media_count") for object in qs: media = ( Media.objects.exclude(friendly_token__in=used_media) .filter(category=object, state="public", is_reviewed=True) .order_by("-views") .first() ) if media: object.listings_thumbnail = media.thumbnail_url object.save(update_fields=["listings_thumbnail"]) used_media.append(media.friendly_token) saved += 1 logger.info("updated {} categories".format(saved)) # Tags used_media = [] saved = 0 qs = Tag.objects.filter().order_by("-media_count") for object in qs: media = ( Media.objects.exclude(friendly_token__in=used_media) .filter(tags=object, state="public", is_reviewed=True) .order_by("-views") .first() ) if media: object.listings_thumbnail = media.thumbnail_url object.save(update_fields=["listings_thumbnail"]) used_media.append(media.friendly_token) saved += 1 logger.info("updated {} tags".format(saved)) return True @task_revoked.connect def task_sent_handler(sender=None, headers=None, body=None, **kwargs): # For encode_media tasks that are revoked, # ffmpeg command won't be stopped, since # it got started by a subprocess. # Need to stop that process # Also, removing the Encoding object, # since the task that would prepare it was killed # Maybe add a killed state for Encoding objects try: uid = kwargs["request"].task_id if uid: encoding = Encoding.objects.get(task_id=uid) encoding.delete() logger.info("deleted the Encoding object") if encoding.temp_file: kill_ffmpeg_process(encoding.temp_file) except BaseException: pass return True def kill_ffmpeg_process(filepath): # this is not ideal, ffmpeg pid could be linked to the Encoding object cmd = "ps aux|grep 'ffmpeg'|grep %s|grep -v grep |awk '{print $2}'" % filepath result = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True) pid = result.stdout.decode("utf-8").strip() if pid: cmd = "kill -9 %s" % pid result = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True) return result @task(name="remove_media_file", base=Task, queue="long_tasks") def remove_media_file(media_file=None): rm_file(media_file) return True # TODO LIST # 1 chunks are deleted from original server when file is fully encoded. # however need to enter this logic in cases of fail as well # 2 script to delete chunks in fail status # (and check for their encdings, and delete them as well, along with # all chunks) # 3 beat task, remove chunks