# gnm_deliverables/models.py

# coding: utf-8
"""Models for the deliverables app.

A :class:`Deliverable` is a "bundle" of files associated with a pluto-core
project; each file (or adopted Vidispine item) is a :class:`DeliverableAsset`.
The remaining models hold per-platform syndication metadata (website, YouTube,
Daily Motion, mainstream, Oovvuu, Reuters Connect) plus log/notes records.
"""
import datetime
import errno
import logging
import os
import time
import urllib.error
import urllib.parse
import urllib.request

from django.conf import settings
from django.contrib.postgres.fields import ArrayField
from django.db import models
from django.db.models import Q
from django.utils.functional import cached_property
from django.utils.timezone import now

from gnmvidispine.vs_item import VSItem, VSException, VSNotFound
from gnmvidispine.vs_job import VSJob

from .choices import DELIVERABLE_ASSET_STATUS_INGEST_FAILED, \
    DELIVERABLE_ASSET_TYPES_DICT, UPLOAD_STATUS, PRODUCTION_OFFICE, PRIMARY_TONE, \
    PUBLICATION_STATUS, DELIVERABLE_ASSET_STATUS_TRANSCODING
from .choices import DELIVERABLE_ASSET_STATUS_NOT_INGESTED, DELIVERABLE_ASSET_STATUSES, DELIVERABLE_ASSET_STATUS_TRANSCODED
from .choices import DELIVERABLE_ASSET_TYPE_CHOICES, DELIVERABLE_STATUS_ALL_FILES_INGESTED, \
    DELIVERABLE_STATUS_FILES_TO_INGEST, DELIVERABLE_ASSET_TYPE_OTHER_SUBTITLE, \
    DELIVERABLE_ASSET_TYPE_OTHER_TRAILER, \
    DELIVERABLE_ASSET_TYPE_OTHER_MISCELLANEOUS, DELIVERABLE_ASSET_TYPE_OTHER_PAC_FORMS, DELIVERABLE_ASSET_TYPE_OTHER_POST_PRODUCTION_SCRIPT
from .exceptions import ImportFailedError, NoShapeError
from .files import get_path_for_deliverable, find_files_for_deliverable, create_folder, \
    get_local_path_for_deliverable, create_folder_for_deliverable
from .templatetags.deliverable_tags import sizeof_fmt
from .transcodepreset import TranscodePresetFinder

logger = logging.getLogger(__name__)


def seconds_to_timestamp(seconds):
    """Format a (possibly fractional) number of seconds as HH:MM:SS.

    NOTE: uses time.gmtime, so durations of 24h or more wrap around.
    """
    return time.strftime('%H:%M:%S', time.gmtime(float(seconds)))


# module-level singleton used by DeliverableAsset.create_proxy to map a MIME
# type onto a Vidispine transcode preset name
transcode_preset_finder = TranscodePresetFinder()


class Deliverable(models.Model):
    """A bundle of deliverable assets belonging to one pluto-core project."""
    project_id = models.CharField(null=True, blank=True, max_length=61)
    commission_id = models.BigIntegerField(null=False, blank=False, db_index=True)
    pluto_core_project_id = models.BigIntegerField(null=False, blank=False, db_index=True, unique=True)
    name = models.CharField(null=False, blank=False, unique=True, max_length=255)
    created = models.DateTimeField(null=False, blank=False, auto_now_add=True)

    def sync_assets_from_file_system(self):
        """
        performs a scan of the drop-folder associated with this deliverable.
        If a file is found that does not correspond to a DeliverableAsset record
        then create a record for it, if there is a corresponding record then
        update the ctime/mtime/atime.
        Also, if any asset records have NOT been imported (i.e. their type and
        item_id is null) and their corresponding files have been removed then
        delete those records.
        :return: a dictionary with two keys, a count of "added" records and a
            count of "removed" records.
        """
        assets_on_fs = []
        added_count = 0
        removed_count = 0
        for f in find_files_for_deliverable(self.name):
            # NOTE: size/modified_dt are part of the lookup, so a file whose
            # size or mtime changed will get a *new* row rather than updating
            # the old one
            asset, created = DeliverableAsset.objects.get_or_create(
                filename=f.path,
                deliverable=self,
                size=f.size,
                modified_dt=f.modified_dt,
                defaults=dict(
                    access_dt=f.access_dt,
                    changed_dt=f.changed_dt,
                    absolute_path=f.absolute_path
                )
            )
            assets_on_fs.append(asset)
            if created:
                logger.info('Asset created: %s' % asset)
                added_count += 1
            else:
                logger.info('Asset already existed: %s' % asset)
                # Update defaults
                asset.access_dt = f.access_dt
                asset.changed_dt = f.changed_dt
                asset.absolute_path = f.absolute_path
                asset.save()

        # Remove assets rows that are not found on the FS and does not have an
        # item tied to it
        assets_to_delete = self.assets.filter(
            type__isnull=True,
            online_item_id__isnull=True
        ).exclude(
            id__in=[a.id for a in assets_on_fs]
        )
        delete_count = assets_to_delete.count()
        if delete_count > 0:
            assets_to_delete.delete()
            # FIX: was `removed_count += 1`, which under-reported when more
            # than one row was deleted; the docstring promises a record count.
            removed_count += delete_count
            logger.info('Deleted %s asset rows' % delete_count)
        return {"added": added_count, "removed": removed_count}

    @cached_property
    def path(self):
        """Server-side drop-folder path for this bundle."""
        return get_path_for_deliverable(self.name)

    @cached_property
    def local_path(self):
        """Client-visible path for this bundle's drop-folder."""
        return get_local_path_for_deliverable(self.name)

    @cached_property
    def local_open_uri(self):
        """pluto:openfolder: URI that opens the drop-folder on the client."""
        local_path = self.local_path.rstrip('/') + '/'
        return 'pluto:openfolder:%s' % urllib.parse.quote(local_path)

    def status(self):
        """Bundle-level status: FILES_TO_INGEST if any asset is incomplete."""
        if self.assets.filter(ingest_complete_dt__isnull=True).exists():
            return DELIVERABLE_STATUS_FILES_TO_INGEST
        else:
            return DELIVERABLE_STATUS_ALL_FILES_INGESTED

    def asset_type(self, type):
        """Return the first asset of the given type, or None if there is none."""
        assets = self.assets.filter(type=type)
        return None if assets.count() <= 0 else assets[0]

    def create_folder(self):
        return create_folder(self.path)

    def create_folder_for_deliverable(self):
        return create_folder_for_deliverable(self.name)

    def create_asset_from_vs_item(self, item_id, user):
        """
        tries to "adopt" the given existing vidispine item onto this
        deliverable bundle. if the item is already associated with this
        deliverable bundle then it is returned without re-import.
        can raise NoShapeError if either the item has no original shape, or
        VSNotFound if the item does not exist. can also raise other
        VSException from gnmvidispine if server communication fails
        :param item_id: item id to adopt
        :param user: name of the user carrying out the operation, it will be
            done as this user in VS
        :return: a tuple of (DeliverableAsset, Boolean) where the boolean value
            indicates whether a new DeliverableAsset was created or not
        """
        created = False
        try:
            asset = DeliverableAsset.objects.get(online_item_id=item_id, deliverable=self)
            logger.info(
                'Asset for item "{item_id}" already existed: {asset}'.format(item_id=item_id, asset=asset))
        except DeliverableAsset.DoesNotExist:
            asset = DeliverableAsset(
                online_item_id=item_id,
                deliverable=self
            )
            item = asset.item(user)
            try:
                shape = item.get_shape("master")
            except VSNotFound:
                ##we can tidy the re-raise up later
                raise NoShapeError('No original shape attached to item')
            asset.filename = item.get("originalFilename", allowArray=False)
            asset.absolute_path = item.get("originalUri", allowArray=False)
            files = [x for x in shape.files()]
            if len(files) == 0:
                raise NoShapeError("Original shape had no files on it")
            asset.size = files[0].size
            ##FIXME: the item's modified time is not captured here; modified_dt
            ##stays unset for adopted items (dead `mtime = None` local removed)
            asset.ingest_complete_dt = now()
            asset.created_from_existing_item = True
            asset.save()
            created = True
            logger.info(
                'Asset for item "{item_id}" created: {asset}'.format(item_id=item_id, asset=asset))
        return asset, created

    @classmethod
    def for_project(cls, project_id):
        """QuerySet of all bundles attached to the given (legacy) project id."""
        return Deliverable.objects.filter(project_id=project_id)

    @classmethod
    def sync_project(cls, project_id):
        """Run a filesystem sync on every bundle of the given project."""
        deliverables = cls.for_project(project_id)
        for deliverable in deliverables:
            deliverable.sync_assets_from_file_system()
        return deliverables

    def __str__(self):
        return self.__unicode__()

    def __unicode__(self):
        return '{name}'.format(name=self.name)

    class Meta:
        pass


class DeliverableAsset(models.Model):
    """One file (or adopted VS item) within a Deliverable bundle."""
    type = models.PositiveIntegerField(null=True, blank=True, choices=DELIVERABLE_ASSET_TYPE_CHOICES)

    # Stat info
    filename = models.TextField(null=True, blank=True)
    absolute_path = models.TextField(null=True, blank=True)
    size = models.BigIntegerField(null=True, blank=True)
    access_dt = models.DateTimeField(null=True, blank=True)
    modified_dt = models.DateTimeField(null=True, blank=True)
    changed_dt = models.DateTimeField(null=True, blank=True)

    # Fields set on ingest
    job_id = models.TextField(null=True, blank=True)
    online_item_id = models.TextField(null=True, blank=True)
    nearline_item_id = models.TextField(null=True, blank=True)
    archive_item_id = models.TextField(null=True, blank=True)
    ingest_complete_dt = models.DateTimeField(null=True, blank=True)
    file_removed_dt = models.DateTimeField(null=True, blank=True)
    duration_seconds = models.FloatField(null=True, blank=True)
    version = models.IntegerField(null=True, blank=True)
    linked_to_lowres = models.BooleanField(null=True, blank=True)

    # Fields set by atom responder
    atom_id = models.UUIDField(null=True)

    # Lifecycle
    status = models.IntegerField(null=False, choices=DELIVERABLE_ASSET_STATUSES,
                                 default=DELIVERABLE_ASSET_STATUS_NOT_INGESTED)
    created_from_existing_item = models.BooleanField(default=False)
    deliverable = models.ForeignKey(Deliverable, related_name='assets', on_delete=models.PROTECT)

    # Syndication metadata
    gnm_website_master = models.ForeignKey('GNMWebsite', on_delete=models.SET_NULL, null=True)
    youtube_master = models.ForeignKey('Youtube', on_delete=models.SET_NULL, null=True)
    # NOTE: non-PEP8 capitalisation kept — renaming would require a migration
    # and would break existing callers
    DailyMotion_master = models.ForeignKey('DailyMotion', on_delete=models.SET_NULL, null=True)
    mainstream_master = models.ForeignKey('Mainstream', on_delete=models.SET_NULL, null=True)
    oovvuu_master = models.ForeignKey("Oovvuu", on_delete=models.SET_NULL, null=True)
    reutersconnect_master = models.ForeignKey("ReutersConnect", on_delete=models.SET_NULL, null=True)

    def __init__(self, *args, **kwargs):
        super(DeliverableAsset, self).__init__(*args, **kwargs)
        # in-memory caches for the VS item/job lookups, see item() and job()
        self.__item = None
        self.__job = None

    def update_metadata(self, user):
        """
        updates the metadata on all storage layers
        :param user:
        :return:
        """
        raise Exception("update_metadata is not implemented yet")
        # set_item_metadata(user, self.item_id, metadata_document_from_dict(dict(title=self.get_name()), md_group='Deliverable'))

    def get_name(self):
        # naming convention
        return '{type} for {name}'.format(type=self.type_string, name=self.deliverable.name)

    def create_placeholder(self, user, commit=True):
        """
        requests Vidispine to create a placeholder item for this deliverable,
        if it does not have an ID on it.
        if the deliverable does already have an item id, this is a no-op
        (and None is returned).
        this is automatically called from start_file_import, so you don't need
        to call it directly.
        :param user: (string) user to run the request as
        :param commit: if True, save the deliverable to the database
            immediately (default true)
        :return: the new VSItem, or None if an item id was already set
        """
        if not isinstance(user, str):
            raise TypeError("create_placeholder user must be a string, got a {0}".format(user.__class__.__name__))
        if self.online_item_id is None:
            new_item = VSItem(url=settings.VIDISPINE_URL, user=settings.VIDISPINE_USER,
                              passwd=settings.VIDISPINE_PASSWORD, run_as=user)
            builder = new_item.get_metadata_builder()
            builder.addMeta({
                "title": self.get_name()
            })
            builder.addGroup("Asset",{
                "gnm_category": "Deliverable",
                "original_filename": self.absolute_path,
                "gnm_owner": user,
                "gnm_containing_projects": str(self.deliverable.pluto_core_project_id),
                "gnm_file_created": self.modified_dt
            })
            builder.addGroup("Deliverable", {
                "gnm_deliverable_bundle": str(self.deliverable.pluto_core_project_id),
                "gnm_deliverable_id": str(self.id)
            })
            new_item.createPlaceholder(builder.as_xml().decode("utf8"))
            # NOTE(review): self.online_item_id is never assigned from the new
            # placeholder here, so the save below does not persist the item id
            # — presumably something downstream records it; confirm.
            if commit:
                self.save()
            return new_item

        #we don't store stuff in collections any more
        #add_to_collection(user, self.deliverable.project_id, self.item_id)

    def start_file_import(self, user, commit=True):
        """
        tells VS to start importing the file. If no item currently exists, then
        a placeholder is created and the media imported to that. If an item
        does currently exist, then the media is added as a new version.
        This can raise any subclass of gnmvidispine.VSException
        :param user: username to run the operation as
        :param commit: whether to save the model before returning
        :return:
        """
        # assets adopted from an existing item already have their media
        if self.created_from_existing_item:
            return

        if self.online_item_id is None:
            current_item = self.create_placeholder(user, commit=False)
        else:
            current_item = self.item(user=user)

        # document-like types should not be sent through the video transcoder
        should_we_not_attempt_a_transcode = self.type in [DELIVERABLE_ASSET_TYPE_OTHER_MISCELLANEOUS,
                                                          DELIVERABLE_ASSET_TYPE_OTHER_PAC_FORMS,
                                                          DELIVERABLE_ASSET_TYPE_OTHER_POST_PRODUCTION_SCRIPT,
                                                          DELIVERABLE_ASSET_TYPE_OTHER_SUBTITLE]

        #run_as should be taken care of by the VSItem base class, initated from self.item
        # NOTE(review): only spaces are escaped here; other URI-reserved
        # characters (and literal '%') in the path pass through unescaped.
        import_job = current_item.import_to_shape(uri="file://" + self.absolute_path.replace(" ","%20"),
                                                  priority="MEDIUM",
                                                  essence=True,
                                                  thumbnails=False,
                                                  noTranscode=should_we_not_attempt_a_transcode,
                                                  jobMetadata={
                                                      "import_source": "pluto-deliverables",
                                                      "project_id": str(self.deliverable.pluto_core_project_id),
                                                      "asset_id": str(self.id)
                                                  })
        self.job_id = import_job.name
        if commit:
            self.save()

    def create_proxy(self, priority='MEDIUM'):
        """
        tells VS to start proxying the file. This is done seperately to the
        ingest portion, so that we can use Vidispine's understanding of the
        file format in order to determine which is the appropriate transcoding
        to use.
        Can raise either a VSException or a vidispine_api.HttpError if the
        operation fails.
        This is normally called from the callback endpoint, so we have no user
        present suitable for "run-as"
        :return: the transcode job id, or None if no MIME type was available
        """
        item_info = self.item(None)  # caller should catch exceptions from this
        if item_info is None:
            raise ValueError("Item is not imported")
        mime_type = item_info.get("mimeType")
        if mime_type is None:
            logger.error("{0} for deliverable asset {1} in {2}: Could not determine MIME type for transcode".format(item_info.name, self.id, self.deliverable.id))
            return None
        preset_name = transcode_preset_finder.match(mime_type)
        if preset_name is None:
            logger.error("{0} for deliverable asset {1} in {2}: Did not recognise MIME type {3}".format(item_info.name, self.id, self.deliverable.id, mime_type))
            raise ValueError("Did not recognise MIME type of item")
        try:
            # raises VSNotFound if the shape does not exist, which is what we expect.
            item_info.get_shape(preset_name)
            logger.info("Item {0} already has a shape {1}, not transcoding it".format(item_info.name, preset_name))
            self.status = DELIVERABLE_ASSET_STATUS_TRANSCODED
            self.save()
            return  #don't proceed to make a transcode if one already exists!
        except VSNotFound:
            pass

        # FIX: log arguments were phrased backwards ("transcode to <item> of <preset>")
        logger.info("Requesting transcode of {0} to {1}".format(item_info.name, preset_name))
        job_id = item_info.transcode(preset_name,
                                     priority,
                                     wait=False,
                                     create_thumbnails=True,
                                     job_metadata={
                                         "import_source": "pluto-deliverables",
                                         "project_id": str(self.deliverable.pluto_core_project_id),
                                         "asset_id": str(self.id)
                                     })
        self.job_id = job_id
        self.status = DELIVERABLE_ASSET_STATUS_TRANSCODING
        self.save()
        return job_id

    def has_ongoing_job(self, user):
        """
        returns a boolean indicating whether this deliverable has a job ongoing
        or not, or None if there is no job at all (no job id set, or the job
        no longer exists in VS)
        :param user:
        :return:
        """
        try:
            current_job = self.job(user)
            if current_job is None:
                return None
            else:
                return not current_job.finished()
        except VSNotFound:  # no job => no ongoing job!
            return None

    @cached_property
    def type_string(self):
        """Human-readable name for the asset type (None if type unset)."""
        return DELIVERABLE_ASSET_TYPES_DICT.get(self.type)

    @cached_property
    def size_string(self):
        """Human-readable file size, e.g. '1.2MiB'."""
        return sizeof_fmt(self.size)

    def type_allows_many(self):
        """True if a bundle may contain multiple assets of this type."""
        return self.type == DELIVERABLE_ASSET_TYPE_OTHER_SUBTITLE \
               or self.type == DELIVERABLE_ASSET_TYPE_OTHER_TRAILER \
               or self.type == DELIVERABLE_ASSET_TYPE_OTHER_MISCELLANEOUS

    def _set_cached_item(self, newvalue):
        """
        method for testing, to manually set the cached item. you should not
        call this directly, but rely on it to be set by calling item()
        :param newvalue:
        :return:
        """
        self.__item = newvalue

    def item(self, user):
        """
        returns a gnmvidispine VSItem object representing the vidispine item
        associated with this deliverable. the first time it is called (this
        happens internally) the data is lifted from the VS server and
        thereafter it is cached
        the item_id must be set for this to work. None is returned if the item
        id is not set; VSNotFound is raised if the item does not exist, or
        another VSException is raised if server communication fails
        :param user: username to run the metadata get as
        :return: the VSItem, possibly from in-memory cache
        """
        if self.__item is not None:
            return self.__item
        if self.online_item_id is not None:
            self.__item = VSItem(url=settings.VIDISPINE_URL, user=settings.VIDISPINE_USER,
                                 passwd=settings.VIDISPINE_PASSWORD, run_as=user)
            self.__item.populate(self.online_item_id)
            return self.__item
        return None

    def job(self, user):
        """
        returns a gnmvidispine VSJob that represents the import job assocaited
        with this deliverable, or None if there was none set.
        can raise a VSNotFound if the job does not exist or another
        VSException if server communication fails
        :param user:
        :return:
        """
        if self.__job is not None:
            return self.__job
        if self.job_id is not None:
            self.__job = VSJob(url=settings.VIDISPINE_URL, user=settings.VIDISPINE_USER,
                               passwd=settings.VIDISPINE_PASSWORD, run_as=user)
            # FIX: previously `return self.__job.populate(self.job_id)` —
            # inconsistent with item(); if populate() returns None the first
            # call returned None while later calls returned the cached job.
            self.__job.populate(self.job_id)
            return self.__job
        return None

    def purge(self, user=None):
        """Delete this row, first removing the VS item if we exclusively own it.

        The item is only deleted when it was not adopted from an existing item
        and no other DeliverableAsset row references the same item id; VS
        failures are logged but do not block the database delete.
        """
        if self.online_item_id is not None:
            try:
                exists_in_other_deliverable = DeliverableAsset.objects.filter(
                    Q(online_item_id=self.online_item_id) & ~Q(id=self.id)
                ).exists()
                if not self.created_from_existing_item and not exists_in_other_deliverable:
                    self.item(user).delete()
            except VSException:
                logger.exception(
                    'Failed to delete item id "{item_id}" for deliverable asset "{id}"'.format(
                        item_id=self.online_item_id,
                        id=self.id
                    ))
        super(DeliverableAsset, self).delete()

    def remove_file(self):
        """
        Try to remove the original source file from disk, once it's been
        ingested. Sets file_removed_dt and saves the model.
        :return: True if the file was removed, False if not.
        """
        try:
            os.remove(str(self.absolute_path))
            logger.info('Removed file for asset "{asset}": "{path}"'.format(asset=self, path=self.absolute_path))
            self.file_removed_dt = now()
            self.save()
            return True
        except OSError as e:
            # this _should_ get picked up by the periodic task remove_stale_files
            if e.errno == errno.ENOENT:  # No such file
                # already gone; record the removal time if we never did
                if self.file_removed_dt is None:
                    self.file_removed_dt = now()
                    self.save()
                return False
            else:
                logger.exception('Failed to remove "{path}"'.format(path=self.absolute_path))
                return False

    def __str__(self):
        return '{name}'.format(name=self.filename)


class GNMWebsite(models.Model):
    """Syndication metadata for publication on the website."""
    media_atom_id = models.UUIDField(null=True, blank=True)
    upload_status = models.TextField(null=True, blank=True, choices=UPLOAD_STATUS, db_index=True)
    production_office = models.TextField(null=True, blank=True, choices=PRODUCTION_OFFICE, db_index=True)
    tags = ArrayField(models.CharField(null=True, max_length=255), null=True, blank=True, db_index=True)
    publication_date = models.DateTimeField(null=True, blank=True)
    website_title = models.TextField(null=True, blank=True, db_index=True)
    website_description = models.TextField(null=True, blank=True)
    primary_tone = models.TextField(null=True, blank=True, choices=PRIMARY_TONE, db_index=True)
    publication_status = models.TextField(null=True, blank=True, choices=PUBLICATION_STATUS)
    etag = models.DateTimeField(null=False, blank=False, auto_now_add=True)
    source = models.TextField(null=True, blank=True)


class Mainstream(models.Model):
    """Syndication metadata for mainstream-media publication."""
    mainstream_title = models.TextField(null=False, blank=False)
    mainstream_description = models.TextField(null=True, blank=True)
    mainstream_tags = ArrayField(models.CharField(null=False, max_length=255), null=True, blank=True)
    mainstream_rules_contains_adult_content = models.BooleanField()
    upload_status = models.TextField(null=True, blank=True, choices=UPLOAD_STATUS, db_index=True)
    publication_date = models.DateTimeField(null=True, blank=True)
    etag = models.DateTimeField(null=False, blank=False, auto_now_add=True)
    routename = models.TextField(null=True, blank=True)
    job_id = models.TextField(null=True, blank=True)


class Youtube(models.Model):
    """Syndication metadata for YouTube publication."""
    youtube_id = models.TextField(null=False, blank=False, db_index=True)
    youtube_title = models.TextField(null=False, blank=False)
    youtube_description = models.TextField(null=True, blank=True)
    youtube_tags = ArrayField(models.CharField(null=True, max_length=255), null=True, blank=True)
    youtube_category = models.CharField(null=False, max_length=255)
    youtube_channel = models.CharField(null=False, max_length=255)
    publication_date = models.DateTimeField(null=True, blank=True)
    etag = models.DateTimeField(null=False, blank=False, auto_now_add=True)

    def __str__(self):
        return '{title}'.format(title=self.youtube_title)


class DailyMotion(models.Model):
    """Syndication metadata for Daily Motion publication."""
    daily_motion_url = models.TextField(null=True, blank=True)
    daily_motion_title = models.TextField(null=False, blank=False)
    daily_motion_description = models.TextField(null=True, blank=True)
    daily_motion_tags = ArrayField(models.CharField(null=True, max_length=255), null=True, blank=True)
    daily_motion_category = models.TextField(default="news")
    publication_date = models.DateTimeField(null=True, blank=True)
    upload_status = models.TextField(null=True, blank=True, choices=UPLOAD_STATUS, db_index=True)
    daily_motion_no_mobile_access = models.BooleanField()
    daily_motion_contains_adult_content = models.BooleanField()
    etag = models.DateTimeField(null=False, blank=False, auto_now_add=True)
    routename = models.TextField(null=True, blank=True)
    job_id = models.TextField(null=True, blank=True)


class Oovvuu(models.Model):
    """Records whether the asset has been seen on the Oovvuu channel."""
    seen_on_channel = models.BooleanField(default=False)
    etag = models.DateTimeField(null=False, blank=False, auto_now_add=True)


class ReutersConnect(models.Model):
    """Records whether the asset has been seen on the Reuters Connect channel."""
    seen_on_channel = models.BooleanField(default=False)
    etag = models.DateTimeField(null=False, blank=False, auto_now_add=True)


class LogEntry(models.Model):
    """One log line attached to a syndication record."""
    timestamp = models.DateTimeField(null=False, blank=False)
    related_gnm_website = models.ForeignKey(GNMWebsite, on_delete=models.CASCADE, null=True, db_index=True)
    related_youtube = models.ForeignKey(Youtube, on_delete=models.CASCADE, null=True, db_index=True)
    related_daily_motion = models.ForeignKey(DailyMotion, on_delete=models.CASCADE, null=True, db_index=True)
    related_mainstream = models.ForeignKey(Mainstream, on_delete=models.CASCADE, null=True, db_index=True)
    sender = models.TextField(null=False, blank=False, db_index=True)
    log_line = models.TextField(null=False, blank=False)


class SyndicationNotes(models.Model):
    """Free-text operator notes against a deliverable asset."""
    # NOTE(review): datetime.datetime.now is naive — if USE_TZ is on, consider
    # django.utils.timezone.now instead (would need a migration); confirm.
    timestamp = models.DateTimeField(default=datetime.datetime.now)
    username = models.TextField(max_length=255)
    content = models.TextField(max_length=32768)
    deliverable_asset = models.ForeignKey(DeliverableAsset, on_delete=models.CASCADE, db_index=True)


class YouTubeCategories(models.Model):
    """Lookup table mapping a YouTube category title to its platform identity."""
    title = models.CharField(max_length=1024)
    identity = models.CharField(max_length=64, blank=True, null=True)


class YouTubeChannels(models.Model):
    """Lookup table mapping a YouTube channel title to its platform identity."""
    title = models.CharField(max_length=1024)
    identity = models.CharField(max_length=128, blank=True, null=True)