gnm_deliverables/launch_detector.py (332 lines of code) (raw):
import logging
import jsonschema
import datetime
import dateutil.parser
from typing import List
from .models import *
import re
import pytz
logger = logging.getLogger(__name__)
class InlineChangeRecord(object):
    """
    Thin read-only wrapper around one "who changed it, and when" dictionary.

    Nothing is checked at construction time; values are looked up on access.
    """
    # Removes a trailing bracketed suffix from the timestamp before parsing.
    # NOTE(review): presumably a zone-id suffix such as "[Europe/London]" - confirm against the sender.
    timestamp_splitter = re.compile(r'\[.*]$')

    def __init__(self, initial_data):
        self._content = initial_data

    @property
    def user(self):
        """The "user" value of the record, or None when it is absent."""
        return self._content.get("user")

    @property
    def at(self) -> datetime.datetime:
        """
        The "at" value parsed as an ISO-8601 timestamp.

        "at" is treated as mandatory, so a record without it raises KeyError.
        """
        raw_timestamp = self._content["at"]
        iso_portion = self.timestamp_splitter.sub("", raw_timestamp)
        return dateutil.parser.isoparse(iso_portion)
class YTMeta(object):
    """
    Represents the youtube-specific metadata within an atom.

    The wrapped dictionary may be missing entirely (None) or may carry explicit
    null values for any key, so every accessor tolerates both. An instance with
    no data at all is falsy, which lets callers write `if yt_meta:` to test
    whether there is anything to apply.
    """
    # keys that must all be present for the metadata to count as valid
    _REQUIRED_KEYS = ("categoryId", "channelId", "privacyStatus")

    def __init__(self, initial_data):
        """
        :param initial_data: dictionary of youtube metadata, or None if the atom has none
        """
        # normalise "no metadata" to an empty dict so the accessors never hit None
        self._content = initial_data if initial_data is not None else {}

    def __bool__(self):
        """True only when some metadata is actually present."""
        return bool(self._content)

    @property
    def is_valid(self):
        """True if the category, channel and privacy status keys are all present."""
        return all(key in self._content for key in self._REQUIRED_KEYS)

    @property
    def category_id(self):
        return self._content.get("categoryId")

    @property
    def channel_id(self):
        return self._content.get("channelId")

    @property
    def expiry_date(self):
        """
        The expiry date parsed as an ISO-8601 timestamp, or None.

        None is returned both when the key is absent and when it is present
        with a null value; testing only for key presence would hand None to
        the parser.
        """
        raw_value = self._content.get("expiryDate")
        if raw_value is None:
            return None
        return dateutil.parser.isoparse(raw_value)

    @property
    def keywords(self):
        return self._content.get("keywords")

    @property
    def privacy_status(self):
        return self._content.get("privacyStatus")

    @property
    def license(self):
        return self._content.get("license")

    @property
    def title(self):
        """The title, or an empty string when it is absent or null."""
        return self._content.get("title") or ""

    @property
    def description(self):
        """The description, or an empty string when it is absent or null."""
        return self._content.get("description") or ""
class MediaAsset(object):
    """
    Wraps one entry of an atom's media asset list.

    Only mime_type is a safe lookup. Every other accessor indexes the wrapped
    dictionary directly and raises KeyError on a missing key, because the data
    is expected to have been validated before this object is built.
    """
    def __init__(self, initial_data):
        self._content = initial_data

    def _required(self, key):
        # direct indexing on purpose: a missing key is an error, not a None
        return self._content[key]

    @property
    def mime_type(self):
        """The "maybeMimeType" value, or None if it is not present."""
        return self._content.get("maybeMimeType")

    @property
    def asset_type(self):
        return self._required("assetType")

    @property
    def platform(self):
        return self._required("platform")

    @property
    def asset_id(self):
        return self._required("platformId")

    @property
    def version(self):
        return self._required("version")
class LaunchDetectorUpdate(object):
    """
    Validated, read-only view over one incoming update message.

    The raw dictionary is checked against `schema` on construction. The schema
    declares no required keys, so any property may be missing from a valid
    message; the accessors therefore return None (or an empty list for
    `assets`) rather than raising when data is absent.
    """
    schema = {
        "type": "object",
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {
            "title": {"type": "string"},
            "category": {"type": "string"},
            "atomId": {"type": "string"},
            "duration": {"type": ["number", "null"]},
            "source": {"type": ["string", "null"]},
            "description": {"type": ["string", "null"]},
            "posterImage": {
                "type": ["object", "null"],
                "properties": {
                    "mimeType": {"type": ["string", "null"]},
                    "file": {"type": "string"},
                    "credit": {"type": ["string", "null"]},
                    "copyright": {"type": ["string", "null"]},
                    "source": {"type": ["string", "null"]},
                    "mediaId": {"type": ["string", "null"]},
                }
            },
            "trailtext": {"type": ["string", "null"]},
            "byline": {
                "type": ["array", "null"],
                "items": {
                    "type": "string"
                }
            },
            "keywords": {
                "type": ["array", "null"],
                "items": {
                    "type": "string"
                }
            },
            "trailImage": {
                "type": ["object", "null"],
                "properties": {
                    "mimeType": {"type": ["string", "null"]},
                    "file": {"type": "string"},
                    "credit": {"type": ["string", "null"]},
                    "copyright": {"type": ["string", "null"]},
                    "source": {"type": ["string", "null"]},
                    "mediaId": {"type": ["string", "null"]},
                }
            },
            "commissionId": {"type": ["string", "null"]},
            "projectId": {"type": ["string", "null"]},
            "masterId": {"type": ["string", "null"]},
            "published": {
                "type": ["object", "null"],
                "properties": {
                    "user": {"type": ["string", "null"]},
                    "at": {"type": "string"}  ##ISO date string
                }
            },
            "lastModified": {
                "type": ["object", "null"],
                "properties": {
                    "user": {"type": ["string", "null"]},
                    "at": {"type": "string"}  ##ISO date string
                }
            },
            "assets": {
                "type": ["array"],
                "items": {
                    "type": "object",
                    "properties": {
                        "maybeMimeType": {"type": ["string", "null"]},
                        "assetType": {"type": "string"},
                        "platform": {"type": "string"},
                        "platformId": {"type": "string"},
                        "version": {"type": "number"}
                    }
                }
            },
            "ytMeta": {
                "type": ["object", "null"],
                "properties": {
                    "title": {"type": ["string", "null"]},
                    "description": {"type": ["string", "null"]},
                    "categoryId": {"type": ["string", "null"]},
                    "channelId": {"type": ["string", "null"]},
                    "expiryDate": {"type": ["string", "null"]},  #ISO date string
                    "keywords": {
                        "type": ["array", "null"],
                        "items": {
                            "type": "string"
                        }
                    },
                    "privacyStatus": {"type": ["string", "null"]},
                    "license": {"type": ["string", "null"]},
                }
            }
        },
    }

    def __init__(self, raw_content: dict):
        """
        initialise from predetermined data. Raises a ValidationError if the data is not right.
        :param raw_content: dictionary holding the decoded message body
        """
        jsonschema.validate(raw_content, schema=self.schema)
        self._content = raw_content

    @property
    def title(self):
        return self._content.get("title")

    @property
    def category(self):
        return self._content.get("category")

    @property
    def atom_id(self):
        return self._content.get("atomId")

    @property
    def duration(self):
        """
        The duration as a timedelta, or None when the message carries none.

        NOTE(review): the raw number is passed as timedelta's first argument,
        which is *days*. A media duration is more plausibly seconds - confirm
        the unit with the sender before relying on this value.
        """
        longval: int = self._content.get("duration")
        if longval is None:
            return None
        return datetime.timedelta(days=longval)

    @property
    def source(self):
        return self._content.get("source")

    @property
    def description(self):
        return self._content.get("description")

    @property
    def poster_image(self):
        return self._content.get("posterImage")

    @property
    def trail_text(self):
        """
        The trail text, or None.

        The schema validates the key as "trailtext" whereas this accessor has
        always read "trailText"; both spellings are accepted, with the
        camel-case form taking priority.
        """
        value = self._content.get("trailText")
        if value is None:
            value = self._content.get("trailtext")
        return value

    @property
    def byline(self):
        return self._content.get("byline")

    @property
    def keywords(self):
        return self._content.get("keywords")

    @property
    def trail_image(self):
        return self._content.get("trailImage")

    @property
    def commission_id(self):
        return self._content.get("commissionId")

    @property
    def project_id(self):
        return self._content.get("projectId")

    @property
    def master_id(self):
        return self._content.get("masterId")

    @property
    def published(self):
        """The publication change record, or None if absent or null."""
        raw_data = self._content.get("published")
        return None if raw_data is None else InlineChangeRecord(raw_data)

    @property
    def last_modified(self):
        """The last-modification change record, or None if absent or null."""
        raw_data = self._content.get("lastModified")
        return None if raw_data is None else InlineChangeRecord(raw_data)

    @property
    def assets(self) -> "List[MediaAsset]":
        """The media assets of the atom; an empty list when the key is absent."""
        # "assets" is not a required key, so guard against iterating None
        return [MediaAsset(entry) for entry in self._content.get("assets") or []]

    @property
    def yt_meta(self) -> "YTMeta | None":
        """The youtube metadata, or None when the message has none (absent or null)."""
        raw_data = self._content.get("ytMeta")
        return None if raw_data is None else YTMeta(raw_data)
def find_asset_for(msg: LaunchDetectorUpdate) -> DeliverableAsset:
    """
    look up the DeliverableAsset for the given item. If none is found, then DeliverableAsset.DoesNotExist is raised.
    If several assets share the atom id a warning is logged and the first one returned by the query is used.
    :param msg: LaunchDetectorUpdate containing the incoming update
    :return: DeliverableAsset instance
    """
    matches = DeliverableAsset.objects.filter(atom_id=msg.atom_id)
    match_count = len(matches)
    if match_count == 0:
        raise DeliverableAsset.DoesNotExist
    if match_count > 1:
        # log the number of matches, not the collection itself
        logger.warning("Found %d potential matches for atom id %s, using the first", match_count, msg.atom_id)
    return matches[0]
def zoned_datetime() -> datetime.datetime:
    """
    Outputs the current time as a datetime value with the correct time zone.
    The configured timezone from the settings is applied to the resulting DateTime. If no timezone is configured,
    then we default to UTC and emit a warning
    :return: the timezone-aware datetime
    """
    from django.conf import settings

    # Ask for the current instant directly in UTC. Taking a naive local now() and
    # labelling it as UTC is only right when the process itself runs in UTC.
    aware_utc_dt = datetime.datetime.now(tz=pytz.utc)

    # getattr + truthiness also covers TIME_ZONE being present but set to None/""
    configured_zone = getattr(settings, "TIME_ZONE", None)
    if configured_zone:
        return aware_utc_dt.astimezone(pytz.timezone(configured_zone))

    logger.warning("TIME_ZONE is not configured in the settings, defaulting to UTC")
    return aware_utc_dt
def update_gnmwebsite(msg: LaunchDetectorUpdate, asset: DeliverableAsset):
    """
    either create or update a gnmwebsite information object from the given message.
    saves both the gnmwebsite record and the DeliverableAsset to the database.
    :param msg: LaunchDetectorUpdate instance giving the content to update
    :param asset: DeliverableAsset that needs to be updated.
    :return: None
    """
    rec: GNMWebsite = asset.gnm_website_master
    if rec is None:
        rec = GNMWebsite(publication_status='Unpublished')

    #commented out fields are not currently provided by LD
    rec.media_atom_id = msg.atom_id
    #rec.production_office = msg.
    rec.website_description = msg.description
    rec.website_title = msg.title
    rec.tags = msg.keywords
    rec.primary_tone = msg.category

    published = msg.published
    if published is not None:
        rec.publication_date = published.at
        rec.publication_status = 'Published'

    # set the etag in case something else is editing it at the moment
    rec.etag = zoned_datetime().isoformat('T')
    rec.source = msg.source

    # save the record first so that it has a primary key by the time it is
    # attached to the asset
    rec.save()
    asset.gnm_website_master = rec  #no-op if it was already set like this
    asset.save()
def update_dailymotion(msg: LaunchDetectorUpdate, asset: DeliverableAsset):
    """
    either create or update a dailymotion information object from the given message.
    Description, tags and title are only filled in when the record has no value for them yet;
    values that are already set are left alone.
    Saves both the dailymotion record and the DeliverableAsset to the database.
    :param msg: LaunchDetectorUpdate instance giving the content to update
    :param asset: DeliverableAsset that needs to be updated.
    :return: None
    """
    rec: DailyMotion = asset.DailyMotion_master
    if rec is None:
        rec = DailyMotion()

    #would be good to build a mapping table for this
    #rec.daily_motion_category
    if not rec.daily_motion_description:
        rec.daily_motion_description = msg.description
    if not rec.daily_motion_tags:
        #would be good to map these to names
        rec.daily_motion_tags = msg.keywords
    if not rec.daily_motion_title:
        rec.daily_motion_title = msg.title

    # the flags default to False, but an explicit value on the record is kept
    if rec.daily_motion_no_mobile_access is None:
        rec.daily_motion_no_mobile_access = False
    if rec.daily_motion_contains_adult_content is None:
        rec.daily_motion_contains_adult_content = False

    # set the etag in case something else is editing it at the moment
    rec.etag = zoned_datetime().isoformat('T')

    # save the record first so that it has a primary key by the time it is
    # attached to the asset
    rec.save()
    asset.DailyMotion_master = rec  #no-op if it was already set like this
    asset.save()
def update_mainstream(msg: LaunchDetectorUpdate, asset: DeliverableAsset):
    """
    either create or update a Mainstream information object from the given message.
    Description, title and tags are only filled in when the record has no value for them yet;
    values that are already set are left alone.
    Saves both the Mainstream record and the DeliverableAsset to the database.
    :param msg: LaunchDetectorUpdate instance giving the content to update
    :param asset: DeliverableAsset that needs to be updated.
    :return: None
    """
    rec: Mainstream = asset.mainstream_master
    if rec is None:
        rec = Mainstream()

    if not rec.mainstream_description:
        rec.mainstream_description = msg.description
    if not rec.mainstream_title:
        rec.mainstream_title = msg.title
    if not rec.mainstream_tags:
        rec.mainstream_tags = msg.keywords

    # the flag defaults to False, but an explicit value on the record is kept
    if rec.mainstream_rules_contains_adult_content is None:
        rec.mainstream_rules_contains_adult_content = False

    # set the etag in case something else is editing it at the moment
    rec.etag = zoned_datetime().isoformat('T')

    # save the record first so that it has a primary key by the time it is
    # attached to the asset
    rec.save()
    asset.mainstream_master = rec  #no-op if it was already set like this
    asset.save()
def update_youtube(msg: LaunchDetectorUpdate, asset: DeliverableAsset):
    """
    either create or update a Youtube information object from the given message.
    The youtube id is taken from the highest-versioned asset whose platform is youtube, if there is one.
    Saves both the Youtube record and the DeliverableAsset to the database.
    :param msg: LaunchDetectorUpdate instance giving the content to update
    :param asset: DeliverableAsset that needs to be updated
    :return: None
    """
    rec: Youtube = asset.youtube_master
    if rec is None:
        rec = Youtube()

    # FIXME: should update our data model to handle multiple assets on a given item, then we can remove this.
    youtube_assets = [a for a in msg.assets if a.platform.lower() == "youtube"]
    # sort is lowest-number first, we want the highest so take last of the list
    youtube_assets_sorted: List[MediaAsset] = sorted(youtube_assets, key=lambda a: a.version)
    if youtube_assets_sorted:
        rec.youtube_id = youtube_assets_sorted[-1].asset_id

    # read the property once; each access builds a new wrapper object
    yt_meta = msg.yt_meta
    if yt_meta:
        rec.youtube_category = yt_meta.category_id
        rec.youtube_channel = yt_meta.channel_id
        rec.youtube_tags = yt_meta.keywords
        # Title and description may legitimately be null in the message. A
        # truthiness test skips both null and "", so an existing value on the
        # record is never overwritten with nothing.
        if yt_meta.title:
            rec.youtube_title = yt_meta.title
        if yt_meta.description:
            rec.youtube_description = yt_meta.description

    published = msg.published
    if published is not None:
        rec.publication_date = published.at

    # set the etag in case something else is editing it at the moment
    rec.etag = zoned_datetime().isoformat('T')

    # save the record first so that it has a primary key by the time it is
    # attached to the asset
    rec.save()
    asset.youtube_master = rec
    asset.save()