in scripts/editorial_backfills.py [0:0]
def _to_epoch_millis(update):
    '''
    Normalises a collection's 'lastUpdated' value to epoch milliseconds.
    Integer values are returned unchanged; strings in the form
    '%Y-%m-%dT%H:%M:%S.%fZ' are parsed and converted.
    Returns None when there is no value to convert.
    '''
    import calendar
    import numbers

    if update is None:
        return None
    #numbers.Integral also covers Python 2 'long', which plain 'int' does not
    if isinstance(update, numbers.Integral):
        return update
    #Some old collections have last update time saved as string. The trailing 'Z'
    #is the ISO 8601 UTC designator, so the fields are converted with
    #calendar.timegm; time.mktime would interpret them as local time and skew the
    #result by the machine's UTC offset relative to the integer timestamps.
    dt_obj = datetime.datetime.strptime(update, '%Y-%m-%dT%H:%M:%S.%fZ')
    return calendar.timegm(dt_obj.timetuple()) * 1000


def _has_curated_items(items):
    '''
    Returns True if any item in the given list has an id which does not contain
    "snap"; such items are treated as manually curated articles.
    Returns False for a missing or empty list.
    '''
    return bool(items) and any("snap" not in item.get('id') for item in items)


def _write_json(path, data):
    '''Serialises data as indented json into the file at path, closing it afterwards.'''
    with open(path, 'w') as out_file:
        out_file.write(json.dumps(data, indent=4))


def run():
    '''
    Finds editorial fronts which are either fully automated or do not contain any automated collections.
    Writes the names of these fronts and the time they were last edited into a json file
    (automated_fronts.json and not_backfilled_fronts.json respectively).
    Fronts which have a mixture of fully automated and curated collections will have the names of collections
    in these groups recorded in a separate file (manual_backfilled_fronts.json).
    Fronts whose config contains a 'priority' key are skipped.
    Takes the stage (CODE/PROD) we are querying as the first argument; if it is missing,
    a usage message is written to stderr and nothing else is done.
    '''
    #Single-argument print in parentheses behaves the same as the print statement
    print('Starting script')
    if len(sys.argv) <= 1:
        sys.stderr.write("Usage: pass in the stage\n")
        return

    stage = sys.argv[1]
    config_path = stage + '/frontsapi/config/config.json'
    collection_path = stage + '/frontsapi/collection/'
    local_config = './frontsconfig.json'

    no_backfill_fronts = []
    automated_fronts = []
    manual_backfill_fronts = []

    s3.Object(bucket, config_path).download_file(local_config)
    with open(local_config) as data_file:
        config = json.load(data_file)
    fronts = config['fronts']
    collections = config['collections']

    for front_name in fronts:
        front_config = fronts.get(front_name)
        if 'priority' in front_config:
            continue

        #Keep track of what kind of front we are looking at
        automated = True
        fully_manual = True
        manual_backfill = False
        #Keep track of which collections are either curated or fully automated
        curated_collections = []
        automated_collections = []
        last_updates = []

        #A front without a 'collections' entry is treated as having no collections
        for collection_id in front_config.get('collections') or []:
            collection = collections.get(collection_id)
            edited_collection = get_collection_json(collection_id, collection_path)
            name = collection.get('displayName')

            if edited_collection:
                update = _to_epoch_millis(edited_collection.get('lastUpdated'))
                if update is not None:
                    last_updates.append(update)

            if 'backfill' not in collection:
                #No backfill at all: the collection can only hold curated content
                automated = False
                curated_collections.append(name)
                continue

            fully_manual = False
            #For any collection with a backfill we also need to check if it contains
            #manually curated articles. If it does, it is not a fully automated collection.
            curated_collection = bool(edited_collection) and (
                _has_curated_items(edited_collection.get('live'))
                or _has_curated_items(edited_collection.get('draft')))
            if curated_collection:
                automated = False
                manual_backfill = True
                curated_collections.append(name)
            else:
                automated_collections.append(name)

        if last_updates:
            #Timestamps are epoch milliseconds; the date is rendered in local time
            last_front_update = max(last_updates)
            date = datetime.datetime.fromtimestamp(last_front_update/1000.0).strftime('%Y-%m-%d %H:%M:%S')
        else:
            date = ''

        if automated:
            automated_fronts.append(front_name + ', ' + date)
        if fully_manual:
            no_backfill_fronts.append(front_name + ', ' + date)
        if manual_backfill:
            front_details = { front_name: { 'curated': curated_collections, 'automated': automated_collections} }
            manual_backfill_fronts.append(front_details)

    _write_json('automated_fronts.json', automated_fronts)
    _write_json('not_backfilled_fronts.json', no_backfill_fronts)
    _write_json('manual_backfilled_fronts.json', manual_backfill_fronts)