def run()

in scripts/editorial_backfills.py [0:0]


def _last_updated_millis(edited_collection):
    '''
    Returns the 'lastUpdated' value of an edited collection as milliseconds since
    the epoch, or None when the collection carries no such value.
    '''
    # Imported here so the helper does not depend on the module-level import block.
    import calendar

    update = edited_collection.get('lastUpdated')
    if update is None:
        return None
    if isinstance(update, int):
        return update

    # Some old collections have the last update time saved as a string.
    dt_obj = datetime.datetime.strptime(update, '%Y-%m-%dT%H:%M:%S.%fZ')
    # The trailing 'Z' marks the value as UTC, so it must be converted with
    # calendar.timegm. time.mktime would read the tuple as local time and skew the
    # result by the local UTC offset relative to the integer timestamps.
    return calendar.timegm(dt_obj.timetuple()) * 1000


def _has_curated_items(items):
    '''
    Returns True when at least one item in the list has an id that does not
    contain "snap". Such items are counted as manually curated articles.
    A missing or empty list yields False.
    '''
    if not items:
        return False
    # An item without an id cannot be a snap, so it is counted as curated.
    return any("snap" not in (item.get('id') or '') for item in items)


def _write_json(path, data):
    '''
    Serialises data as indented json into the file at path, closing the file afterwards.
    '''
    with open(path, 'w') as out_file:
        out_file.write(json.dumps(data, indent=4))


def run():
    '''
    Finds editorial fronts which are either fully automated or do not contain any
    automated collections, and writes the names of these fronts and the time they
    were last edited into json files (automated_fronts.json and
    not_backfilled_fronts.json).
    Fronts which contain at least one backfilled collection that also holds curated
    articles have the names of their curated and automated collections recorded in
    a separate file (manual_backfilled_fronts.json).
    Takes the stage (CODE/PROD) we are querying as the first command line argument;
    without it a usage message is written to stderr and nothing else is done.
    '''

    print('Starting script')
    if len(sys.argv) <= 1:
        sys.stderr.write("Usage: pass in the stage\n")
        return

    stage = sys.argv[1]
    configPath = stage + '/frontsapi/config/config.json'
    collectionPath = stage + '/frontsapi/collection/'
    no_backfill_fronts = []
    automated_fronts = []
    manual_backfill_fronts = []

    s3.Object(bucket, configPath).download_file('./frontsconfig.json')
    with open('frontsconfig.json') as data_file:
        config = json.load(data_file)

    fronts = config['fronts']
    collection_configs = config['collections']

    for front_name in fronts:
        front_config = fronts.get(front_name)

        # Only fronts without a 'priority' entry are inspected.
        if 'priority' in front_config:
            continue

        # Keep track of what kind of front we are looking at.
        automated = True
        fully_manual = True
        manual_backfill = False

        # Keep track of which collections are either curated or fully automated.
        curated_collections = []
        automated_collections = []

        last_updates = []

        for collection_id in front_config.get('collections') or []:
            collection = collection_configs.get(collection_id)
            if collection is None:
                # The front refers to a collection the config does not define;
                # there is nothing to classify, so report it and move on.
                sys.stderr.write('Skipping unknown collection %s on front %s\n'
                                 % (collection_id, front_name))
                continue

            edited_collection = get_collection_json(collection_id, collectionPath)
            name = collection.get('displayName')

            if edited_collection:
                millis = _last_updated_millis(edited_collection)
                if millis is not None:
                    last_updates.append(millis)

            if 'backfill' not in collection:
                # No backfill at all: the collection can only be filled by hand.
                automated = False
                curated_collections.append(name)
                continue

            fully_manual = False
            # For any collection with a backfill we also need to check if it contains
            # manually curated articles. If it does, it is not a fully automated collection.
            curated = bool(edited_collection) and (
                _has_curated_items(edited_collection.get('live'))
                or _has_curated_items(edited_collection.get('draft'))
            )
            if curated:
                automated = False
                manual_backfill = True
                curated_collections.append(name)
            else:
                automated_collections.append(name)

        if last_updates:
            # Timestamps are in milliseconds; fromtimestamp expects seconds.
            last_front_update = max(last_updates)
            date = datetime.datetime.fromtimestamp(
                last_front_update / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
        else:
            date = ''

        if automated:
            automated_fronts.append(front_name + ', ' + date)
        if fully_manual:
            no_backfill_fronts.append(front_name + ', ' + date)
        if manual_backfill:
            front_details = {
                front_name: {
                    'curated': curated_collections,
                    'automated': automated_collections,
                }
            }
            manual_backfill_fronts.append(front_details)

    _write_json('automated_fronts.json', automated_fronts)
    _write_json('not_backfilled_fronts.json', no_backfill_fronts)
    _write_json('manual_backfilled_fronts.json', manual_backfill_fronts)