def verify_modelling_inputs()

in source/lambda/graph-modelling/index.py [0:0]


def verify_modelling_inputs(event_source_s3):
    """Check that an S3 event is the training signal and that its folder has the training data.

    The event is treated as the training kick-off when the object key contains
    ``tags.csv``. The folder holding that object is then listed and must contain
    at least ``features.csv`` and ``tags.csv`` (matched on file basename).

    Args:
        event_source_s3: Mapping read as ``['bucket']['name']`` and
            ``['object']['key']`` (presumably the ``s3`` section of an S3 event
            notification record -- confirm against the caller).

    Returns:
        A ``(result, message)`` tuple. ``result`` is ``False`` when the event is
        not the training signal or the required files are missing; otherwise it
        is the value of ``get_full_s3_path(bucket, training_folder)``.
        ``message`` is a human-readable explanation in both cases.
    """
    training_kickoff_signal = "tags.csv"
    bucket_name = event_source_s3['bucket']['name']
    object_key = event_source_s3['object']['key']

    # NOTE(review): this is a substring test, so keys such as "hashtags.csv"
    # also pass it; the folder check below still requires a real tags.csv.
    if training_kickoff_signal not in object_key:
        msg = "Event source was not the training signal. Triggered by {} but expected folder to contain {}"
        return False, msg.format(get_full_s3_path(bucket_name, object_key), training_kickoff_signal)

    training_folder = os.path.dirname(object_key)
    full_s3_training_folder = get_full_s3_path(bucket_name, training_folder)

    # Terminate the prefix with "/" so that sibling prefixes sharing the same
    # leading characters (e.g. "train" vs "train-old") are not listed too.
    if training_folder and not training_folder.endswith('/'):
        listing_prefix = training_folder + '/'
    else:
        listing_prefix = training_folder

    # list_objects_v2 returns at most 1000 keys per call and omits 'Contents'
    # entirely when nothing matches, so page through and default to empty.
    files = []
    list_kwargs = {'Bucket': bucket_name, 'Prefix': listing_prefix}
    while True:
        objects = s3_client.list_objects_v2(**list_kwargs)
        files.extend(content['Key'] for content in objects.get('Contents', []))
        if not objects.get('IsTruncated'):
            break
        list_kwargs['ContinuationToken'] = objects['NextContinuationToken']

    print("Contents of training data folder :")
    print("\n".join(files))
    minimum_expected_files = ['features.csv', 'tags.csv']

    present_file_names = {os.path.basename(s3_file) for s3_file in files}
    if not all(expected in present_file_names for expected in minimum_expected_files):
        return False, "Training data absent or incomplete in {}".format(full_s3_training_folder)

    return full_s3_training_folder, "Minimum files needed for training present in {}".format(full_s3_training_folder)