in source/lambda/graph-modelling/index.py [0:0]
def verify_modelling_inputs(event_source_s3):
    """Check that an S3 event is the training signal and the training data is complete.

    The event counts as a training signal when its object key contains
    ``tags.csv``. The folder holding that object is then listed and must
    contain at least ``features.csv`` and ``tags.csv`` (matched on base name).

    Args:
        event_source_s3: The ``s3`` section of an S3 event record; read via
            ``['bucket']['name']`` and ``['object']['key']``.

    Returns:
        A ``(result, message)`` tuple. ``result`` is ``False`` when the event
        is not the training signal or the required files are missing;
        otherwise it is the training folder location as produced by
        ``get_full_s3_path``. ``message`` is a human-readable explanation.
    """
    training_kickoff_signal = "tags.csv"
    object_key = event_source_s3['object']['key']
    bucket_name = event_source_s3['bucket']['name']

    if training_kickoff_signal not in object_key:
        msg = "Event source was not the training signal. Triggered by {} but expected folder to contain {}"
        return False, msg.format(get_full_s3_path(bucket_name, object_key), training_kickoff_signal)

    training_folder = os.path.dirname(object_key)
    full_s3_training_folder = get_full_s3_path(bucket_name, training_folder)

    # Terminate the prefix with "/" so that sibling folders sharing the same
    # name prefix (e.g. "data" vs "data-old/") are not listed by mistake.
    listing_prefix = training_folder
    if listing_prefix and not listing_prefix.endswith("/"):
        listing_prefix += "/"

    # list_objects_v2 returns at most one page per call and omits "Contents"
    # entirely when nothing matches, so page through and default to empty.
    files = []
    list_kwargs = {'Bucket': bucket_name, 'Prefix': listing_prefix}
    while True:
        objects = s3_client.list_objects_v2(**list_kwargs)
        files.extend(content['Key'] for content in objects.get('Contents', []))
        next_token = objects.get('NextContinuationToken')
        if not objects.get('IsTruncated') or not next_token:
            break
        list_kwargs['ContinuationToken'] = next_token

    print("Contents of training data folder :")
    print("\n".join(files))

    minimum_expected_files = ['features.csv', 'tags.csv']
    # Build the set of base names once instead of once per expected file.
    present_file_names = {os.path.basename(s3_file) for s3_file in files}
    if not all(expected in present_file_names for expected in minimum_expected_files):
        return False, "Training data absent or incomplete in {}".format(full_s3_training_folder)

    return full_s3_training_folder, "Minimum files needed for training present in {}".format(full_s3_training_folder)