in workflow3_local/local_endpointbuilder.py [0:0]
# imports this function relies on (boto3 plus standard-library modules)
import csv
import os
from concurrent.futures import ThreadPoolExecutor
from itertools import repeat
from shutil import rmtree

import boto3


def create_training_dataset(dataset_path, csv_name, bucket_name, _id):
    """Build the training dataset CSV locally, upload it to S3, then clean up."""
    root_path = os.path.dirname(os.path.abspath(__file__))
    temp_dir_path = root_path + "/workflow1.5_local_temp"
    csv_file_path = temp_dir_path + "/" + csv_name
    # create local directories that store the files generated by this script
    create_temp_directories(temp_dir_path, dataset_path)
    # create training dataset CSV
    with open(csv_file_path, "w", newline="") as file:
        new_image_info = get_processed_images(temp_dir_path)
        items = enumerate(new_image_info)
        # build one CSV row per (index, image_info) pair in parallel;
        # repeat() supplies the same temp directory to every worker call
        with ThreadPoolExecutor() as executor:
            rows = executor.map(add_image_to_csv, items, repeat(temp_dir_path))
            # write rows to CSV
            writer = csv.writer(file)
            writer.writerows(rows)
    # upload CSV to S3 bucket
    s3 = boto3.client("s3")
    s3.upload_file(csv_file_path, bucket_name, csv_name)
    print("Training dataset CSV file has been uploaded to S3")
    # delete the local directories containing the CSV, images, and PDFs
    rmtree(temp_dir_path)
    print("Deleted local CSV, PDFs, and images")