def create_training_dataset()

in workflow3_local/local_endpointbuilder.py


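The excerpt relies on several standard-library and AWS modules. A minimal set of module-level imports it would need (assumed to sit at the top of local_endpointbuilder.py, not shown in this excerpt) is:

import csv
import os
from concurrent.futures import ThreadPoolExecutor
from itertools import repeat
from shutil import rmtree

import boto3
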
def create_training_dataset(dataset_path, csv_name, bucket_name, _id):
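    """Build the training dataset CSV from processed images, upload it to S3,
    and remove the local temp files."""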
    root_path = os.path.dirname(os.path.abspath(__file__))
    temp_dir_path = os.path.join(root_path, "workflow1.5_local_temp")
    csv_file_path = os.path.join(temp_dir_path, csv_name)

    # create local directories that store the files generated by this script
    create_temp_directories(temp_dir_path, dataset_path)

    # create training dataset CSV
    with open(csv_file_path, "w", newline="") as file:
        new_image_info = get_processed_images(temp_dir_path)
        items = enumerate(new_image_info)
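        # build CSV rows in a thread pool: each worker gets an (index, image_info)
        # pair plus the shared temp dir via repeat(); exiting the executor context
        # waits for all tasks, so the rows iterator is safe to consume afterwards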
        with ThreadPoolExecutor() as executor:
            rows = executor.map(add_image_to_csv, items, repeat(temp_dir_path))

        # write rows to CSV
        writer = csv.writer(file)
        writer.writerows(rows)

    # upload CSV to S3 bucket
    s3 = boto3.client("s3")
    s3.upload_file(csv_file_path, bucket_name, csv_name)
    print("Training dataset CSV file has been uploaded to S3")

    # delete the local directories containing the CSV, images, and PDFs
    rmtree(temp_dir_path)
    print("Deleted local CSV, PDFs, and images")