ASLRecognition/scripts/create_csv.py (32 lines of code) (raw):

"""Build the image-path/label CSV for the ASL recognition dataset.

USAGE: python create_csv.py

Scans ``preprocessed_image`` for images, uses the name of each image's parent
directory as its class label, and writes two files to the current directory:

* ``data.csv`` -- shuffled rows with the columns ``image_path`` and ``target``
  (the index of the image's class in ``LabelBinarizer.classes_``).
* ``lb.pkl``   -- the fitted ``LabelBinarizer``, so a target index can be mapped
  back to its class name later.
"""
import os

import joblib
import numpy as np
import pandas as pd
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from tqdm import tqdm

# Input directory and output files, all relative to the working directory.
IMAGE_DIR = 'preprocessed_image'
CSV_PATH = 'data.csv'
LABEL_BINARIZER_PATH = 'lb.pkl'


def _class_indices(one_hot):
    """Return the class index encoded by each row of a LabelBinarizer output.

    Args:
        one_hot: 2-D array as returned by ``LabelBinarizer.fit_transform``.

    Returns:
        1-D array holding one class index per input row.
    """
    one_hot = np.asarray(one_hot)
    # With two (or fewer) classes LabelBinarizer emits a single 0/1 column
    # rather than one column per class. argmax over a one-element row is
    # always 0, so in that case the column itself already is the class index.
    if one_hot.shape[1] == 1:
        return one_hot.ravel()
    return one_hot.argmax(axis=1)


def main():
    """Collect the image paths, encode their labels and write the outputs.

    Raises:
        SystemExit: if no images are found under ``IMAGE_DIR``.
    """
    # get all the image paths
    image_paths = list(paths.list_images(IMAGE_DIR))
    if not image_paths:
        # Fail early with a readable message instead of an obscure error
        # from the label encoder further down.
        raise SystemExit(f"No images found under '{IMAGE_DIR}'")

    # the label of an image is the name of the directory that contains it
    labels = np.array([
        image_path.split(os.path.sep)[-2]
        for image_path in tqdm(image_paths)
    ])

    # one hot encode the labels
    lb = LabelBinarizer()
    labels = lb.fit_transform(labels)
    print(f"The first one hot encoded labels: {labels[0]}")
    print(f"Mapping the first one hot encoded label to its category: {lb.classes_[0]}")
    print(f"Total instances: {len(labels)}")

    # Build the frame in one shot from complete columns; growing it one cell
    # at a time with ``data.loc[i, col] = value`` reallocates on every row.
    # NOTE(review): targets are stored as float so that data.csv keeps the
    # "17.0"-style values that row-by-row assignment into a new column
    # produces -- confirm no consumer needs otherwise before switching to int.
    data = pd.DataFrame({
        'image_path': image_paths,
        'target': _class_indices(labels).astype(float),
    })

    # shuffle the dataset
    data = data.sample(frac=1).reset_index(drop=True)

    # save as CSV file
    data.to_csv(CSV_PATH, index=False)

    # pickle the binarized labels
    print('Saving the binarized labels as pickled file')
    joblib.dump(lb, LABEL_BINARIZER_PATH)

    print(data.head(5))


if __name__ == '__main__':
    main()