in datasets/toy_binary.py [0:0]
def get_dataset(self):
    """Load the toy binary dataset and return its projected feature matrix.

    Reads ``data/toy_binary.csv`` into ``self.df`` and
    ``data/toy_binary.json`` into ``self.domain`` (both paths are relative
    to the current working directory), verifies that the CSV header and
    the domain keys name the same set of features, and then returns the
    result of ``self.project_feats()``.

    When ``self.use_subset`` is falsy, ``self.n`` and ``self.d`` are set
    from the shape of the projected dataset and the whole dataset is
    returned. Otherwise the result is sliced to the first ``self.n`` rows
    and first ``self.d`` columns; in that case ``self.n`` and ``self.d``
    must already have been set before this method is called.

    Returns:
        Whatever ``self.project_feats()`` produces (per the original
        comment, a one-hot encoding of the dataset -- TODO confirm against
        ``project_feats``), optionally sliced as described above.

    Raises:
        AssertionError: if the CSV columns and the domain keys differ.
    """
    data_location = "data/"

    # Parse the dataframe and the feature domains.
    self.df = pd.read_csv(os.path.join(data_location, "toy_binary.csv"))
    # Use a context manager so the JSON file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(os.path.join(data_location, "toy_binary.json")) as domain_file:
        self.domain = json.load(domain_file)

    # Check that the domain and the CSV header are consistent.
    csv_features = set(self.df.columns)
    domain_features = set(self.domain.keys())
    assert csv_features == domain_features, (
        "CSV header and domain keys differ: %s"
        % sorted(map(str, csv_features ^ domain_features))
    )

    # Encoding of the entire dataset, as produced by project_feats().
    dataset = self.project_feats()

    if self.use_subset:
        # Restrict to the previously configured number of rows/columns.
        return dataset[: self.n, : self.d]

    # Full dataset: record its dimensions for later use.
    self.n, self.d = dataset.shape
    return dataset