in sagemaker/source/dataset/dataset_generator.py [0:0]
def _generate_fleet_info(self):
    """Build the fleet table with one row per vehicle.

    Each row of ``self.statistics_df`` is repeated ``self.size_per_type``
    times. The voltage/current/resistance statistics columns are dropped,
    and a ``vehicle_id`` column is prepended to the remaining columns.

    Returns:
        pandas.DataFrame: columns are ``vehicle_id`` followed by every
        column of ``self.statistics_df`` that was not dropped, in their
        original order. Rows are in generation order (not shuffled).
    """
    size_per_type = self.size_per_type
    # Only the descriptive attributes belong in the fleet table; the
    # per-type signal statistics are removed.
    df = self.statistics_df.drop(columns=['voltage_mean',
                                          'current_mean',
                                          'voltage_std',
                                          'current_std',
                                          'resistance_mean',
                                          'resistance_std'])
    cols = list(df.columns)
    data = []
    for idx, row in df.iterrows():
        # Take the values straight from the row so they always line up
        # with `cols`, whatever order the columns are stored in.
        values = row.tolist()
        for c in range(size_per_type):
            # NOTE(review): `idx` is the index *label* of statistics_df, so
            # ids are contiguous (0..N-1) only when that index is the
            # default 0-based integer range -- confirm against the caller.
            vehicle_id = size_per_type * idx + c
            data.append([vehicle_id] + values)
    # No need to shuffle the dataset at this point.
    fleet_info_df = pd.DataFrame(data=data, columns=['vehicle_id'] + cols)
    return fleet_info_df