def _generate_fleet_info()

in sagemaker/source/dataset/dataset_generator.py [0:0]


    def _generate_fleet_info(self):
        """Build the fleet table with one row per generated vehicle.

        Every row of ``self.statistics_df`` is repeated
        ``self.size_per_type`` times after the six electrical-statistics
        columns (voltage/current/resistance mean and std) have been dropped.
        Each copy receives a sequential ``vehicle_id``.

        Returns:
            pandas.DataFrame: ``vehicle_id`` followed by the columns of
            ``self.statistics_df`` that remain after the drop, in their
            original order. Rows are ordered by ``vehicle_id``; no shuffling
            is done at this point.

        Raises:
            KeyError: if ``self.statistics_df`` lacks any of the dropped
                statistics columns (raised by ``DataFrame.drop``).
        """
        stat_columns = [
            'voltage_mean',
            'current_mean',
            'voltage_std',
            'current_std',
            'resistance_mean',
            'resistance_std',
        ]
        type_info = self.statistics_df.drop(columns=stat_columns)
        size = self.size_per_type

        data = []
        # Number the source rows by position, not by index label, so the ids
        # stay unique and contiguous even when statistics_df has a filtered,
        # re-ordered or non-integer index. With the default RangeIndex the
        # ids are the same as before.
        # NOTE(review): if other code maps vehicle_id // size_per_type back
        # to an index *label* of statistics_df, confirm it uses positions.
        for position, values in enumerate(
                type_info.itertuples(index=False, name=None)):
            first_id = size * position
            # Take the values straight from the remaining columns so that
            # they can never drift out of step with the header built below.
            data.extend([first_id + c, *values] for c in range(size))

        return pd.DataFrame(data=data,
                            columns=['vehicle_id', *type_info.columns])