in k-fold-cross-validation/fold.py [0:0]
def create_datasources(self):
    """
    Creates the training and evaluation datasources for this fold on
    Amazon ML.

    Both datasources are created from the same S3 location and schema
    (taken from self.data_spec) and differ only in their
    DataRearrangement string: the training datasource is built with
    is_complement=True and the evaluation datasource with
    is_complement=False.

    Sets train_ds_id, train_ds_rearrange and train_ds_name, then
    eval_ds_id, eval_ds_rearrange and eval_ds_name on the instance.
    Each group is assigned before its create request is sent, so the
    values stay available on the instance even if the request raises.
    """
    # create training datasource for this fold
    self.train_ds_id = self._new_datasource_id()
    self.train_ds_rearrange = self.build_rearrangement_str(
        is_complement=True)
    self.train_ds_name = self.build_datasource_name(
        self.data_spec.name, self.train_ds_rearrange)
    self._create_datasource_from_s3(
        self.train_ds_id, self.train_ds_name, self.train_ds_rearrange)
    logger.info("Created Training Datasource %s", self.train_ds_id)

    # create evaluation datasource for this fold
    self.eval_ds_id = self._new_datasource_id()
    self.eval_ds_rearrange = self.build_rearrangement_str(
        is_complement=False)
    self.eval_ds_name = self.build_datasource_name(
        self.data_spec.name, self.eval_ds_rearrange)
    self._create_datasource_from_s3(
        self.eval_ds_id, self.eval_ds_name, self.eval_ds_rearrange)
    logger.info("Created Evaluation Datasource %s", self.eval_ds_id)

def _new_datasource_id(self):
    """
    Returns a new random datasource ID: "ds-" followed by 16 base32
    characters.
    """
    # 10 random bytes are exactly 80 bits, which base32 encodes as 16
    # characters with no "=" padding.
    return "ds-" + base64.b32encode(os.urandom(10)).decode("ascii")

def _create_datasource_from_s3(self, ds_id, ds_name, rearrangement):
    """
    Sends one create_data_source_from_s3 request through self._ml.

    Args:
        ds_id: ID to give the new datasource.
        ds_name: name to give the new datasource.
        rearrangement: value passed as the "DataRearrangement" entry of
            the data spec.

    The S3 location and schema always come from self.data_spec, and
    statistics computation is always requested.
    """
    self._ml.create_data_source_from_s3(
        data_source_id=ds_id,
        data_source_name=ds_name,
        data_spec={
            "DataLocationS3": self.data_spec.data_s3_url,
            "DataSchema": self.data_spec.schema,
            "DataRearrangement": rearrangement,
        },
        compute_statistics=True,
    )