def create_data_source()

in social-media/create-aml-model.py [0:0]


def create_data_source(s3_uri, dataset_schema, ds_type, percent_begin, percent_end, compute_statistics):
    """Request a datasource built from a percentage slice of an S3 dataset.

    The datasource id is derived from ``ds_type`` and the module-level
    ``time_stamp``; its display name is derived from the module-level
    ``aml_training_dataset`` and the requested percentage range. After the
    creation call, a console link for the new datasource is printed.

    Args:
        s3_uri: Value stored under ``DataLocationS3`` in the data spec.
        dataset_schema: Value stored under ``DataSchema`` in the data spec.
        ds_type: Label embedded in the datasource id and the progress message.
        percent_begin: Start of the split, interpolated as ``percentBegin``.
        percent_end: End of the split, interpolated as ``percentEnd``.
        compute_statistics: Forwarded unchanged to the creation call.

    Returns:
        The id string of the datasource that was requested.
    """
    datasource_id = "ds-tweets-{0}-{1}".format(ds_type, time_stamp)

    # Doubled braces in the template are str.format escapes: they come out as
    # the literal braces of the JSON rearrangement document.
    rearrangement = '{{"randomSeed":"0","splitting":{{"percentBegin":{0},"percentEnd":{1}}}}}'.format(
        percent_begin, percent_end)
    spec = {
        'DataLocationS3': s3_uri,
        'DataSchema': dataset_schema,
        'DataRearrangement': rearrangement,
    }
    display_name = "{0}_DataSplitting [percentBegin={1}, percentEnd={2}]".format(
        aml_training_dataset, percent_begin, percent_end)

    # NOTE(review): `ml` is a module-level client defined outside this block;
    # presumably a boto Machine Learning connection -- confirm.
    ml.create_data_source_from_s3(
        datasource_id,
        spec,
        data_source_name=display_name,
        compute_statistics=compute_statistics)

    print("Creating {0} datasource. See:".format(ds_type))
    print(CONSOLE_URL_DATA_SOURCE.format(CONSOLE_URL_BASE, datasource_id))
    return datasource_id