in ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py [0:0]
def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset:
    """
    Called when an input datastore does not exist or no input data file exists
    at that location. Create a sample dataset using the diabetes dataset from
    scikit-learn. Useful when debugging this code in the absence of the input
    data location Azure blob.

    :param ws: AML Workspace
    :param env: Environment Variables
    :returns: Fallback input dataset, registered in the workspace and wrapped
        as a named input for pipeline consumption
    :raises FileNotFoundError: if the sample CSV could not be created locally
    """
    # This call creates an example CSV from sklearn sample data. If you
    # have already bootstrapped your project, you can comment this line
    # out and use your own CSV.
    create_sample_data_csv(
        file_name=env.scoring_datastore_input_filename, for_scoring=True
    )
    if not os.path.exists(env.scoring_datastore_input_filename):
        # BUGFIX: the original built this message with
        # "..." + "...".format(filename); .format() bound only to the second
        # literal (which had no placeholder), so the filename was never
        # interpolated and the message contained a literal "{}".
        error_message = (
            f"Could not find CSV dataset for scoring at "
            f"{env.scoring_datastore_input_filename}. "
            f"No alternate data store location was provided either."
        )
        raise FileNotFoundError(error_message)

    # Upload the input data to the workspace default datastore so the
    # batch-scoring pipeline has a blob-backed source to read from.
    default_datastore = ws.get_default_datastore()
    scoreinputdataref = default_datastore.upload_files(
        [env.scoring_datastore_input_filename],
        target_path="scoringinput",
        overwrite=False,
    )

    # Register the uploaded CSV as a (new version of a) tabular dataset and
    # expose it under the configured name for downstream pipeline steps.
    scoringinputds = (
        Dataset.Tabular.from_delimited_files(scoreinputdataref)
        .register(ws, env.scoring_dataset_name, create_new_version=True)
        .as_named_input(env.scoring_dataset_name)
    )
    return scoringinputds