# def get_fallback_input_dataset()
#
# in ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py [0:0]

def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset:
    """
    Create and register a sample scoring dataset as a fallback input.

    Called when an input datastore does not exist or no input data file exists
    at that location. Creates a sample CSV using the diabetes dataset from
    scikit-learn, uploads it to the workspace default datastore, and registers
    it as a tabular dataset. Useful when debugging this code in the absence of
    the input data location Azure blob.

    :param ws: AML Workspace
    :param env: Environment Variables

    :returns: Fallback input dataset (registered, exposed as a named input)

    :raises FileNotFoundError: if the sample CSV was not created locally
    """
    # This call creates an example CSV from sklearn sample data. If you
    # have already bootstrapped your project, you can comment this line
    # out and use your own CSV.
    create_sample_data_csv(
        file_name=env.scoring_datastore_input_filename, for_scoring=True
    )

    if not os.path.exists(env.scoring_datastore_input_filename):
        # Fix: .format() was previously applied only to the second string
        # fragment (which has no placeholder), so the "{}" in the first
        # fragment was never substituted and the filename was dropped.
        # Format the fully concatenated message instead.
        error_message = (
            "Could not find CSV dataset for scoring at {}. "
            "No alternate data store location was provided either."
        ).format(env.scoring_datastore_input_filename)

        raise FileNotFoundError(error_message)

    # Upload the input data to the workspace default datastore; keep any
    # copy that is already there (overwrite=False).
    default_datastore = ws.get_default_datastore()
    scoreinputdataref = default_datastore.upload_files(
        [env.scoring_datastore_input_filename],
        target_path="scoringinput",
        overwrite=False,
    )

    # Register (a new version of) the tabular dataset and expose it under
    # the scoring dataset name for use as a pipeline input.
    scoringinputds = (
        Dataset.Tabular.from_delimited_files(scoreinputdataref)
        .register(ws, env.scoring_dataset_name, create_new_version=True)
        .as_named_input(env.scoring_dataset_name)
    )

    return scoringinputds