in src/python/tensorflow_cloud/core/run.py [0:0]
def run_cloudtuner(num_jobs=1, **kwargs):
    """Submit one or more identical CloudTuner jobs through `tfc.run()`.

    The first job is launched by calling `run(**kwargs)`. Every additional
    job redeploys the Docker image produced by that first call with the same
    cluster settings, but with a freshly generated tuner id prepended to its
    entry point arguments. The id is shared by all workers of one job and
    differs between jobs; it is intended to serve as the job's
    KERASTUNER_TUNER_ID so that the jobs can tune concurrently. This feature
    is only supported in Notebooks and Colab.

    Args:
        num_jobs: Number of concurrent jobs to submit to AI Platform
            training. The jobs are independent clones of the same job.
            A value of 1 is identical to calling `tfc.run()` directly.
        **kwargs: Keyword arguments forwarded to `tfc.run()`.

    Returns:
        A dictionary with two keys: 'job_ids', the list of submitted
        training job ids, and 'docker_image', the Docker image generated
        for the training job. Returns None when `remote()` is true, i.e.
        when the code is already executing in the cloud environment.

    Raises:
        ValueError: If `num_jobs` is less than 1.
    """
    # Nothing to submit when already running in the cloud environment.
    if remote():
        return

    if num_jobs < 1:
        raise ValueError("num_jobs must be greater than 0.")

    # The first job takes the regular tfc.run() path; per the original
    # notes, the parameters are validated there, so they are reused below
    # without further checks.
    first_run = run(**kwargs)
    image_uri = first_run["docker_image"]

    # Settings needed to redeploy the same image for the remaining jobs.
    chief_config = kwargs.get("chief_config", "auto")
    worker_count = kwargs.get("worker_count", 0)
    worker_config = kwargs.get("worker_config", "auto")
    base_args = kwargs.get("entry_point_args", None)
    stream_logs = kwargs.get("stream_logs", False)
    job_labels = kwargs.get("job_labels", None)
    service_account = kwargs.get("service_account", None)

    submitted_ids = [first_run["job_id"]]

    for _ in range(1, num_jobs):
        # One tuner id per job: workers inside a job request the same tuner
        # parameters, while separate jobs receive different ones.
        tuner_id = f"TUNER_ID_{uuid.uuid4().hex}"
        job_args = [tuner_id, *base_args] if base_args else [tuner_id]

        # Deploy the already-built Docker image as another cloud job.
        new_job_id = deploy.deploy_job(
            image_uri,
            chief_config,
            worker_count,
            worker_config,
            job_args,
            stream_logs,
            job_labels=job_labels,
            service_account=service_account,
        )
        submitted_ids.append(new_job_id)

    return {
        "job_ids": submitted_ids,
        "docker_image": image_uri,
    }