in dynalab_cli/test.py [0:0]
def run_docker_test(self, config):
    """Build and run the model's docker image locally as an integration test.

    Steps: tarball the project directory (minus excluded files), package the
    checkpoint/handler into a torchserve ``.mar`` archive, stage the
    Dockerfile, entrypoint and task annotation config in a tmp dir, build
    the image, then run it with ml.m5.xlarge-like resource limits.
    Torchserve output is persisted to ``<tmp>/ts_log.err`` / ``ts_log.out``.

    Args:
        config: model config dict; keys used here include ``checkpoint``,
            ``handler``, ``model_files``, ``requirements``, ``setup`` and
            ``task``.

    Raises:
        RuntimeError: if tarballing, model archiving, the docker build, or
            the containerized test itself fails.
    """
    tmp_dir = os.path.join(self.config_handler.config_dir, "tmp")
    os.makedirs(tmp_dir, exist_ok=True)
    # Tarball everything not on the exclude list (the model checkpoint is
    # excluded; it is shipped via the .mar archive instead).
    print("Tarballing the project directory...")
    exclude_list_file = os.path.join(tmp_dir, "exclude.txt")
    self.config_handler.write_exclude_filelist(
        exclude_list_file, self.args.name, exclude_model=True
    )
    process = subprocess.run(
        [
            "tar",
            f"--exclude-from={exclude_list_file}",
            "-czf",
            os.path.join(tmp_dir, f"{self.args.name}.tar.gz"),
            ".",
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if process.returncode != 0:
        raise RuntimeError(
            f"Exception in tarballing the project directory {process.stderr}"
        )
    # Package checkpoint + handler (+ any extra model files) into a
    # torchserve model archive; -f overwrites a stale archive in tmp_dir.
    print("Archive the model for torchserve...")
    archive_command = [
        "torch-model-archiver",
        "--model-name",
        self.args.name,
        "--serialized-file",
        config["checkpoint"],
        "--handler",
        config["handler"],
        "--version",
        "1.0",
        "-f",
        "--export-path",
        tmp_dir,
    ]
    if config["model_files"]:
        archive_command += ["--extra-files", ",".join(config["model_files"])]
    process = subprocess.run(
        archive_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if process.returncode != 0:
        raise RuntimeError(f"Exception in torchserve archive {process.stderr}")
    # Stage the Dockerfile (CUDA variant when a GPU is requested), the dev
    # entrypoint script and the task annotation config next to the build
    # artifacts in tmp_dir.
    lib_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
        "dynalab",
        "dockerfiles",
    )
    use_gpu = self.use_gpu(config)
    docker_file = Path(lib_dir) / "Dockerfile.dev"
    if use_gpu:
        docker_file = Path(lib_dir) / "Dockerfile.cuda"
    docker_path = os.path.join(tmp_dir, "Dockerfile")
    # TODO: pull the files from dynalab repo once public
    shutil.copyfile(str(docker_file), docker_path)
    shutil.copyfile(
        os.path.join(lib_dir, "dev-docker-entrypoint.sh"),
        os.path.join(tmp_dir, "dev-docker-entrypoint.sh"),
    )
    # Copy task annotation config file
    annotation_config_file_path = os.path.join(
        self.config_handler.root_dir,
        self.config_handler.dynalab_dir,
        f"{config['task']}.json",
    )
    shutil.copyfile(
        annotation_config_file_path, os.path.join(tmp_dir, f"{config['task']}.json")
    )
    # Build the image; docker repository names must be lowercase.
    repository_name = self.args.name.lower()
    print("Building docker image...")
    docker_build_args = [
        "--build-arg",
        f"add_dir={tmp_dir}",
        "--build-arg",
        f"model_name={self.args.name}",
        "--build-arg",
        f"requirements={str(config['requirements'])}",
        "--build-arg",
        f"setup={str(config['setup'])}",
        "--build-arg",
        f"task_code={config['task']}",
    ]
    docker_build_command = [
        "docker",
        "build",
        "--network=host",
        "-t",
        repository_name,
        "-f",
        docker_path,
        ".",
    ] + docker_build_args
    # Stream build output to the console, but fail fast on a broken build
    # instead of silently running a stale or non-existent image below.
    process = subprocess.run(docker_build_command)
    if process.returncode != 0:
        raise RuntimeError(
            f"Failed to build docker image (exit code {process.returncode})"
        )
    # NOTE: cpu and memory limit are specific to ml.m5.xlarge
    docker_run = [
        "docker",
        "run",
        "--network=none",
        "--cpus=4",
        "--memory=16G",
        repository_name,
    ]
    if use_gpu:
        # Run flags must precede the image name, hence insert before the end.
        docker_run.insert(-1, "--gpus=all")
    process = subprocess.run(
        docker_run,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    # Persist torchserve logs for inspection regardless of the outcome.
    ts_log = os.path.join(tmp_dir, "ts_log.err")
    with open(ts_log, "w") as f:
        f.write(process.stderr)
    with open(os.path.join(tmp_dir, "ts_log.out"), "w") as f:
        f.write(process.stdout)
    if process.returncode != 0:
        raise RuntimeError(
            f"Integrated test failed. Please refer to "
            f"{ts_log} for detailed torchserve log."
        )
    else:
        print(
            f"Integrated test passed. " f"Torchserve log can be found at {ts_log}"
        )
    # clean up local tarball, .mar and intermediate docker layers
    os.remove(os.path.join(tmp_dir, f"{self.args.name}.tar.gz"))
    os.remove(os.path.join(tmp_dir, f"{self.args.name}.mar"))
    print(
        "We suggest removing unused docker data by `docker system prune`, "
        "including unused containers, networks and images. "
        "To do so, choose 'y' for the following prompt. "
        "More info available at "
        "https://docs.docker.com/engine/reference/commandline/system_prune/"
    )
    # Argument list with the default shell=False: no shell parsing needed,
    # and the interactive confirmation prompt still reaches the user.
    subprocess.run(["docker", "system", "prune"])