def remote()

in chatlearn/schedule/model_manager.py [0:0]


    def remote(self) -> list:
        """
        Convert the local models into distributed models and place them on remote devices.

        The conversion is performed at most once: when ``self.converted`` is
        already set, the existing ``self.dist_models`` list is returned as is.

        Steps:
          1. Wrap every entry of ``self.local_models`` with ``self._to_dist_model``,
             append it to ``self.dist_models`` and index it by model name.
          2. Compare the GPUs required by the job with the GPUs reported by the
             resource manager (error if too few, warning if more than needed).
          3. Place each group listed in ``self.runtime_args.colocation`` together;
             groups holding more than one model are marked as colocated.
          4. Place every model that was not part of a group on its own, then
             pass the env list used by all placement calls to
             ``self.set_dist_env_concurrent``.

        Returns:
            list: ``self.dist_models``.

        Raises:
            RuntimeError: if the job requires more GPUs than were applied for.
        """
        logger.info(f"{LOG_START} model_manager start to convert model to remote")
        started_at = time.time()
        if self.converted:
            return self.dist_models

        # Step 1: one dist model per local model, looked up later by name.
        self._name2distmodel = {}
        for local_model in self.local_models:
            wrapped = self._to_dist_model(local_model)
            self.dist_models.append(wrapped)
            self._name2distmodel[local_model.name] = wrapped

        # Step 2: sanity-check the GPU budget before placing anything.
        needed_gpus = self._get_total_gpu_required()
        applied_gpus = self.resouce_manager.total_gpu
        if needed_gpus > applied_gpus:
            raise RuntimeError(
                f"The number of required gpus for current job is {needed_gpus}, "
                f"while the number of applied gpus is {applied_gpus}"
            )
        elif applied_gpus > needed_gpus:
            logger.warning(
                f"The number of applied gpus is {applied_gpus}, "
                f"while the number of required gpus is {needed_gpus}, "
                f"there is {applied_gpus - needed_gpus} wasted gpus"
            )

        gpu_checked_at = time.time()
        logger.info(
            f"{LOG_START} model_manager convert model to remote, "
            f"get_total_gpu_required(s):{(gpu_checked_at - started_at)}"
        )

        # Step 3: place colocation groups; remember which names were handled.
        env_list = []
        placed_names = set()
        for group in self.runtime_args.colocation:
            members = [self._name2distmodel[member_name] for member_name in group]
            self.place_models_to_remote_devices(members, env_list)
            if len(members) > 1:
                pending = []
                for member in members:
                    member.is_colocate = True
                    pending.extend(member.set_colocate(True))
                # Block until every set_colocate call of this group has finished.
                future.wait(pending)
            placed_names.update(group)

        colocated_at = time.time()
        logger.info(
            f"{LOG_START} model_manager convert model to remote, "
            f"set_colocate(s):{(colocated_at - gpu_checked_at)}"
        )

        # Step 4: everything not covered by a colocation group is placed alone.
        for dist_model in self.dist_models:
            if dist_model.name in placed_names:
                continue
            self.place_models_to_remote_devices([dist_model], env_list)
        self.set_dist_env_concurrent(env_list)
        self.converted = True

        finished_at = time.time()
        logger.info(
            f"{LOG_START} model_manager convert model to remote, "
            f"place_models_to_remote_devices(s):{(finished_at - colocated_at)}"
        )
        return self.dist_models