MTRF/r3l/r3l/r3l_envs/inhand_env/rotate.py [268:312]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        policy_params_path = ckpt_path / "policy_params.pkl"
        params_path = path / "params.pkl"

        with open(policy_params_path, "rb") as f:
            policy_params_data = pickle.load(f)

        with open(params_path, "rb") as f:
            variant = pickle.load(f)

        # Rebuild the evaluation environment from the saved training variant, but on
        # this environment's existing sim and with commanded phase changes disabled.
        env_params = variant["environment_params"]["evaluation"]
        env_kwargs = env_params.pop("kwargs", {})
        env_kwargs["sim"] = self.sim
        env_params["kwargs"] = env_kwargs
        env_params["kwargs"]["commanded_phase_changes"] = False
        env = get_environment_from_params(env_params)
        # Create environment as softlearning expects it for policy initialization
        env = GymAdapter(None, None, env=env)

        # Never transition into perturbation for the reset controllers
        if hasattr(env, "turn_perturb_off"):
            env.turn_perturb_off()

        reset_horizons = []
        policies = []
        wrapped_policies = []

        # For each phase, load that phase's pre-trained policy weights and wrap the
        # policy for use as a reset controller.
        for phase in range(env.num_phases):
            phase_env = env.unwrapped._envs[phase]
            policy = get_policy_from_variant(variant, GymAdapter(None, None, env=phase_env))
            policy.set_weights(policy_params_data[phase])
            policies.append(policy)

            # Cap the reset horizon at 50 steps (or the sampler's max_path_length,
            # whichever is shorter) to keep resets fast.
            horizon = min(
                50,
                variant.get("sampler_params", {}).get("kwargs", {}).get("max_path_length", 50)
            )
            reset_horizons.append(horizon)
            wrapped_policies.append(self.wrap_policy(policy))

        # Mark no phase as a training phase and attach the loaded reset policies.
        env.unwrapped._training_phases = []
        env.unwrapped._phase_policies = wrapped_policies
        self.reset_env = env
        self.reset_horizons = reset_horizons
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
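
For context, the two attributes assigned at the end of this block are what the reset machinery consumes later: `self.reset_env` holds the rebuilt multi-phase environment with one wrapped policy attached per phase, and `self.reset_horizons` bounds how long each reset rollout may run. The sketch below only illustrates that consumption and is not code from rotate.py: the method name `run_reset_phase` is invented, and it assumes the callable returned by `self.wrap_policy` maps an observation directly to an action.

    def run_reset_phase(self, phase, obs):
        """Hypothetical driver: roll out the loaded reset controller for one phase."""
        env = self.reset_env
        policy = env.unwrapped._phase_policies[phase]  # wrapped policy loaded above
        for _ in range(self.reset_horizons[phase]):    # capped at 50 steps
            action = policy(obs)                       # assumed obs -> action callable
            obs, _reward, done, _info = env.step(action)
            if done:
                break
        return obs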



MTRF/r3l/r3l/r3l_envs/inhand_env/rotate.py [387:430]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        policy_params_path = ckpt_path / "policy_params.pkl"
        params_path = path / "params.pkl"

        with open(policy_params_path, "rb") as f:
            policy_params_data = pickle.load(f)

        with open(params_path, "rb") as f:
            variant = pickle.load(f)

        # Rebuild the evaluation environment from the saved training variant, but on
        # this environment's existing sim and with commanded phase changes disabled.
        env_params = variant["environment_params"]["evaluation"]
        env_kwargs = env_params.pop("kwargs", {})
        env_kwargs["sim"] = self.sim
        env_params["kwargs"] = env_kwargs
        env_params["kwargs"]["commanded_phase_changes"] = False
        env = get_environment_from_params(env_params)
        # Create environment as softlearning expects it for policy initialization
        env = GymAdapter(None, None, env=env)

        # Never transition into perturbation for the reset controllers
        if hasattr(env, "turn_perturb_off"):
            env.turn_perturb_off()

        reset_horizons = []
        policies = []
        wrapped_policies = []

        # For each phase, load that phase's pre-trained policy weights and wrap the
        # policy for use as a reset controller.
        for phase in range(env.num_phases):
            phase_env = env.unwrapped._envs[phase]
            policy = get_policy_from_variant(variant, GymAdapter(None, None, env=phase_env))
            policy.set_weights(policy_params_data[phase])
            policies.append(policy)

            # Cap the reset horizon at 50 steps (or the sampler's max_path_length,
            # whichever is shorter) to keep resets fast.
            horizon = min(
                50,
                variant.get("sampler_params", {}).get("kwargs", {}).get("max_path_length", 50)
            )
            reset_horizons.append(horizon)
            wrapped_policies.append(self.wrap_policy(policy))

        # Mark no phase as a training phase and attach the loaded reset policies.
        env.unwrapped._training_phases = []
        env.unwrapped._phase_policies = wrapped_policies
        self.reset_env = env
        self.reset_horizons = reset_horizons
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
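
The two regions above ([268:312] and [387:430]) are duplicates apart from a single blank line, so this loading logic appears twice in rotate.py. If consolidation were ever wanted, the shared body could be pulled into one helper. The sketch below is not code from the repository: the method name `_load_reset_policies` and its signature are invented, the unused local `policies` list from the excerpts is dropped, and the horizon (which does not depend on the phase) is computed once.

    def _load_reset_policies(self, path, ckpt_path):
        """Hypothetical shared helper covering both duplicated regions.

        Rebuilds the evaluation environment on this env's sim, loads one
        pre-trained policy per phase, and stores the wrapped policies and
        per-phase reset horizons on `self`.
        """
        with open(ckpt_path / "policy_params.pkl", "rb") as f:
            policy_params_data = pickle.load(f)
        with open(path / "params.pkl", "rb") as f:
            variant = pickle.load(f)

        env_params = variant["environment_params"]["evaluation"]
        env_kwargs = env_params.pop("kwargs", {})
        env_kwargs["sim"] = self.sim
        env_kwargs["commanded_phase_changes"] = False
        env_params["kwargs"] = env_kwargs
        env = GymAdapter(None, None, env=get_environment_from_params(env_params))

        # Never transition into perturbation for the reset controllers.
        if hasattr(env, "turn_perturb_off"):
            env.turn_perturb_off()

        # The horizon is the same for every phase, so compute it once.
        horizon = min(
            50,
            variant.get("sampler_params", {}).get("kwargs", {}).get("max_path_length", 50),
        )

        wrapped_policies = []
        for phase in range(env.num_phases):
            phase_env = env.unwrapped._envs[phase]
            policy = get_policy_from_variant(variant, GymAdapter(None, None, env=phase_env))
            policy.set_weights(policy_params_data[phase])
            wrapped_policies.append(self.wrap_policy(policy))

        env.unwrapped._training_phases = []
        env.unwrapped._phase_policies = wrapped_policies
        self.reset_env = env
        self.reset_horizons = [horizon] * env.num_phases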



