mujoco_worldgen/env.py (Env.__init__)
def __init__(self,
             get_sim,
             get_obs=flatten_get_obs,
             get_reward=zero_get_reward,
             get_info=empty_get_info,
             get_diverged=false_get_diverged,
             set_action=ctrl_set_action,
             action_space=None,
             horizon=100,
             start_seed=None,
             deterministic_mode=False):
"""
Env is a Gym environment subclass tuned for robotics learning
research.
Args:
- get_sim (callable): a callable that returns an MjSim.
- get_obs (callable): callable with an MjSim object as the sole
argument and should return observations.
- set_action (callable): callable which takes an MjSim object and
updates its data and buffer directly.
- get_reward (callable): callable which takes an MjSim object and
returns a scalar reward.
- get_info (callable): callable which takes an MjSim object and
returns info (dictionary).
- get_diverged (callable): callable which takes an MjSim object
and returns a (bool, float) tuple. First value is True if
simulator diverged and second value is the reward at divergence.
- action_space: a space of allowed actions or a two-tuple of a ranges
if number of actions is unknown until the simulation is instantiated
- horizon (int): horizon of environment (i.e. max number of steps).
- start_seed (int or string): seed for random state generator (None for random seed).
Strings will be hashed. A non-None value implies deterministic_mode=True.
This argument allows us to run a deterministic series of goals/randomizations
for a given policy. Then applying the same seed to another policy will allow the
comparison of results more accurately. The reason a string is allowed is so
that we can more easily find and share seeds that are farther from 0,
which is the default starting point for deterministic_mode, and thus have
more likelihood of getting a performant sequence of goals.
"""
    if (horizon is not None) and not isinstance(horizon, int):
        raise TypeError('horizon must be an int')
    self.get_sim = enforce_is_callable(get_sim, (
        'get_sim should be callable and should return an MjSim object'))
    self.get_obs = enforce_is_callable(get_obs, (
        'get_obs should be callable with an MjSim object as the sole '
        'argument and should return observations'))
    self.set_action = enforce_is_callable(set_action, (
        'set_action should be a callable which takes an MjSim object and '
        'updates its data and buffer directly'))
    self.get_reward = enforce_is_callable(get_reward, (
        'get_reward should be a callable which takes an MjSim object and '
        'returns a scalar reward'))
    self.get_info = enforce_is_callable(get_info, (
        'get_info should be a callable which takes an MjSim object and '
        'returns a dictionary'))
    self.get_diverged = enforce_is_callable(get_diverged, (
        'get_diverged should be a callable which takes an MjSim object '
        'and returns a (bool, float) tuple. First value is whether '
        'simulator is diverged (or done) and second value is the reward at '
        'that time.'))
    self.sim = None
    self.horizon = horizon
    self.t = None
    self.deterministic_mode = deterministic_mode

    # Numpy random state: string seeds are hashed down to a 32-bit int,
    # and any explicit seed switches on deterministic mode.
    if isinstance(start_seed, str):
        start_seed = int(hashlib.sha1(start_seed.encode()).hexdigest(), 16) % (2**32)
        self.deterministic_mode = True
    elif isinstance(start_seed, int):
        self.deterministic_mode = True
    else:
        start_seed = 0 if self.deterministic_mode else np.random.randint(2**32)
    self._random_state = np.random.RandomState(start_seed)
    # Seed that will be used on next _reset()
    self._next_seed = start_seed
    # Seed that was used in last _reset()
    self._current_seed = None

    # For rendering
    self.viewer = None

    # These are required by Gym
    self._action_space = action_space
    self._observation_space = None
    self._spec = Spec(max_episode_steps=horizon, timestep_limit=horizon)
    self._name = None
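
For orientation, here is a minimal usage sketch of this constructor. It assumes mujoco_py is installed and that Env is importable from mujoco_worldgen.env; the MJCF string and the stub get_sim below are hypothetical, and the optional seed parameter on get_sim is an assumption about the callable's contract, since this excerpt only states that get_sim must return an MjSim.

from mujoco_py import MjSim, load_model_from_xml
from mujoco_worldgen.env import Env

# Hypothetical minimal MJCF model; any valid MuJoCo XML would work.
BOX_XML = """
<mujoco>
  <worldbody>
    <body name="box" pos="0 0 0.2">
      <joint type="free"/>
      <geom type="box" size="0.1 0.1 0.1"/>
    </body>
  </worldbody>
</mujoco>
"""

def get_sim(seed=None):
    # Assumed contract: build and return a fresh MjSim. The seed
    # parameter is speculative and is ignored by this static model.
    return MjSim(load_model_from_xml(BOX_XML))

# get_obs/get_reward/get_info/get_diverged/set_action fall back to the
# defaults named in the signature above.
env = Env(get_sim=get_sim, horizon=50, start_seed='shared-seed-a')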
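
The string-to-seed mapping in the constructor is just a SHA-1 digest reduced mod 2**32, so the same expression reproduces a seed outside the class. The helper name seed_from_string below is introduced here purely for illustration; the expression itself is copied from __init__.

import hashlib

def seed_from_string(s):
    # Same expression as in __init__: SHA-1 digest reduced to a 32-bit int.
    return int(hashlib.sha1(s.encode()).hexdigest(), 16) % (2**32)

# Stable across runs, processes, and machines, which is what makes string
# seeds easy to share between experiments.
assert seed_from_string('shared-seed-a') == seed_from_string('shared-seed-a')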