# gym/gym/envs/__init__.py
from gym.envs.registration import registry, register, make, spec
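
# Each register() call below adds an EnvSpec (keyed by its string id) to the
# global registry, so an environment can later be built by name, e.g.:
#
#     import gym
#     env = gym.make('CartPole-v0')   # looks up the spec registered in this file
#     observation = env.reset()
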
# Algorithmic
# ----------------------------------------
register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    max_episode_steps=200,
    reward_threshold=75.0,
)

register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 2},
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='ReversedAddition3-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 3},
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='DuplicatedInput-v0',
    entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
    max_episode_steps=200,
    reward_threshold=9.0,
)

register(
    id='Reverse-v0',
    entry_point='gym.envs.algorithmic:ReverseEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)

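# Where a register() call supplies a kwargs dict (e.g. {'rows': 2} above), those
# keyword arguments are forwarded to the entry_point constructor when the
# environment is instantiated, i.e. ReversedAdditionEnv(rows=2).
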
# Classic
# ----------------------------------------
register(
    id='CartPole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=200,
    reward_threshold=195.0,
)

register(
    id='CartPole-v1',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=500,
    reward_threshold=475.0,
)

register(
    id='MountainCar-v0',
    entry_point='gym.envs.classic_control:MountainCarEnv',
    max_episode_steps=200,
    reward_threshold=-110.0,
)

register(
    id='MountainCarContinuous-v0',
    entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
    max_episode_steps=999,
    reward_threshold=90.0,
)

register(
    id='Pendulum-v0',
    entry_point='gym.envs.classic_control:PendulumEnv',
    max_episode_steps=200,
)

register(
    id='Acrobot-v1',
    entry_point='gym.envs.classic_control:AcrobotEnv',
    max_episode_steps=500,
)

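# reward_threshold is the average return at which a task is considered solved
# (conventionally averaged over 100 consecutive episodes, e.g. 195.0 for
# CartPole-v0); max_episode_steps is enforced via gym's TimeLimit wrapper when
# the environment is constructed.
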
# Box2d
# ----------------------------------------
register(
    id='LunarLander-v2',
    entry_point='gym.envs.box2d:LunarLander',
    max_episode_steps=1000,
    reward_threshold=200,
)

register(
    id='LunarLanderContinuous-v2',
    entry_point='gym.envs.box2d:LunarLanderContinuous',
    max_episode_steps=1000,
    reward_threshold=200,
)

register(
    id='BipedalWalker-v2',
    entry_point='gym.envs.box2d:BipedalWalker',
    max_episode_steps=1600,
    reward_threshold=300,
)

register(
    id='BipedalWalkerHardcore-v2',
    entry_point='gym.envs.box2d:BipedalWalkerHardcore',
    max_episode_steps=2000,
    reward_threshold=300,
)

register(
    id='CarRacing-v0',
    entry_point='gym.envs.box2d:CarRacing',
    max_episode_steps=1000,
    reward_threshold=900,
)

# Toy Text
# ----------------------------------------
register(
    id='Blackjack-v0',
    entry_point='gym.envs.toy_text:BlackjackEnv',
)

register(
    id='KellyCoinflip-v0',
    entry_point='gym.envs.toy_text:KellyCoinflipEnv',
    reward_threshold=246.61,
)

register(
    id='KellyCoinflipGeneralized-v0',
    entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
)

register(
    id='FrozenLake-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4'},
    max_episode_steps=100,
    reward_threshold=0.78,  # optimum = .8196
)

register(
    id='FrozenLake8x8-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '8x8'},
    max_episode_steps=200,
    reward_threshold=0.99,  # optimum = 1
)

register(
    id='CliffWalking-v0',
    entry_point='gym.envs.toy_text:CliffWalkingEnv',
)

register(
    id='NChain-v0',
    entry_point='gym.envs.toy_text:NChainEnv',
    max_episode_steps=1000,
)

register(
    id='Roulette-v0',
    entry_point='gym.envs.toy_text:RouletteEnv',
    max_episode_steps=100,
)

register(
    id='Taxi-v2',
    entry_point='gym.envs.toy_text.taxi:TaxiEnv',
    reward_threshold=8,  # optimum = 8.46
    max_episode_steps=200,
)

register(
    id='GuessingGame-v0',
    entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
    max_episode_steps=200,
)

register(
    id='HotterColder-v0',
    entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
    max_episode_steps=200,
)

# Mujoco
# ----------------------------------------
# 2D
register(
    id='Reacher-v1',
    entry_point='gym.envs.mujoco:ReacherEnv',
    max_episode_steps=50,
    reward_threshold=-3.75,
)

register(
    id='Pusher-v0',
    entry_point='gym.envs.mujoco:PusherEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id='Thrower-v0',
    entry_point='gym.envs.mujoco:ThrowerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id='Striker-v0',
    entry_point='gym.envs.mujoco:StrikerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id='InvertedPendulum-v1',
    entry_point='gym.envs.mujoco:InvertedPendulumEnv',
    max_episode_steps=1000,
    reward_threshold=950.0,
)

register(
    id='InvertedDoublePendulum-v1',
    entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
    max_episode_steps=1000,
    reward_threshold=9100.0,
)

register(
    id='HalfCheetah-v1',
    entry_point='gym.envs.mujoco:HalfCheetahEnv',
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id='Hopper-v1',
    entry_point='gym.envs.mujoco:HopperEnv',
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id='Swimmer-v1',
    entry_point='gym.envs.mujoco:SwimmerEnv',
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
    id='SwimmerBandits-v1',
    entry_point='gym.envs.mujoco:SwimmerBanditsEnv',
    max_episode_steps=1000,
)

register(
    id='Obstacles-v1',
    entry_point='gym.envs.mujoco:Obstacles',
    max_episode_steps=1000,
)

register(
    id='AntBandits-v1',
    entry_point='gym.envs.mujoco:AntBanditsEnv',
    max_episode_steps=1000,
)

register(
    id='AntMovement-v1',
    entry_point='gym.envs.mujoco:AntMovementEnv',
    max_episode_steps=600,
)

register(
    id='AntObstacles-v1',
    entry_point='gym.envs.mujoco:AntObstaclesEnv',
    max_episode_steps=1000,
)

register(
    id='AntObstaclesBig-v1',
    entry_point='gym.envs.mujoco:AntObstaclesBigEnv',
    max_episode_steps=3000,
)

register(
    id='AntObstaclesGen-v1',
    entry_point='gym.envs.mujoco:AntObstaclesGenEnv',
    max_episode_steps=1000,
)

register(
    id='Walker2d-v1',
    max_episode_steps=1000,
    entry_point='gym.envs.mujoco:Walker2dEnv',
)

register(
    id='Ant-v1',
    entry_point='gym.envs.mujoco:AntEnv',
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id='Humanoid-v1',
    entry_point='gym.envs.mujoco:HumanoidEnv',
    max_episode_steps=1000,
)

register(
    id='HumanoidCourse-v1',
    entry_point='gym.envs.mujoco:HumanoidCourseEnv',
    max_episode_steps=3000,
)

register(
    id='HumanoidSeq-v1',
    entry_point='gym.envs.mujoco:HumanoidSeqEnv',
    max_episode_steps=1000,
)

register(
    id='HumanoidStandup-v1',
    entry_point='gym.envs.mujoco:HumanoidStandupEnv',
    max_episode_steps=1000,
)

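# Note: registration only stores the entry_point strings above, so this module
# imports fine without MuJoCo installed; gym.make() on these ids will fail if
# mujoco_py (and a working MuJoCo installation) is missing.
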
# Atari
# ----------------------------------------
# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
             'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
             'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
             'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
             'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
             'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
             'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
             'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
             'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
    for obs_type in ['image', 'ram']:
        # space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
        name = ''.join([g.capitalize() for g in game.split('_')])
        if obs_type == 'ram':
            name = '{}-ram'.format(name)

        nondeterministic = False
        if game == 'elevator_action' and obs_type == 'ram':
            # ElevatorAction-ram-v0 seems to yield slightly
            # non-deterministic observations about 10% of the time. We
            # should track this down eventually, but for now we just
            # mark it as nondeterministic.
            nondeterministic = True

        register(
            id='{}-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
            max_episode_steps=10000,
            nondeterministic=nondeterministic,
        )

        register(
            id='{}-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )

        # Standard Deterministic (as in the original DeepMind paper)
        if game == 'space_invaders':
            frameskip = 3
        else:
            frameskip = 4

        # Use a deterministic frame skip.
        register(
            id='{}Deterministic-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )

        register(
            id='{}Deterministic-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )

        register(
            id='{}NoFrameskip-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25},  # A frameskip of 1 means we get every frame
            max_episode_steps=frameskip * 100000,
            nondeterministic=nondeterministic,
        )

        # No frameskip. (Atari has no entropy source, so these are
        # deterministic environments.)
        register(
            id='{}NoFrameskip-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1},  # A frameskip of 1 means we get every frame
            max_episode_steps=frameskip * 100000,
            nondeterministic=nondeterministic,
        )

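# The loop above registers six ids per (game, observation type) pair. For
# example, game='space_invaders' with obs_type='image' yields SpaceInvaders-v0,
# SpaceInvaders-v4, SpaceInvadersDeterministic-v0/-v4 and
# SpaceInvadersNoFrameskip-v0/-v4; the 'ram' observation type yields the same
# set with a '-ram' infix (e.g. SpaceInvaders-ram-v0). The -v0 variants use
# sticky actions (repeat_action_probability=0.25); the -v4 variants do not.
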
# Board games
# ----------------------------------------
register(
    id='Go9x9-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
    # The pachi player seems not to be deterministic given a fixed seed.
    # (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
    #
    # This is probably due to a computation time limit.
    nondeterministic=True,
)

register(
    id='Go19x19-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 19,
    },
    nondeterministic=True,
)

register(
    id='Hex9x9-v0',
    entry_point='gym.envs.board_game:HexEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'random',
        'observation_type': 'numpy3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
)

# Debugging
# ----------------------------------------
register(
    id='OneRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='OneRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundNondeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundNondeterministicRewardEnv',
    local_only=True,
)

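# local_only appears to mark these debugging environments as intended only for
# local use (e.g. excluded from remote/scoreboard evaluation in this era of gym).
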
# Parameter tuning
# ----------------------------------------
register(
    id='ConvergenceControl-v0',
    entry_point='gym.envs.parameter_tuning:ConvergenceControl',
)

register(
    id='CNNClassifierTraining-v0',
    entry_point='gym.envs.parameter_tuning:CNNClassifierTraining',
)

# Safety
# ----------------------------------------
# interpretability envs
register(
    id='PredictActionsCartpole-v0',
    entry_point='gym.envs.safety:PredictActionsCartpoleEnv',
    max_episode_steps=200,
)

register(
    id='PredictObsCartpole-v0',
    entry_point='gym.envs.safety:PredictObsCartpoleEnv',
    max_episode_steps=200,
)

# semi_supervised envs
# probably the easiest:
register(
    id='SemisuperPendulumNoise-v0',
    entry_point='gym.envs.safety:SemisuperPendulumNoiseEnv',
    max_episode_steps=200,
)

# somewhat harder because of higher variance:
register(
    id='SemisuperPendulumRandom-v0',
    entry_point='gym.envs.safety:SemisuperPendulumRandomEnv',
    max_episode_steps=200,
)

# probably the hardest because you only get a constant number of rewards in total:
register(
    id='SemisuperPendulumDecay-v0',
    entry_point='gym.envs.safety:SemisuperPendulumDecayEnv',
    max_episode_steps=200,
)

# off_switch envs
register(
    id='OffSwitchCartpole-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleEnv',
    max_episode_steps=200,
)

register(
    id='OffSwitchCartpoleProb-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleProbEnv',
    max_episode_steps=200,
)

# RL^2
# ----------------------------------------
# Bandits
for n_arms in [5, 10, 50]:
    for n_episodes in [10, 100, 500, 1000]:
        register(
            id='BernoulliBandit-{k}.arms-{n}.episodes-v0'.format(k=n_arms, n=n_episodes),
            entry_point='gym.envs.rl2:BernoulliBanditEnv',
            kwargs={'n_arms': n_arms, 'n_episodes': n_episodes},
            timestep_limit=n_episodes,
        )

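# Example generated id: 'BernoulliBandit-5.arms-10.episodes-v0'.
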
# Tabular MDPs
for n_states in [10]:
    for n_actions in [5]:
        for episode_length in [10]:
            for n_episodes in [10, 25, 50, 75, 100, 200, 400]:
                register(
                    id='RandomTabularMDP-{s}.states-{a}.actions-{t}.timesteps-{n}.episodes-v0'.format(
                        s=n_states, a=n_actions, t=episode_length, n=n_episodes),
                    entry_point='gym.envs.rl2:RandomTabularMDPEnv',
                    kwargs={'n_states': n_states, 'n_actions': n_actions, 'episode_length': episode_length,
                            'n_episodes': n_episodes},
                    timestep_limit=n_episodes * episode_length,
                )
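
# Example generated id: 'RandomTabularMDP-10.states-5.actions-10.timesteps-25.episodes-v0'.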