in qlearn/atari/train_noisy_agent.py [0:0]
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Noisy DQN experiments for Atari games")
    # Environment
    parser.add_argument("--env", type=str, default='PongNoFrameskip-v4', help="name of game")
    parser.add_argument("--seed", type=int, default=42, help="which seed to use")
    # Core DQN parameters
    parser.add_argument("--replay-buffer-size", type=int, default=int(1e6), help="replay buffer size")
    parser.add_argument("--lr", type=float, default=1e-4, help="learning rate for Adam optimizer")
    # parser.add_argument("--lr", type=float, default=2.5e-4, help="learning rate for RMSprop optimizer")
    # parser.add_argument("--alpha", type=float, default=0.95, help="alpha (squared gradient momentum) parameter for RMSprop optimizer")
    # parser.add_argument("--momentum", type=float, default=0.95, help="momentum parameter for RMSprop optimizer")
    # parser.add_argument("--eps-rmsprop", type=float, default=0.01, help="epsilon (min squared gradient) parameter for RMSprop optimizer")
parser.add_argument("--num-steps", type=int, default=int(1e7),
help="total number of steps to run the environment for")
parser.add_argument("--batch-size", type=int, default=32, help="number of transitions to optimize at the same time")
parser.add_argument("--learning-freq", type=int, default=4,
help="number of iterations between every optimization step")
parser.add_argument("--target-update-freq", type=int, default=10000,
help="number of iterations between every target network update")
parser.add_argument("--learning-starts", type=int, default=50000,
help="number of iterations after which learning starts")
# boolean_flag(parser, "double-q", default=False, help="whether or not to use double q learning")
parser.add_argument("--double-q", type=int, default=0, help="whether or not to use double q learning")
    # Checkpointing
    parser.add_argument("--log-dir", type=str, default="log_dir",
                        help="directory in which tensorboard events will be written out")
    parser.add_argument("--save-dir", type=str, default="save_dir",
                        help="directory in which training state and model will be saved")
    parser.add_argument("--save-freq", type=int, default=int(1e6),
                        help="save the model once every time this many iterations have been completed")
parser.add_argument("--final-exploration", type=float, default=0.1,
help="final value of epsilon in epsilon greedy exploration")
parser.add_argument("--final-exploration-frame", type=int, default=int(1e6),
help="the number of frames over which the initial value of epsilon is linearly annealed to its final value")
    # New options
    parser.add_argument("--print-freq", type=int, default=100, help="printing frequency")
    parser.add_argument("--run-index", type=int, default=None, help="index of this run (RUN_ID)")
    parser.add_argument("--cuda", type=int, default=1, help="whether or not to use CUDA (0 or 1)")
    parser.add_argument("--agent", type=str, default="NoisyDQN", help="which agent to run")
    parser.add_argument("--discount", type=float, default=0.99, help="discount factor")
    parser.add_argument("--model", type=str, default=None, help="model directory to load")
    return parser.parse_args()
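

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): one plausible way the
# flags above could be wired together: seeding, device selection from --cuda,
# and the linear epsilon schedule implied by --final-exploration and
# --final-exploration-frame. The helper name `linear_epsilon` and the `main`
# entry point are assumptions; the agent, replay buffer, and training loop of
# the real script are omitted.
# ---------------------------------------------------------------------------
def linear_epsilon(step, final_eps, final_frame, initial_eps=1.0):
    """Anneal epsilon linearly from initial_eps to final_eps over final_frame steps."""
    fraction = min(float(step) / final_frame, 1.0)
    return initial_eps + fraction * (final_eps - initial_eps)


def main():
    args = parse_args()

    # Seed the RNGs for reproducibility; torch is assumed here since --cuda and
    # the Adam learning rate suggest a PyTorch implementation.
    import random

    import numpy as np
    import torch

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if args.cuda and torch.cuda.is_available() else "cpu")

    for step in range(args.num_steps):
        # Epsilon that would apply at this step if the agent ever falls back to
        # epsilon-greedy action selection.
        eps = linear_epsilon(step, args.final_exploration, args.final_exploration_frame)
        # The real loop would: act in the environment, push transitions into a
        # replay buffer of size args.replay_buffer_size, and, after
        # args.learning_starts steps, optimize a batch of args.batch_size
        # transitions every args.learning_freq steps, syncing the target
        # network every args.target_update_freq iterations.
        _ = (eps, device)
        break  # placeholder: stop after one iteration in this sketch


if __name__ == '__main__':
    main()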