spinup/exercises/pytorch/problem_set_1/exercise1_2_auxiliary.py
import torch
import torch.nn as nn
import numpy as np
"""
Auxiliary code for Exercise 1.2. No part of the exercise requires you to
look into or modify this file (and since it contains an mlp function,
it has spoilers for the answer). Removed from the main file to avoid
cluttering it up.
In other words, nothing to see here, move along, these are not the
droids you're looking for, and all that...
"""

def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes)-1):
        act = activation if j < len(sizes)-2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
    return nn.Sequential(*layers)
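
# For reference, a call like mlp([3, 64, 64, 1], nn.Tanh) (hypothetical sizes)
# builds
#   nn.Sequential(
#       nn.Linear(3, 64), nn.Tanh(),
#       nn.Linear(64, 64), nn.Tanh(),
#       nn.Linear(64, 1), nn.Identity(),
#   )
# i.e. the chosen activation after every hidden layer, and output_activation
# (Identity by default) after the final layer.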

class MLPCritic(nn.Module):

    def __init__(self, obs_dim, hidden_sizes, activation):
        super().__init__()
        self.v_net = mlp([obs_dim] + list(hidden_sizes) + [1], activation)

    def forward(self, obs):
        return torch.squeeze(self.v_net(obs), -1) # Critical to ensure v has right shape.

class ExerciseActorCritic(nn.Module):

    def __init__(self, observation_space, action_space,
                 hidden_sizes=(64,64), activation=nn.Tanh,
                 actor=None):
        super().__init__()

        obs_dim = observation_space.shape[0]

        # Policy (built from the actor class passed in by the exercise file)
        # and value function.
        self.pi = actor(obs_dim, action_space.shape[0], hidden_sizes, activation)
        self.v = MLPCritic(obs_dim, hidden_sizes, activation)

    def step(self, obs):
        # Sample an action, its log-probability, and the value estimate
        # without tracking gradients (used only for environment interaction).
        with torch.no_grad():
            pi, _ = self.pi(obs)
            a = pi.sample()
            logp_a = pi.log_prob(a)
            v = self.v(obs)
        return a.numpy(), v.numpy(), logp_a.numpy()

    def act(self, obs):
        return self.step(obs)[0]
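

# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the original file. It assumes a
# hypothetical actor class (_DemoGaussianActor) exposing the interface that
# step() above relies on, i.e. forward(obs, act=None) -> (distribution,
# log-prob of act or None); the real actor is the one you implement in
# exercise1_2.py. It also assumes gym is installed (Spinning Up depends on it).
if __name__ == '__main__':
    from gym.spaces import Box

    class _DemoGaussianActor(nn.Module):
        """Hypothetical stand-in actor for demonstration only."""

        def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
            super().__init__()
            self.mu_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation)
            self.log_std = nn.Parameter(-0.5 * torch.ones(act_dim))

        def forward(self, obs, act=None):
            pi = torch.distributions.Normal(self.mu_net(obs), torch.exp(self.log_std))
            logp_a = pi.log_prob(act).sum(-1) if act is not None else None
            return pi, logp_a

    # Toy observation/action spaces standing in for a real environment.
    obs_space = Box(low=-1.0, high=1.0, shape=(3,))
    act_space = Box(low=-2.0, high=2.0, shape=(1,))

    ac = ExerciseActorCritic(obs_space, act_space, actor=_DemoGaussianActor)
    obs = torch.as_tensor(obs_space.sample(), dtype=torch.float32)
    a, v, logp_a = ac.step(obs)
    print('action:', a, 'value:', v, 'logp:', logp_a)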