in dlrm_s_caffe2.py [0:0]
# module-level imports used by this method (they sit at the top of the file)
import numpy as np
import onnx

def create_mlp(self, ln, sigmoid_layer, model, tag):
    (tag_layer, tag_in, tag_out) = tag

    # build MLP layer by layer
    layers = []
    weights = []
    for i in range(1, ln.size):
        n = ln[i - 1]  # fan-in (input width) of layer i
        m = ln[i]      # fan-out (output width) of layer i
        # create tags
        tag_fc_w = tag_layer + ":::" + "fc" + str(i) + "_w"
        tag_fc_b = tag_layer + ":::" + "fc" + str(i) + "_b"
        tag_fc_y = tag_layer + ":::" + "fc" + str(i) + "_y"
        tag_fc_z = tag_layer + ":::" + "fc" + str(i) + "_z"
        if i == ln.size - 1:
            tag_fc_z = tag_out
        weights.append(tag_fc_w)
        weights.append(tag_fc_b)
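        # example of the naming scheme (added for clarity, derived from the
        # code above): with tag_layer == "bot" and i == 1 the blobs are named
        # "bot:::fc1_w", "bot:::fc1_b", "bot:::fc1_y" (pre-activation) and
        # "bot:::fc1_z" (post-activation); on the last layer the output tag
        # is replaced by tag_out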
        # initialize the weights
        # approach 1: custom Xavier input, output or two-sided fill
        mean = 0.0  # std_dev = np.sqrt(variance)
        std_dev = np.sqrt(2 / (m + n))  # np.sqrt(1 / m) # np.sqrt(1 / n)
        W = np.random.normal(mean, std_dev, size=(m, n)).astype(np.float32)
        std_dev = np.sqrt(1 / m)  # np.sqrt(2 / (m + 1))
        b = np.random.normal(mean, std_dev, size=m).astype(np.float32)
        self.FeedBlobWrapper(tag_fc_w, W)
        self.FeedBlobWrapper(tag_fc_b, b)
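        # worked example (added for clarity): for a layer mapping n = 512
        # inputs to m = 256 outputs, the two-sided Glorot/Xavier fill above
        # gives std_dev = sqrt(2 / 768) ~= 0.051; the commented one-sided
        # variants sqrt(1 / m) and sqrt(1 / n) scale by fan-out or fan-in only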
        # approach 2: caffe2 xavier
        # W = self.AddLayerWrapper(
        #     model.param_init_net.XavierFill,
        #     [],
        #     tag_fc_w,
        #     shape=[m, n]
        # )
        # b = self.AddLayerWrapper(
        #     model.param_init_net.ConstantFill,
        #     [],
        #     tag_fc_b,
        #     shape=[m]
        # )
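        # note (added): the two approaches are not equivalent; to the best of
        # our knowledge Caffe2's XavierFill samples from a uniform (not
        # normal) distribution scaled by fan-in, and ConstantFill would
        # zero-initialize the bias instead of sampling it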
        # initialize the MLP's momentum for the Adagrad optimizer
        if self.emb_optimizer in ["adagrad", "rwsadagrad"]:
            # momentum of the weights
            self.FeedBlobWrapper(
                "momentum_mlp_{}_{}".format(tag_layer, 2 * i - 1),
                np.full((m, n), 0, dtype=np.float32)
            )
            # momentum of the biases
            self.FeedBlobWrapper(
                "momentum_mlp_{}_{}".format(tag_layer, 2 * i),
                np.full(m, 0, dtype=np.float32)
            )
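        # note (added): these blobs hold Adagrad's per-parameter accumulators
        # of squared gradients (called "momentum" here), initialized to zero;
        # the indices 2*i - 1 and 2*i pair them with the i-th layer's weight
        # and bias blobs respectively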
        # save the blob shapes for later (only needed if onnx is requested)
        if self.save_onnx:
            self.onnx_tsd[tag_fc_w] = (onnx.TensorProto.FLOAT, W.shape)
            self.onnx_tsd[tag_fc_b] = (onnx.TensorProto.FLOAT, b.shape)
        # approach 1: construct fully connected operator using model.net
        fc = self.AddLayerWrapper(
            model.net.FC, [tag_in, tag_fc_w, tag_fc_b], tag_fc_y
        )
        # approach 2: construct fully connected operator using brew
        # https://github.com/caffe2/tutorials/blob/master/MNIST.ipynb
        # fc = brew.fc(model, tag_in, tag_fc_y, dim_in=n, dim_out=m)
        layers.append(fc)
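        # note (added): Caffe2's FC operator computes Y = X * W^T + b, which
        # is why W was created with shape (m, n): its n columns match the
        # input width and its m rows produce the output width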
        if i == sigmoid_layer:
            # approach 1: construct sigmoid operator using model.net
            layer = self.AddLayerWrapper(model.net.Sigmoid, tag_fc_y, tag_fc_z)
            # approach 2: using brew (which currently does not support sigmoid)
            # tag_sigm = tag_layer + ":::" + "sigmoid" + str(i)
            # layer = brew.sigmoid(model, fc, tag_sigm)
        else:
            # approach 1: construct relu operator using model.net
            layer = self.AddLayerWrapper(model.net.Relu, tag_fc_y, tag_fc_z)
            # approach 2: using brew
            # tag_relu = tag_layer + ":::" + "relu" + str(i)
            # layer = brew.relu(model, fc, tag_relu)
        tag_in = tag_fc_z  # chain: this layer's output feeds the next FC
        layers.append(layer)
    # WARNING: the dependency between layers is implicit in their tags, so
    # although every layer is appended above, only the last one is needed
    # downstream. It will later be used for the feature interactions.
    return layers, weights
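A minimal sketch of how this method might be invoked (the layer widths, tag
triple, and sigmoid index below are illustrative assumptions, not values taken
from the file):

    # hypothetical call for a bottom MLP with widths 13 -> 512 -> 256 -> 64;
    # sigmoid_layer = -1 keeps ReLU on every layer, while passing
    # ln.size - 1 would put a sigmoid on the final layer instead
    ln_bot = np.array([13, 512, 256, 64])
    layers, weights = self.create_mlp(
        ln_bot, sigmoid_layer=-1, model=model, tag=("bot", "X", "bot_out")
    )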