def create_mlp()

in dlrm_s_caffe2.py [0:0]


    def create_mlp(self, ln, sigmoid_layer, model, tag):
        (tag_layer, tag_in, tag_out) = tag
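        # tag_layer prefixes every blob created for this MLP, tag_in is the
        # input blob fed to the first FC, and tag_out names the final output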

        # build MLP layer by layer
        layers = []
        weights = []
        for i in range(1, ln.size):
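            # n = fan-in (previous layer size), m = fan-out (current layer size)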
            n = ln[i - 1]
            m = ln[i]

            # create tags
            tag_fc_w = tag_layer + ":::" + "fc" + str(i) + "_w"
            tag_fc_b = tag_layer + ":::" + "fc" + str(i) + "_b"
            tag_fc_y = tag_layer + ":::" + "fc" + str(i) + "_y"
            tag_fc_z = tag_layer + ":::" + "fc" + str(i) + "_z"
            if i == ln.size - 1:
                tag_fc_z = tag_out
            weights.append(tag_fc_w)
            weights.append(tag_fc_b)

            # initialize the weights
            # approach 1: custom Xavier input, output or two-sided fill
            mean = 0.0  # std_dev = np.sqrt(variance)
            std_dev = np.sqrt(2 / (m + n))  # np.sqrt(1 / m) # np.sqrt(1 / n)
            W = np.random.normal(mean, std_dev, size=(m, n)).astype(np.float32)
            std_dev = np.sqrt(1 / m)  # np.sqrt(2 / (m + 1))
            b = np.random.normal(mean, std_dev, size=m).astype(np.float32)
            self.FeedBlobWrapper(tag_fc_w, W)
            self.FeedBlobWrapper(tag_fc_b, b)
            # approach 2: caffe2 xavier
            # W = self.AddLayerWrapper(
            #     model.param_init_net.XavierFill,
            #     [],
            #     tag_fc_w,
            #     shape=[m, n]
            # )
            # b = self.AddLayerWrapper(
            #     model.param_init_net.ConstantFill,
            #     [],
            #     tag_fc_b,
            #     shape=[m]
            # )

            # initialize the MLP's momentum for the Adagrad optimizer
            if self.emb_optimizer in ["adagrad", "rwsadagrad"]:
                # momentum of the weights
                self.FeedBlobWrapper(
                    "momentum_mlp_{}_{}".format(tag_layer, 2 * i - 1),
                    np.full((m, n), 0, dtype=np.float32)
                )
                # momentum of the biases
                self.FeedBlobWrapper(
                    "momentum_mlp_{}_{}".format(tag_layer, 2 * i),
                    np.full((m), 0, dtype=np.float32)
                )

            # save the blob shapes for later (only needed if onnx is requested)
            if self.save_onnx:
                self.onnx_tsd[tag_fc_w] = (onnx.TensorProto.FLOAT, W.shape)
                self.onnx_tsd[tag_fc_b] = (onnx.TensorProto.FLOAT, b.shape)

            # approach 1: construct fully connected operator using model.net
            fc = self.AddLayerWrapper(
                model.net.FC, [tag_in, tag_fc_w, tag_fc_b], tag_fc_y
            )
            # approach 2: construct fully connected operator using brew
            # https://github.com/caffe2/tutorials/blob/master/MNIST.ipynb
            # fc = brew.fc(model, tag_in, tag_fc_y, dim_in=n, dim_out=m)
            layers.append(fc)

            if i == sigmoid_layer:
                # approach 1: construct sigmoid operator using model.net
                layer = self.AddLayerWrapper(model.net.Sigmoid, tag_fc_y, tag_fc_z)
                # approach 2: using brew (which currently does not support sigmoid)
                # tag_sigm = tag_layer + ":::" + "sigmoid" + str(i)
                # layer = brew.sigmoid(model, fc, tag_sigm)
            else:
                # approach 1: construct relu operator using model.net
                layer = self.AddLayerWrapper(model.net.Relu, tag_fc_y, tag_fc_z)
                # approach 2: using brew
                # tag_relu = tag_layer + ":::" + "relu" + str(i)
                # layer = brew.relu(model, fc, tag_relu)
            tag_in = tag_fc_z
            layers.append(layer)

        # WARNING: the dependency between layers is implicit in the tags;
        # although every FC and activation operator is appended to layers,
        # only the last one (which writes tag_out) is used later for interactions.
        return layers, weights
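
For reference, a minimal standalone sketch of the "approach 1" initialization loop above; the layer sizes in ln are illustrative assumptions, not values taken from the source:

    import numpy as np

    # hypothetical layer sizes: 13 dense features -> 512 -> 256 -> 64
    ln = np.array([13, 512, 256, 64])
    for i in range(1, ln.size):
        n, m = ln[i - 1], ln[i]  # fan-in and fan-out of layer i
        # two-sided Xavier fill for the weights, one-sided fill for the biases
        W = np.random.normal(0.0, np.sqrt(2 / (m + n)), size=(m, n)).astype(np.float32)
        b = np.random.normal(0.0, np.sqrt(1 / m), size=m).astype(np.float32)
        print("fc{}: W {}, b {}".format(i, W.shape, b.shape))

Running this prints the per-layer shapes, e.g. fc1: W (512, 13), b (512,), matching the (output, input) weight layout that model.net.FC consumes above.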