def create_emb()

in dlrm_s_caffe2.py [0:0]


    def create_emb(self, m, ln, model, tag):
        (tag_layer, tag_in, tag_out) = tag
        emb_l = []
        weights_l = []
        vw_l = []
        for i in range(0, ln.size):
            n = ln[i]

            # select device
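            # (with multiple devices, tables are assigned to GPUs round-robin by
            #  table index; d == -1 is the single-device case, handled below
            #  without a per-GPU blob prefix or GPU device scope)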
            if self.ndevices > 1:
                d = i % self.ndevices
            else:
                d = -1

            # create tags
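            # (suffixes: "_l" per-sample lengths, "_i" indices, "_w" the
            #  embedding table, "_z" the pooled output of the SparseLengthsSum)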
            on_device = "" if self.ndevices <= 1 else "gpu_" + str(d) + "/"
            len_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_l"
            ind_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_i"
            tbl_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_w"
            sum_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_z"
            weights_l.append(tbl_s)

            # initialize the weights
            # approach 1a: custom
            W = np.random.uniform(low=-np.sqrt(1 / n),
                                  high=np.sqrt(1 / n),
                                  size=(n, m)).astype(np.float32)
            # approach 1b: numpy rand
            # W = ra.rand(n, m).astype(np.float32)
            self.FeedBlobWrapper(tbl_s, W, False, device_id=d)
            # approach 2: caffe2 xavier
            # with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, d)):
            #     W = model.param_init_net.XavierFill([], tbl_s, shape=[n, m])
            # save the blob shapes for later (only needed if onnx is requested)

            # initialize the embedding's momentum for the Adagrad optimizers:
            # plain Adagrad keeps one moment per parameter (shape (n, m)), while
            # row-wise sparse Adagrad ("rwsadagrad") keeps one moment per row (n,)
            if self.emb_optimizer == "adagrad":
                self.FeedBlobWrapper("momentum_emb_{}".format(i),
                    np.full((n, m), 0, dtype=np.float32),
                    add_prefix=False, device_id=d)
            elif self.emb_optimizer == "rwsadagrad":
                self.FeedBlobWrapper("momentum_emb_{}".format(i),
                    np.full((n), 0, dtype=np.float32),
                    add_prefix=False, device_id=d)

            if self.save_onnx:
                self.onnx_tsd[tbl_s] = (onnx.TensorProto.FLOAT, W.shape)

            # create operator
            if self.weighted_pooling is not None:
                vw_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_v"
                psw_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_s"
                VW = np.ones(n).astype(np.float32)
                self.FeedBlobWrapper(vw_s, VW, False, device_id=d)
                if self.weighted_pooling == "learned":
                    vw_l.append(vw_s)
                    grad_on_weights = True
                else:
                    grad_on_weights = False
                if self.save_onnx:
                    self.onnx_tsd[vw_s] = (onnx.TensorProto.FLOAT, VW.shape)
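                # gather the per-index weights from the per-row vector vw_s, then
                # use SparseLengthsWeightedSum; grad_on_weights lets gradients
                # reach the weights only when weighted_pooling == "learned"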
                if self.ndevices <= 1:
                    PSW = model.net.Gather([vw_s, ind_s], [psw_s])
                    EE = model.net.SparseLengthsWeightedSum(
                        [tbl_s, PSW, ind_s, len_s], [sum_s],
                        grad_on_weights=grad_on_weights
                    )
                else:
                    with core.DeviceScope(
                        core.DeviceOption(workspace.GpuDeviceType, d)
                    ):
                        PSW = model.net.Gather([vw_s, ind_s], [psw_s])
                        EE = model.net.SparseLengthsWeightedSum(
                            [tbl_s, PSW, ind_s, len_s], [sum_s],
                            grad_on_weights=grad_on_weights
                        )
            else:
                if self.ndevices <= 1:
                    EE = model.net.SparseLengthsSum(
                        [tbl_s, ind_s, len_s], [sum_s]
                    )
                else:
                    with core.DeviceScope(
                        core.DeviceOption(workspace.GpuDeviceType, d)
                    ):
                        EE = model.net.SparseLengthsSum(
                            [tbl_s, ind_s, len_s], [sum_s]
                        )
            emb_l.append(EE)

        return emb_l, weights_l, vw_l
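
For reference, the snippet below is a minimal standalone sketch of the core operator this method wires up per table; it is not part of dlrm_s_caffe2.py, and the blob names and sample values are made up for illustration. SparseLengthsSum gathers rows of the embedding table by index and sums them per bag, with the lengths blob partitioning the flat index blob into bags; the SparseLengthsWeightedSum used in the weighted-pooling branch additionally scales each gathered row by its gathered per-index weight.

    # standalone illustration (hypothetical blob names), not repo code
    import numpy as np
    from caffe2.python import core, workspace

    n, m = 10, 4  # table rows (categories) and embedding dimension
    W = np.random.uniform(-np.sqrt(1 / n), np.sqrt(1 / n),
                          size=(n, m)).astype(np.float32)
    indices = np.array([1, 3, 3, 7], dtype=np.int64)  # flat indices for all bags
    lengths = np.array([2, 2], dtype=np.int32)        # bag sizes: two bags of two

    workspace.FeedBlob("tbl", W)        # plays the role of tbl_s ("..._w")
    workspace.FeedBlob("ind", indices)  # plays the role of ind_s ("..._i")
    workspace.FeedBlob("len", lengths)  # plays the role of len_s ("..._l")
    workspace.RunOperatorOnce(
        core.CreateOperator("SparseLengthsSum", ["tbl", "ind", "len"], ["out"])
    )
    out = workspace.FetchBlob("out")
    # out has shape (2, m): out[0] == W[1] + W[3], out[1] == W[3] + W[7]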