in dlrm_s_caffe2.py [0:0]
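create_emb builds one embedding table per sparse feature and wires up the pooling operators (SparseLengthsSum / SparseLengthsWeightedSum) that reduce each bag of lookups to a single vector. The snippet relies on the file's module-level imports: numpy as np, onnx, and caffe2.python's core and workspace.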
def create_emb(self, m, ln, model, tag):
    (tag_layer, tag_in, tag_out) = tag
    emb_l = []
    weights_l = []
    vw_l = []
    for i in range(0, ln.size):
        n = ln[i]
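        # n is the number of rows in table i; all tables share embedding dim m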
        # select device
        if self.ndevices > 1:
            d = i % self.ndevices
        else:
            d = -1
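        # tables are placed round-robin across GPUs; device_id -1 falls
        # through to the default (CPU) path in FeedBlobWrapper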
        # create tags
        on_device = "" if self.ndevices <= 1 else "gpu_" + str(d) + "/"
        len_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_l"
        ind_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_i"
        tbl_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_w"
        sum_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_z"
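        # blob-name suffixes: _l = lengths, _i = indices, _w = embedding
        # table, _z = pooled output (see the SparseLengthsSum inputs below)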
        weights_l.append(tbl_s)
        # initialize the weights
        # approach 1a: custom
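        # uniform init scaled by fan-in: U(-1/sqrt(n), +1/sqrt(n))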
        W = np.random.uniform(low=-np.sqrt(1 / n),
                              high=np.sqrt(1 / n),
                              size=(n, m)).astype(np.float32)
        # approach 1b: numpy rand
        # W = ra.rand(n, m).astype(np.float32)
        self.FeedBlobWrapper(tbl_s, W, False, device_id=d)
        # approach 2: caffe2 xavier
        # with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, d)):
        #     W = model.param_init_net.XavierFill([], tbl_s, shape=[n, m])
        # initialize the embedding's momentum for the Adagrad optimizer
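        # (plain Adagrad keeps full (n, m) state; "rwsadagrad", row-wise
        # sparse Adagrad, keeps one momentum value per row, hence shape (n,))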
        if self.emb_optimizer == "adagrad":
            self.FeedBlobWrapper("momentum_emb_{}".format(i),
                                 np.full((n, m), 0), add_prefix=False, device_id=d)
        elif self.emb_optimizer == "rwsadagrad":
            self.FeedBlobWrapper("momentum_emb_{}".format(i),
                                 np.full((n), 0), add_prefix=False, device_id=d)
        # save the blob shapes for later (only needed if onnx is requested)
        if self.save_onnx:
            self.onnx_tsd[tbl_s] = (onnx.TensorProto.FLOAT, W.shape)
        # create operator
        if self.weighted_pooling is not None:
            vw_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_v"
            psw_s = on_device + tag_layer + ":::" + "sls" + str(i) + "_s"
            VW = np.ones(n).astype(np.float32)
            self.FeedBlobWrapper(vw_s, VW, False, device_id=d)
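            # weights start at 1.0, so weighted pooling initially reproduces a
            # plain sum; they are trained only when weighted_pooling == "learned"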
            if self.weighted_pooling == "learned":
                vw_l.append(vw_s)
                grad_on_weights = True
            else:
                grad_on_weights = False
            if self.save_onnx:
                self.onnx_tsd[vw_s] = (onnx.TensorProto.FLOAT, VW.shape)
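            # Gather pulls out one weight per lookup index so PSW lines up
            # with ind_s; SparseLengthsWeightedSum then pools weight * row
            # segment-by-segment according to len_s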
            if self.ndevices <= 1:
                PSW = model.net.Gather([vw_s, ind_s], [psw_s])
                EE = model.net.SparseLengthsWeightedSum(
                    [tbl_s, PSW, ind_s, len_s], [sum_s],
                    grad_on_weights=grad_on_weights
                )
            else:
                with core.DeviceScope(
                    core.DeviceOption(workspace.GpuDeviceType, d)
                ):
                    PSW = model.net.Gather([vw_s, ind_s], [psw_s])
                    EE = model.net.SparseLengthsWeightedSum(
                        [tbl_s, PSW, ind_s, len_s], [sum_s],
                        grad_on_weights=grad_on_weights
                    )
        else:
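            # no per-sample weights: pool with a plain (unweighted) sum of rows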
            if self.ndevices <= 1:
                EE = model.net.SparseLengthsSum(
                    [tbl_s, ind_s, len_s], [sum_s]
                )
            else:
                with core.DeviceScope(
                    core.DeviceOption(workspace.GpuDeviceType, d)
                ):
                    EE = model.net.SparseLengthsSum(
                        [tbl_s, ind_s, len_s], [sum_s]
                    )
        emb_l.append(EE)
    return emb_l, weights_l, vw_l
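For context, a minimal sketch of a call site; the names m_spa, ln_emb, and the tag values here are illustrative assumptions, not taken verbatim from the file:

# hypothetical call site (inside the same class), one table per sparse feature
ln_emb = np.array([1000, 5000, 250])       # example row counts per table
tag = ("emb", "emb_in", "emb_out")         # (tag_layer, tag_in, tag_out)
emb_l, weights_l, vw_l = self.create_emb(
    m_spa, ln_emb, model, tag              # m_spa: shared embedding dimension
)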