in dlrm_s_caffe2.py [0:0]
def FeedBlobWrapper(self, tag, val, add_prefix=True, split=False, device_id=-1):
    """Feed ``val`` into the Caffe2 workspace under the blob name ``tag``.

    Multi-device mode is used when ``self.ndevices > 1`` and ``add_prefix``
    is true: one blob is fed per device ``d`` under the name
    ``"gpu_<d>/<tag>"``, with a device option built from
    ``workspace.GpuDeviceType`` and ``d``. Otherwise a single blob named
    ``tag`` is fed.

    Args:
        tag: Blob name (without any ``"gpu_<d>/"`` prefix).
        val: Value to feed. When ``split`` is used it must expose
            ``shape[0]`` and be accepted by ``np.split``.
        add_prefix: If false, multi-device mode is never used, regardless
            of ``self.ndevices``.
        split: Multi-device mode only. If true, ``val`` is split along
            axis 0 into ``self.ndevices`` equal parts and part ``d`` is fed
            to device ``d``; if false, every device receives the full
            ``val``.
        device_id: Single-blob mode only. When ``>= 0`` the blob is fed
            with a device option for that device id; otherwise
            ``workspace.FeedBlob`` is called without a device option.

    Raises:
        SystemExit: Via ``sys.exit`` when ``split`` is requested in
            multi-device mode and ``val.shape[0]`` is not a multiple of
            ``self.ndevices``.
    """
    if not (self.ndevices > 1 and add_prefix):
        # feed to a single device (named or not)
        if device_id >= 0:
            _d = core.DeviceOption(workspace.GpuDeviceType, device_id)
            workspace.FeedBlob(tag, val, device_option=_d)
        else:
            workspace.FeedBlob(tag, val)
        return

    if split:
        # Split across devices. Only even splits are supported here: the
        # mini-batch size must be exactly divisible by the device count.
        mini_batch_size = val.shape[0]
        if mini_batch_size % self.ndevices != 0:
            sys.exit(
                "ERROR: caffe2 net assumes that the mini_batch_size "
                + str(mini_batch_size)
                + " is evenly divisible by the number of available devices "
                + str(self.ndevices)
            )
        vals = np.split(val, self.ndevices, axis=0)
    else:
        # Every device receives the same (unsplit) value.
        vals = [val] * self.ndevices

    # feed to multiple devices
    for d, val_on_device in enumerate(vals):
        tag_on_device = "gpu_" + str(d) + "/" + tag
        _d = core.DeviceOption(workspace.GpuDeviceType, d)
        workspace.FeedBlob(tag_on_device, val_on_device, device_option=_d)