torchbenchmark/models/dlrm/__init__.py

from __future__ import absolute_import, division, print_function, unicode_literals

# miscellaneous
import builtins
import functools
# import bisect
# import shutil
import time
import json
from typing import Tuple
import sys

# data generation
from . import dlrm_data_pytorch as dp

# numpy
import numpy as np

# pytorch
import torch
import torch.nn as nn
from torch.nn.parallel.parallel_apply import parallel_apply
from torch.nn.parallel.replicate import replicate
from torch.nn.parallel.scatter_gather import gather, scatter

# quotient-remainder trick
from .tricks.qr_embedding_bag import QREmbeddingBag
# mixed-dimension trick
from .tricks.md_embedding_bag import PrEmbeddingBag, md_solver

from torch.optim.lr_scheduler import _LRScheduler
from .dlrm_s_pytorch import DLRM_Net, LRPolicyScheduler

from argparse import Namespace
from ...util.model import BenchmarkModel
from torchbenchmark.tasks import RECOMMENDATION


class Model(BenchmarkModel):
    task = RECOMMENDATION.RECOMMENDATION
    DEFAULT_TRAIN_BSIZE = 1000
    DEFAULT_EVAL_BSIZE = 1000

    def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
        super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

        # Train architecture: use the configuration in the paper.
        # Source: https://arxiv.org/pdf/1906.00091.pdf
        arch_embedding_size = "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000"
        arch_sparse_feature_size = 64
        arch_mlp_bot = "512-512-64"
        arch_mlp_top = "1024-1024-1024-1"
        data_generation = "random"
        mini_batch_size = 2048
        num_batches = self.batch_size
        num_indices_per_lookup = 100

        self.opt = Namespace(**{
            'm_spa': None,
            'ln_emb': None,
            'ln_bot': None,
            'ln_top': None,
            'arch_interaction_op': "dot",
            'arch_interaction_itself': False,
            'sigmoid_bot': -1,
            'sigmoid_top': -1,
            'sync_dense_params': True,
            'loss_threshold': 0.0,
            'ndevices': -1,
            'qr_flag': False,
            'qr_operation': "mult",
            'qr_collisions': 0,
            'qr_threshold': 200,
            'md_flag': False,
            'md_threshold': 200,
            'md_temperature': 0.3,
            'activation_function': "relu",
            'loss_function': "bce",
            'loss_weights': "1.0-1.0",
            'round_targets': False,
            'data_size': 6,
            'data_generation': data_generation,
            'data_trace_file': "./input/dist_emb_j.log",
            'raw_data_file': "",
            'processed_data_file': "",
            'data_randomize': "total",
            'data_trace_enable_padding': False,
            'max_ind_range': -1,
            'num_workers': 0,
            'memory_map': False,
            'data_sub_sample_rate': 0.0,
            'learning_rate': 0.01,
            'lr_num_warmup_steps': 0,
            'lr_decay_start_step': 0,
            'lr_num_decay_steps': 0,
            'arch_embedding_size': arch_embedding_size,
            'arch_sparse_feature_size': arch_sparse_feature_size,
            'arch_mlp_bot': arch_mlp_bot,
            'arch_mlp_top': arch_mlp_top,
            'mini_batch_size': mini_batch_size,
            'num_batches': num_batches,
            'num_indices_per_lookup': num_indices_per_lookup,
            'num_indices_per_lookup_fixed': True,
            'numpy_rand_seed': 123,
        })

        if self.device == "cuda":
            torch.cuda.manual_seed_all(self.opt.numpy_rand_seed)
            torch.backends.cudnn.deterministic = True

        # Prepare training data
        self.opt.ln_bot = np.fromstring(self.opt.arch_mlp_bot, dtype=int, sep="-")

        # Input and target at random
        self.opt.ln_emb = np.fromstring(self.opt.arch_embedding_size, dtype=int, sep="-")
        self.opt.m_den = self.opt.ln_bot[0]
        train_data, self.train_ld = dp.make_random_data_and_loader(self.opt, self.opt.ln_emb, self.opt.m_den)
        self.opt.nbatches = len(self.train_ld)

        self.opt.m_spa = self.opt.arch_sparse_feature_size
        num_fea = self.opt.ln_emb.size + 1  # num sparse + num dense features
        m_den_out = self.opt.ln_bot[self.opt.ln_bot.size - 1]
        # Size of the top MLP input depends on the feature-interaction operation.
        if self.opt.arch_interaction_op == "dot":
            # approach 1: all
            # num_int = num_fea * num_fea + m_den_out
            # approach 2: unique
            if self.opt.arch_interaction_itself:
                num_int = (num_fea * (num_fea + 1)) // 2 + m_den_out
            else:
                num_int = (num_fea * (num_fea - 1)) // 2 + m_den_out
        elif self.opt.arch_interaction_op == "cat":
            num_int = num_fea * m_den_out
        else:
            sys.exit(
                "ERROR: --arch-interaction-op="
                + self.opt.arch_interaction_op
                + " is not supported"
            )
        arch_mlp_top_adjusted = str(num_int) + "-" + self.opt.arch_mlp_top
        self.opt.ln_top = np.fromstring(arch_mlp_top_adjusted, dtype=int, sep="-")

        dlrm = DLRM_Net(
            self.opt.m_spa,
            self.opt.ln_emb,
            self.opt.ln_bot,
            self.opt.ln_top,
            arch_interaction_op=self.opt.arch_interaction_op,
            arch_interaction_itself=self.opt.arch_interaction_itself,
            sigmoid_bot=self.opt.sigmoid_bot,
            sigmoid_top=self.opt.sigmoid_top,
            sync_dense_params=self.opt.sync_dense_params,
            loss_threshold=self.opt.loss_threshold,
            ndevices=self.opt.ndevices,
            qr_flag=self.opt.qr_flag,
            qr_operation=self.opt.qr_operation,
            qr_collisions=self.opt.qr_collisions,
            qr_threshold=self.opt.qr_threshold,
            md_flag=self.opt.md_flag,
            md_threshold=self.opt.md_threshold,
        )

        # Preparing data: a single random batch is reused for every iteration.
        X, lS_o, lS_i, self.targets = next(iter(self.train_ld))
        X = X.to(self.device)
        lS_i = [S_i.to(self.device) for S_i in lS_i] if isinstance(lS_i, list) \
            else lS_i.to(self.device)
        lS_o = [S_o.to(self.device) for S_o in lS_o] if isinstance(lS_o, list) \
            else lS_o.to(self.device)
        self.targets = self.targets.to(self.device)

        # Setting Loss Function
        if self.opt.loss_function == "mse":
            self.loss_fn = torch.nn.MSELoss(reduction="mean")
        elif self.opt.loss_function == "bce":
            self.loss_fn = torch.nn.BCELoss(reduction="mean")
        elif self.opt.loss_function == "wbce":
            self.loss_ws = torch.tensor(np.fromstring(self.opt.loss_weights, dtype=float, sep="-"))
            self.loss_fn = torch.nn.BCELoss(reduction="none")
        else:
            sys.exit("ERROR: --loss-function=" + self.opt.loss_function + " is not supported")

        self.model = dlrm.to(self.device)
        self.example_inputs = (X, lS_o, lS_i)
        if test == "train":
            self.model.train()
            # Note: this overrides the loss function selected above for the train test.
            self.loss_fn = torch.nn.MSELoss(reduction="mean")
            self.optimizer = torch.optim.SGD(dlrm.parameters(), lr=self.opt.learning_rate)
            self.lr_scheduler = LRPolicyScheduler(self.optimizer,
                                                  self.opt.lr_num_warmup_steps,
                                                  self.opt.lr_decay_start_step,
                                                  self.opt.lr_num_decay_steps)
        elif test == "eval":
            self.model.eval()

    def get_module(self):
        return self.model, self.example_inputs

    def eval(self, niter=1) -> Tuple[torch.Tensor]:
        for _ in range(niter):
            out = self.model(*self.example_inputs)
        return (out, )

    def train(self, niter=1):
        gen = self.model(*self.example_inputs)
        for _ in range(niter):
            self.optimizer.zero_grad()
            loss = self.loss_fn(gen, self.targets)
            if self.opt.loss_function == "wbce":
                # Weighted BCE: scale each sample's loss by the class weight of its target
                # (the flattened source referenced an undefined name `T` here; use self.targets).
                loss_ws_ = self.loss_ws[self.targets.data.view(-1).long()].view_as(self.targets)
                loss = loss_ws_ * loss
                loss = loss.mean()
            loss.backward()
            self.optimizer.step()
            self.lr_scheduler.step()
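
# ---------------------------------------------------------------------------
# Illustrative usage (not part of the upstream file): a minimal sketch of how
# this benchmark class is typically driven. The test names ("train"/"eval"),
# the device string, and the import path are assumptions based only on the
# constructor signature above; adjust them to your torchbenchmark checkout.
#
#     from torchbenchmark.models.dlrm import Model
#
#     m = Model(test="eval", device="cpu")        # builds the random batch and DLRM_Net
#     module, example_inputs = m.get_module()     # (model, (X, lS_o, lS_i))
#     (out,) = m.eval(niter=1)                    # one forward pass over the cached batch
#     print(out.shape)
#
#     m_train = Model(test="train", device="cpu")
#     m_train.train(niter=1)                      # forward + backward + SGD/LR-scheduler step
# ---------------------------------------------------------------------------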