def create_NETFLIX_data_timesplit()

in torchbenchmark/models/nvidia_deeprecommender/data_utils/netflix_data_convert.py [0:0]


def create_NETFLIX_data_timesplit(all_data,
                                  train_min,
                                  train_max,
                                  test_min,
                                  test_max):
  """
  Split per-user NETFLIX ratings into train, validation and test sets by time.

  Each user's ratings are visited in ascending timestamp order:
    * a rating inside [train_min, train_max] goes to the training set;
    * otherwise, a rating inside [test_min, test_max] is assigned to
      validation (random draw <= 0.5) or test (random draw > 0.5), but only
      if that user already has at least one training rating;
    * any other rating is ignored.
  Afterwards, validation/test ratings whose item never appears in the
  training set are dropped. A user whose ratings are all dropped this way
  stays in the result with an empty list.

  :param all_data: dict mapping user id -> iterable of rating records, where
                   record[0] is the item id and record[2] is the timestamp
  :param train_min: first day of the training window, "YYYY-MM-DD" (inclusive)
  :param train_max: last day of the training window, "YYYY-MM-DD" (inclusive)
  :param test_min: first day of the test window, "YYYY-MM-DD" (inclusive)
  :param test_max: last day of the test window, "YYYY-MM-DD" (inclusive)
  :return: tuple (training_data, validation_data, test_data); each is a dict
           mapping user id -> list of rating records
  """
  def _day_to_ts(day):
    # time.mktime interprets the parsed date as local time.
    return time.mktime(datetime.datetime.strptime(day, "%Y-%m-%d").timetuple())

  train_lo, train_hi = _day_to_ts(train_min), _day_to_ts(train_max)
  test_lo, test_hi = _day_to_ts(test_min), _day_to_ts(test_max)

  training_data = {}
  validation_data = {}
  test_data = {}
  items_in_train = set()  # every item id that occurs in the training set

  for user, ratings in all_data.items():
    # Chronological order matters: a user's training ratings must already be
    # registered when their test-window ratings are reached.
    for record in sorted(ratings, key=lambda r: r[2]):
      stamp = record[2]
      if train_lo <= stamp <= train_hi:
        training_data.setdefault(user, []).append(record)
        items_in_train.add(record[0])
        continue
      if not (test_lo <= stamp <= test_hi):
        continue  # outside both windows
      if user not in training_data:
        continue  # only keep users seen in the training set
      # One random draw per eligible rating decides validation vs. test.
      target = validation_data if random.random() <= 0.5 else test_data
      target.setdefault(user, []).append(record)

  def _known_items_only(split):
    # Drop ratings for items that were not seen in the training set.
    return {
        user: [record for record in ratings if record[0] in items_in_train]
        for user, ratings in split.items()
    }

  return (training_data,
          _known_items_only(validation_data),
          _known_items_only(test_data))