in recommenders/models/deeprec/io/sequential_iterator.py [0:0]
def load_data_from_file(self, infile, batch_num_ngs=0, min_seq_length=1):
    """Read and parse data from a file, yielding it one batch at a time.

    Parsed records are cached in `self.iter_data` under the key `infile`, so
    `self.parse_file` is only invoked the first time a given path is seen.

    Args:
        infile (str): Text input file. Each line in this file is an instance.
        batch_num_ngs (int): The number of negative samples used for in-batch
            negative sampling; forwarded to `self._convert_data`.
            0 represents that there is no need to do negative sampling here.
            When greater than 0, the cached records are shuffled in place
            before batching, so the cached order changes as well.
        min_seq_length (int): The minimum sequence length.
            Records whose item history is shorter than `min_seq_length`
            are ignored.

    Yields:
        object: Parsed results in the format of graph `feed_dict`, one per
        batch of at most `self.batch_size` records, or None when
        `self.gen_feed_dict` returns a falsy value for that batch.
    """

    def _to_feed_dict(records):
        # Transpose the records into one list per field (label list, user
        # list, ...), preserving record order within each list.
        columns = [list(column) for column in zip(*records)]
        res = self._convert_data(*columns, batch_num_ngs)
        batch_input = self.gen_feed_dict(res)
        return batch_input if batch_input else None

    if infile not in self.iter_data:
        lines = self.parse_file(infile)
        self.iter_data[infile] = lines
    else:
        lines = self.iter_data[infile]

    if batch_num_ngs > 0:
        # `lines` is the cached list itself, so this reorders the cache too.
        random.shuffle(lines)

    batch = []
    for line in lines:
        if not line:
            continue

        # A record is (label, user_id, item_id, item_cate,
        # item_history_sequence, item_cate_history_sequence, current_time,
        # time_diff, time_from_first_action, time_to_now). The full unpack
        # is kept so a record without exactly ten fields fails loudly here.
        (_, _, _, _, item_history_sequence, _, _, _, _, _) = line
        if len(item_history_sequence) < min_seq_length:
            continue

        batch.append(line)
        if len(batch) == self.batch_size:
            yield _to_feed_dict(batch)
            batch = []

    # Emit the trailing, partially filled batch (if any).
    if batch:
        yield _to_feed_dict(batch)