in recommenders/models/newsrec/io/mind_all_iterator.py [0:0]
def load_data_from_file(self, news_file, behavior_file):
    """Read and parse the news and behaviors files, yielding batched samples.

    The files are only parsed when needed: ``init_news`` is called if the
    iterator has no ``news_title_index`` attribute yet, and
    ``init_behaviors`` is called if it has no ``impr_indexes`` attribute yet.

    Args:
        news_file (str): A file containing news information.
        behavior_file (str): A file containing user impression information.

    Yields:
        object: The output of ``_convert_data`` for each batch of at most
        ``self.batch_size`` samples, in the format of graph feed_dict. The
        last batch may hold fewer samples when the total number of samples
        is not a multiple of the batch size.
    """
    if not hasattr(self, "news_title_index"):
        self.init_news(news_file)
    if not hasattr(self, "impr_indexes"):
        self.init_behaviors(behavior_file)

    # One accumulator per sample field, kept in the positional order that
    # parser_one_line yields and that _convert_data accepts:
    # label, impression index, user index, 4 candidate fields, 4 click fields.
    num_fields = 11
    columns = [[] for _ in range(num_fields)]

    indexes = np.arange(len(self.labels))
    # Visit impressions in random order only when npratio > 0
    # (presumably the negative-sampling / training setting -- confirm).
    if self.npratio > 0:
        np.random.shuffle(indexes)

    for index in indexes:
        # Explicit unpacking keeps the field order documented and still
        # raises if a parsed sample does not have exactly eleven fields.
        for (
            label,
            impr_index,
            user_index,
            candidate_title_index,
            candidate_ab_index,
            candidate_vert_index,
            candidate_subvert_index,
            click_title_index,
            click_ab_index,
            click_vert_index,
            click_subvert_index,
        ) in self.parser_one_line(index):
            sample = (
                label,
                impr_index,
                user_index,
                candidate_title_index,
                candidate_ab_index,
                candidate_vert_index,
                candidate_subvert_index,
                click_title_index,
                click_ab_index,
                click_vert_index,
                click_subvert_index,
            )
            for column, value in zip(columns, sample):
                column.append(value)

            if len(columns[0]) >= self.batch_size:
                yield self._convert_data(*columns)
                # Start fresh lists instead of clearing in place, so a
                # consumer holding the previous lists is unaffected.
                columns = [[] for _ in range(num_fields)]

    # Flush the remaining samples; without this, a trailing partial batch
    # would be silently dropped.
    if columns[0]:
        yield self._convert_data(*columns)