in tools/taobao_prepare.py [0:0]
def _history_pairs(line):
    """Return the (item, category) history pairs parsed from one sample line.

    The line is tab-separated; field 4 holds the comma-separated item history
    and field 5 the comma-separated category history.  Raises IndexError when
    the line has fewer than six fields.
    """
    fields = line.strip().split("\t")
    # list() so the result has a length and supports repeated iteration on
    # Python 3, where zip() is a one-shot iterator.
    return list(zip(fields[4].split(","), fields[5].split(",")))


def _sample_negative_pairs(candidates, history, count, surplus=20):
    """Randomly pick `count` pairs from `candidates` that are not in `history`.

    Sampling is with replacement, so the result may contain duplicates.
    `surplus` extra indices are drawn per round to absorb collisions with the
    history; further rounds are drawn until `count` pairs are collected.

    Raises:
        ValueError: if every candidate already appears in `history`, so no
            negative can ever be produced.
    """
    excluded = set(history)  # O(1) membership instead of scanning the list
    negatives = []
    feasibility_checked = False
    while len(negatives) < count:
        draws = np.random.randint(
            0, len(candidates), size=count - len(negatives) + surplus
        )
        for index in draws.tolist():
            pair = candidates[index]
            if pair in excluded:
                continue
            negatives.append(pair)
            if len(negatives) == count:
                break
        if len(negatives) < count and not feasibility_checked:
            # Only reached after an unlucky round: make sure another round
            # can actually make progress before looping again.
            if all(pair in excluded for pair in candidates):
                raise ValueError(
                    "cannot sample negatives: every candidate pair is in the history"
                )
            feasibility_checked = True
    return negatives


def _write_with_negatives(lines, handle, candidates):
    """Write each line to `handle` followed by its negative item/category columns."""
    for line in lines:
        history = _history_pairs(line)
        # One negative per history position (str.split always yields at least
        # one element, so the history is never empty).
        negatives = _sample_negative_pairs(candidates, history, len(history))
        neg_items, neg_cates = zip(*negatives)
        handle.write(
            line.strip()
            + "\t" + ",".join(neg_items)
            + "\t" + ",".join(neg_cates)
            + "\n"
        )


def produce_neg_item_hist_with_cate(train_file, test_file):
    """Append randomly sampled negative histories to every train/test sample.

    Each input line is tab-separated, with the item history in field 4 and the
    category history in field 5 (both comma-separated).  For every line, as
    many negative (item, category) pairs as the line has history entries are
    sampled from all pairs seen across both inputs, excluding the padding pair
    ("0", "0") and the pairs in that line's own history.  The stripped line is
    then written out with two extra tab-separated columns: the negative items
    and the negative categories.

    Args:
        train_file: iterable of sample lines; output goes to the module-level
            ``Train_handle``.
        test_file: iterable of sample lines; output goes to the module-level
            ``Test_handle``.

    ``Train_handle`` and ``Test_handle`` are not defined in this block; they
    are expected to be writable file-like objects defined at module level.

    Returns:
        None.  Nothing is written when both inputs are empty.

    Raises:
        IndexError: if a line has fewer than six tab-separated fields.
        ValueError: if there is no pair to sample negatives from.
    """
    # Both inputs are traversed twice (collect candidates, then write), so
    # one-shot iterables such as open file objects must be materialised first.
    if not isinstance(train_file, (list, tuple)):
        train_file = list(train_file)
    if not isinstance(test_file, (list, tuple)):
        test_file = list(test_file)

    if not train_file and not test_file:
        return

    # Unique pairs in first-seen order; the padding pair is pre-registered as
    # "known" so it never becomes a candidate, whether or not it occurs.
    known = {("0", "0")}
    candidates = []
    for lines in (train_file, test_file):
        for line in lines:
            for pair in _history_pairs(line):
                if pair not in known:
                    known.add(pair)
                    candidates.append(pair)

    if not candidates:
        raise ValueError(
            "cannot sample negatives: no non-padding (item, category) pairs found"
        )

    _write_with_negatives(train_file, Train_handle, candidates)
    _write_with_negatives(test_file, Test_handle, candidates)