in recommenders/models/deeprec/io/sequential_iterator.py [0:0]
def parser_one_line(self, line):
    """Parse one text line into feature values.

    Args:
        line (str): A string describing one instance. After stripping, it is
            split on `self.col_spliter` and must yield at least eight fields,
            in this order: label, user_hash, item_hash, item_cate,
            operation_time, item_history_sequence, item_cate_history_sequence
            and time_history_sequence. The three history fields are
            comma-separated lists.

    Returns:
        tuple: Ten values, in this order:
        `label` (int), `user_id`, `item_id`, `item_cate` (values looked up in
        `self.userdict`, `self.itemdict` and `self.catedict`; 0 when the key
        is missing), `item_history_sequence` and `cate_history_sequence`
        (lists of looked-up values, 0 for missing keys), `current_time`
        (float), and the numpy arrays `time_diff`, `time_from_first_action`
        and `time_to_now`, each with one entry per history timestamp.

    Raises:
        IndexError: If the line has fewer than eight fields.
        ValueError: If the label, the operation time or a history timestamp
            cannot be converted to a number.
    """
    seconds_per_day = 3600 * 24
    min_days = 0.5

    def log_days(spans):
        # Express each span in days, raise anything below `min_days` up to
        # `min_days` (so the logarithm only ever sees values >= 0.5), then
        # take the natural log.
        return np.log([max(span / seconds_per_day, min_days) for span in spans])

    words = line.strip().split(self.col_spliter)

    label = int(words[0])
    # Keys missing from a vocabulary are mapped to 0.
    user_id = self.userdict.get(words[1], 0)
    item_id = self.itemdict.get(words[2], 0)
    item_cate = self.catedict.get(words[3], 0)
    current_time = float(words[4])

    item_history_sequence = [
        self.itemdict.get(item, 0) for item in words[5].strip().split(",")
    ]
    cate_history_sequence = [
        self.catedict.get(cate, 0) for cate in words[6].strip().split(",")
    ]
    time_history_sequence = [float(t) for t in words[7].strip().split(",")]

    first_time = time_history_sequence[0]
    last_time = time_history_sequence[-1]
    later_times = time_history_sequence[1:]

    # Gap between each pair of consecutive history events, followed by the
    # gap between the last history event and the current event.
    gaps = [
        later - earlier
        for earlier, later in zip(time_history_sequence, later_times)
    ]
    gaps.append(current_time - last_time)
    time_diff = log_days(gaps)

    # Elapsed time since the first history event, for every later history
    # event and finally for the current event.
    since_first = [t - first_time for t in later_times]
    since_first.append(current_time - first_time)
    time_from_first_action = log_days(since_first)

    # Elapsed time from every history event to the current event.
    time_to_now = log_days([current_time - t for t in time_history_sequence])

    return (
        label,
        user_id,
        item_id,
        item_cate,
        item_history_sequence,
        cate_history_sequence,
        current_time,
        time_diff,
        time_from_first_action,
        time_to_now,
    )