in easy_rec/python/input/input.py [0:0]
def _parse_seq_feature(self, fc, parsed_dict, field_dict):
    """Parse one sequence feature and store the result in ``parsed_dict``.

    A field of rank < 2 is split on ``fc.separator`` into a sparse tensor.
    When ``fc.seq_multi_sep`` is set, every element is split a second time,
    which yields a 3-D sparse tensor.  The string values are then converted
    to int64 (``num_buckets > 1`` and ``max_val == min_val``) or to float32
    (sub feature type is ``RawFeature``), and min-max normalized when
    ``num_buckets > 1`` and ``max_val > min_val``.  A field of rank >= 2 is
    stored unchanged.

    For raw sub features without boundaries or buckets (and which are not
    the sample-weight field) the sparse result is additionally densified to
    ``[batch, sequence_length]`` or
    ``[batch, sequence_length, raw_input_dim]`` and two extra entries,
    ``<feature_name>_raw_proj_id`` and ``<feature_name>_raw_proj_val``, are
    written.

    Args:
      fc: feature config; the fields read here are ``input_names``,
        ``feature_name``, ``sub_feature_type``, ``separator``,
        ``seq_multi_sep``, ``num_buckets``, ``min_val``, ``max_val``,
        ``boundaries``, ``raw_input_dim`` and ``sequence_length``.
      parsed_dict: dict that is updated in place with the parsed tensors.
      field_dict: mapping from input name to the raw input tensor.
    """
    input_0 = fc.input_names[0]
    feature_name = fc.feature_name if fc.HasField('feature_name') else input_0
    field = field_dict[input_0]
    sub_feature_type = fc.sub_feature_type

    def _to_number(sparse, dtype, op_name):
        """Return ``sparse`` with its string values converted to ``dtype``."""
        # In check mode the conversion is gated on a python-side validation
        # of the raw strings.
        check_list = [
            tf.py_func(
                check_string_to_number, [sparse.values, input_0],
                Tout=tf.bool)
        ] if self._check_mode else []
        with tf.control_dependencies(check_list):
            return tf.sparse.SparseTensor(
                sparse.indices,
                tf.string_to_number(sparse.values, dtype, name=op_name),
                sparse.dense_shape)

    # Construct the output of SeqFeature according to the dimension of
    # field_dict. A field that already has 2 or more dimensions is passed
    # through as is.
    if len(field.get_shape()) < 2:
        seq = tf.strings.split(field, fc.separator)
        if fc.HasField('seq_multi_sep'):
            multi_vals = tf.string_split(seq.values, fc.seq_multi_sep)
            inner_indices = multi_vals.indices
            # Column 0 of the inner indices points at the element of
            # `seq` that was split; replace it with that element's own
            # (row, position) index and keep the remaining columns.
            outer_indices = tf.gather(seq.indices, inner_indices[:, 0])
            out_indices = tf.concat(
                [outer_indices, inner_indices[:, 1:]], axis=1)
            # 3 dimensional sparse tensor
            out_shape = tf.concat(
                [seq.dense_shape, multi_vals.dense_shape[1:]], axis=0)
            seq = tf.sparse.SparseTensor(
                out_indices, multi_vals.values, out_shape)

        if fc.num_buckets > 1 and fc.max_val == fc.min_val:
            seq = _to_number(
                seq, tf.int64, 'sequence_str_2_int_%s' % input_0)
        elif sub_feature_type == fc.RawFeature:
            seq = _to_number(
                seq, tf.float32, 'sequence_str_2_float_%s' % input_0)

        if fc.num_buckets > 1 and fc.max_val > fc.min_val:
            # NOTE(review): when the sub feature type is not RawFeature,
            # neither conversion above ran and the values are still
            # strings here -- confirm this combination is rejected
            # upstream.
            normalized_values = (seq.values - fc.min_val) / (
                fc.max_val - fc.min_val)
            seq = tf.sparse.SparseTensor(
                seq.indices, normalized_values, seq.dense_shape)
        parsed_dict[feature_name] = seq
    else:
        parsed_dict[feature_name] = field

    # Raw sequence values that are neither bucketized nor used as the
    # sample weight get dense id / value projection tensors.
    project_raw = (
        not fc.boundaries and fc.num_buckets <= 1 and
        self._data_config.sample_weight != input_0 and
        sub_feature_type == fc.RawFeature)

    if project_raw and fc.raw_input_dim == 1:
        logging.info(
            'Not set boundaries or num_buckets or hash_bucket_size, %s will process as two dimension sequence raw feature',
            feature_name)
        # NOTE(review): `.indices` / `.values` are read below, so this
        # presumably expects the sparse result of the rank < 2 path --
        # confirm fields of rank >= 2 never reach here.
        sparse_seq = parsed_dict[feature_name]
        dense = tf.sparse_to_dense(
            sparse_seq.indices,
            [tf.shape(sparse_seq)[0], fc.sequence_length],
            sparse_seq.values)
        parsed_dict[feature_name] = dense

        # Enumerate every (sample, position) cell of the dense tensor.
        sample_num = tf.to_int64(tf.shape(dense)[0])
        indices_0 = tf.range(sample_num, dtype=tf.int64)
        indices_1 = tf.range(fc.sequence_length, dtype=tf.int64)
        indices_0 = indices_0[:, None]
        indices_1 = indices_1[None, :]
        indices_0 = tf.tile(indices_0, [1, fc.sequence_length])
        indices_1 = tf.tile(indices_1, [sample_num, 1])
        indices_0 = tf.reshape(indices_0, [-1, 1])
        indices_1 = tf.reshape(indices_1, [-1, 1])
        indices = tf.concat([indices_0, indices_1], axis=1)

        # The id of each cell is its position inside the sequence.
        parsed_dict[feature_name + '_raw_proj_id'] = tf.SparseTensor(
            indices=indices,
            values=indices_1[:, 0],
            dense_shape=[sample_num, fc.sequence_length])
        parsed_dict[feature_name + '_raw_proj_val'] = tf.SparseTensor(
            indices=indices,
            values=tf.reshape(dense, [-1]),
            dense_shape=[sample_num, fc.sequence_length])
    elif project_raw and fc.raw_input_dim > 1:
        # for 3 dimension sequence feature input.
        logging.info(
            'Not set boundaries or num_buckets or hash_bucket_size,'
            ' %s will process as three dimension sequence raw feature',
            feature_name)
        sparse_seq = parsed_dict[feature_name]
        dense = tf.sparse_to_dense(
            sparse_seq.indices, [
                tf.shape(sparse_seq)[0], fc.sequence_length,
                fc.raw_input_dim
            ], sparse_seq.values)
        parsed_dict[feature_name] = dense

        # Enumerate every (sample, position, raw-dim) cell.
        sample_num = tf.to_int64(tf.shape(dense)[0])
        indices_0 = tf.range(sample_num, dtype=tf.int64)
        indices_1 = tf.range(fc.sequence_length, dtype=tf.int64)
        indices_2 = tf.range(fc.raw_input_dim, dtype=tf.int64)
        indices_0 = indices_0[:, None, None]
        indices_1 = indices_1[None, :, None]
        indices_2 = indices_2[None, None, :]
        indices_0 = tf.tile(
            indices_0, [1, fc.sequence_length, fc.raw_input_dim])
        indices_1 = tf.tile(indices_1, [sample_num, 1, fc.raw_input_dim])
        indices_2 = tf.tile(indices_2, [sample_num, fc.sequence_length, 1])
        indices_0 = tf.reshape(indices_0, [-1, 1])
        indices_1 = tf.reshape(indices_1, [-1, 1])
        indices_2 = tf.reshape(indices_2, [-1, 1])
        indices = tf.concat([indices_0, indices_1, indices_2], axis=1)

        # NOTE(review): the id is the sequence position (indices_1), not
        # the raw-dim index (indices_2) -- confirm this is intended.
        parsed_dict[feature_name + '_raw_proj_id'] = tf.SparseTensor(
            indices=indices,
            values=indices_1[:, 0],
            dense_shape=[sample_num, fc.sequence_length, fc.raw_input_dim])
        parsed_dict[feature_name + '_raw_proj_val'] = tf.SparseTensor(
            indices=indices,
            values=tf.reshape(dense, [-1]),
            dense_shape=[sample_num, fc.sequence_length, fc.raw_input_dim])