in src/data_preprocess/tf_records_generator.py [0:0]
def _serialize_example(self, obj_id, sequence, obj, label):
    """Serialize one sample into a ``tf.train.Example`` byte string.

    Args:
        obj_id: Sample identifier; stored under ``'id'`` after ``.encode()``.
        sequence: Sequence text; stored encoded under ``'seq'`` and its
            length is stored under ``'L'``.
        obj: Mapping of feature name to a ``(dtype, value)`` pair. A dtype of
            ``"str"`` is written with ``self._bytes_feature``, ``"int"`` with
            ``self._int_feature``, and anything else with
            ``self._float_feature``.
        label: A single label, a list/set of labels, or a dict of those that
            is indexed with ``self.label_type``.

    Returns:
        The value of ``SerializeToString()`` for the built example, or
        ``None`` when the resolved label is ``None`` or empty.

    Side effects:
        Every label seen is recorded in ``self.labels``.
    """
    d_feature = {
        'id': self._bytes_feature(obj_id.encode()),
        'seq': self._bytes_feature(sequence.encode()),
        'L': self._int_feature(len(sequence)),
    }

    cur_example_label = label
    if isinstance(cur_example_label, dict):
        assert self.label_type is not None, \
            "label_type must be set when label is a dict"
        cur_example_label = cur_example_label[self.label_type]

    if cur_example_label is None:
        return None
    try:
        if len(cur_example_label) == 0:
            return None
    except TypeError:
        # Scalar labels without a length (e.g. integer class ids) cannot be
        # "empty"; let them fall through to the single-label branch below
        # instead of crashing on len().
        pass

    if isinstance(cur_example_label, (list, set)):
        # Multi-label sample: register every label and store all of their ids.
        self.labels = self.labels.union(cur_example_label)
        cur_example_label_ids = [self.label_2_id[v] for v in cur_example_label]
        d_feature['label'] = self._int_feature(cur_example_label_ids)
    else:
        # Single-label sample.
        self.labels.add(cur_example_label)
        d_feature['label'] = self._int_feature(self.label_2_id[cur_example_label])

    for name, spec in obj.items():
        dtype, value = spec[0], spec[1]
        if isinstance(value, np.ndarray):
            # Arrays are flattened to a 1-D list of elements.
            value = list(value.reshape(-1))
        elif isinstance(value, (int, float, str)):
            value = [value]

        if dtype == "str":
            d_feature[name] = self._bytes_feature(value)
        elif dtype == "int":
            d_feature[name] = self._int_feature(value)
        else:
            d_feature[name] = self._float_feature(value)

    example = tf.train.Example(features=tf.train.Features(feature=d_feature))
    return example.SerializeToString()