in tensorflow_examples/lite/model_maker/core/data_util/object_detector_dataloader.py [0:0]
def _group_csv_lines(csv_file: str,
                     set_prefixes: List[str],
                     delimiter: str = ',',
                     quotechar: str = '"') -> CsvLines:
  """Groups csv lines by set name and builds the label map.

  The first field of each row is the set name and the third field is the
  label. A row is appended to the group of every prefix in `set_prefixes`
  that its (stripped) set name starts with. Labels are collected from every
  non-blank row, whether or not the row matched a prefix, and are assigned
  consecutive integer ids starting at 1 in order of first appearance.

  Blank lines in the csv file are skipped.

  Args:
    csv_file: filename of the csv file.
    set_prefixes: Set prefix names for training, validation and test data. e.g.
      ['TRAIN', 'VAL', 'TEST'].
    delimiter: Character used to separate fields.
    quotechar: Character used to quote fields containing special characters.

  Returns:
    [training csv lines, validation csv lines, test csv lines], label_map

  Raises:
    IndexError: If a non-blank row has fewer than three fields, or if a row
      matches a prefix beyond the third entry of `set_prefixes`.
  """
  # `lines_list` = [training csv lines, validation csv lines, test csv lines]
  # Each csv line is a list of strings separated by delimiter. e.g.
  # row 'one,two,three' in the csv file will be ['one', 'two', 'three'].
  lines_list = [[], [], []]
  # Dict that maps integer label ids to string label names.
  label_map = {}
  # Mirrors the values of `label_map` so the "is this label new?" check is a
  # constant-time set lookup instead of a scan over all known labels.
  seen_labels = set()

  with tf.io.gfile.GFile(csv_file, 'r') as f:
    reader = csv.reader(f, delimiter=delimiter, quotechar=quotechar)
    for line in reader:
      # csv.reader yields an empty list for a blank line; there is nothing to
      # group or label, and indexing it would raise IndexError.
      if not line:
        continue

      # Groups lines by the set_name.
      set_name = line[0].strip()
      for i, set_prefix in enumerate(set_prefixes):
        if set_name.startswith(set_prefix):
          lines_list[i].append(line)

      label = line[2].strip()
      # Updates label_map if it's a new label.
      if label not in seen_labels:
        seen_labels.add(label)
        label_map[len(label_map) + 1] = label

  return lines_list, label_map