def _group_csv_lines()

in tensorflow_examples/lite/model_maker/core/data_util/object_detector_dataloader.py [0:0]


def _group_csv_lines(csv_file: str,
                     set_prefixes: List[str],
                     delimiter: str = ',',
                     quotechar: str = '"') -> CsvLines:
  """Groups the rows of a csv file by set prefix and builds the label map.

  Every non-empty row is parsed into a list of strings. The first column
  (whitespace-stripped) is compared against each entry of `set_prefixes`; the
  row is appended to the group of every prefix it starts with, so a row may
  land in more than one group if prefixes overlap, or in none if nothing
  matches. The third column (whitespace-stripped) is taken as the label and
  is registered in the label map regardless of whether the row matched a
  prefix.

  Args:
    csv_file: filename of the csv file.
    set_prefixes: Set prefix names for training, validation and test data. e.g.
      ['TRAIN', 'VAL', 'TEST'].
    delimiter: Character used to separate fields.
    quotechar: Character used to quote fields containing special characters.

  Returns:
    lines_list, label_map. `lines_list[i]` holds the csv lines whose set name
    starts with `set_prefixes[i]`, e.g. [training csv lines, validation csv
    lines, test csv lines]; it always contains at least three groups.
    `label_map` maps integer label ids, assigned from 1 in order of first
    appearance, to string label names.

  Raises:
    IndexError: If a non-empty row has fewer than three columns.
  """
  # One group per prefix. At least three groups are always returned so callers
  # relying on the [train, validation, test] shape keep working even when
  # fewer prefixes are supplied.
  lines_list = [[] for _ in range(max(3, len(set_prefixes)))]
  # Dict that maps integer label ids to string label names.
  label_map = {}
  # Mirror of `label_map.values()` that gives constant-time membership tests.
  seen_labels = set()
  with tf.io.gfile.GFile(csv_file, 'r') as f:
    reader = csv.reader(f, delimiter=delimiter, quotechar=quotechar)
    # Each csv line is a list of strings separated by delimiter. e.g.
    # row 'one,two,three' in the csv file will be ['one', 'two', 'three'].
    for line in reader:
      # `csv.reader` yields an empty list for blank lines; skip them instead
      # of failing on the column lookups below.
      if not line:
        continue

      # Groups lines by the set_name.
      set_name = line[0].strip()
      for i, set_prefix in enumerate(set_prefixes):
        if set_name.startswith(set_prefix):
          lines_list[i].append(line)

      label = line[2].strip()
      # Updates label_map if it's a new label.
      if label not in seen_labels:
        seen_labels.add(label)
        label_map[len(label_map) + 1] = label

  return lines_list, label_map