def build_index()

in petastorm/etl/rowgroup_indexers.py [0:0]


    def build_index(self, decoded_rows, piece_index):
        """Record ``piece_index`` under every value of the indexed column.

        The column named by ``self._column_name`` is read from each row.
        ``None`` values are ignored, a ``np.ndarray`` value contributes one
        index entry per element produced by iterating it, and any other value
        is used as an index key directly.

        :param decoded_rows: iterable of rows subscriptable by column name.
        :param piece_index: identifier added to the entry of every value seen.
        :return: ``self._index_data``, updated in place.
        :raises ValueError: if ``decoded_rows`` yields no rows.
        """
        column = self._column_name
        # Materialize the column up front so emptiness is detected before the
        # index is touched (``decoded_rows`` may be a one-shot iterable).
        values = [record[column] for record in decoded_rows]
        if not values:
            raise ValueError("Cannot build index for empty rows, column '{}'"
                             .format(self._column_name))

        # NOTE(review): presumably a defaultdict(set)-like mapping, since
        # missing keys must yield an object with ``add`` -- confirm in __init__.
        index = self._index_data
        for value in values:
            if value is None:
                continue
            # Arrays are indexed element by element; everything else is
            # treated as a single key.
            keys = value if isinstance(value, np.ndarray) else (value,)
            for key in keys:
                index[key].add(piece_index)

        return index