def __init__()

in tensorflow_transform/coders/csv_coder.py [0:0]


  def __init__(self,
               column_names,
               schema,
               delimiter=',',
               secondary_delimiter=None,
               multivalent_columns=None):
    """Initializes CsvCoder.

    Stores the configuration and builds one feature handler for every CSV
    column referenced by the feature spec derived from `schema`.

    Args:
      column_names: Tuple of strings. Order must match the order in the file.
      schema: A `Schema` proto.
      delimiter: A one-character string used to separate fields.
      secondary_delimiter: A one-character string used to separate values within
        the same field.
      multivalent_columns: A list of names for multivalent columns that need to
        be split based on secondary delimiter.

    Raises:
      ValueError: If `multivalent_columns` is non-empty but no
        `secondary_delimiter` was given, if a column required by the feature
        spec is missing from `column_names`, or if `schema` yields a feature
        spec that is not a `tf.io.FixedLenFeature`, `tf.io.VarLenFeature` or
        `tf.io.SparseFeature`.
    """
    self._column_names = column_names
    self._schema = schema
    self._delimiter = delimiter
    self._secondary_delimiter = secondary_delimiter
    self._encoder = self._WriterWrapper(delimiter)

    if multivalent_columns is None:
      multivalent_columns = []
    self._multivalent_columns = multivalent_columns

    # Bind the name on every path so the mapping below never refers to an
    # unbound local when no secondary delimiter was supplied.
    secondary_encoder = None
    if secondary_delimiter:
      secondary_encoder = self._WriterWrapper(secondary_delimiter)
    elif multivalent_columns:
      raise ValueError(
          'secondary_delimiter unspecified for multivalent columns "{}"'.format(
              multivalent_columns))
    # All multivalent columns share the same secondary encoder.
    secondary_encoder_by_name = {
        name: secondary_encoder for name in multivalent_columns
    }
    # If a column name is repeated, the last occurrence wins.
    indices_by_name = {
        name: position for position, name in enumerate(self._column_names)
    }

    def column_index(name):
      """Returns the position of `name` in `column_names` or raises."""
      position = indices_by_name.get(name)
      if position is None:
        raise ValueError('Column not found: "{}"'.format(name))
      return position

    self._feature_handlers = []
    for name, feature_spec in schema_utils.schema_as_feature_spec(
        schema).feature_spec.items():
      if isinstance(feature_spec, tf.io.FixedLenFeature):
        self._feature_handlers.append(
            _FixedLenFeatureHandler(name, feature_spec, column_index(name),
                                    secondary_encoder_by_name.get(name)))
      elif isinstance(feature_spec, tf.io.VarLenFeature):
        self._feature_handlers.append(
            _VarLenFeatureHandler(name, feature_spec.dtype, column_index(name),
                                  secondary_encoder_by_name.get(name)))
      elif isinstance(feature_spec, tf.io.SparseFeature):
        # A sparse feature is backed by one column per index key plus one
        # value column; each of them gets its own handler.
        # NOTE(review): the secondary encoder is looked up by the sparse
        # feature's name, not by the index/value column names -- confirm this
        # matches how callers populate `multivalent_columns`.
        sparse_encoder = secondary_encoder_by_name.get(name)
        index_keys = (
            feature_spec.index_key if isinstance(feature_spec.index_key, list)
            else [feature_spec.index_key])
        for key in index_keys:
          self._feature_handlers.append(
              _VarLenFeatureHandler(key, tf.int64, column_index(key),
                                    sparse_encoder))
        self._feature_handlers.append(
            _VarLenFeatureHandler(feature_spec.value_key, feature_spec.dtype,
                                  column_index(feature_spec.value_key),
                                  sparse_encoder))
      else:
        raise ValueError(
            'feature_spec should be one of tf.FixedLenFeature, '
            'tf.VarLenFeature or tf.SparseFeature: {!r} was {!r}'.format(
                name, type(feature_spec)))