def _parse_csv()

in easy_rec/python/input/rtp_input.py [0:0]


  def _parse_csv(self, line):
    """Parse RTP-formatted text into a dict of label and feature tensors.

    The input is first split by `self._rtp_separator` and reshaped into
    `self._num_cols` columns. The columns listed in
    `self._selected_cols[:-1]` are converted to numbers and used as labels.
    The column at `self._feature_col_id` packs all features into a single
    string, which is split again by `self._data_config.separator` and
    converted field by field with `string_to_number`.

    When `self._check_mode` is on, `tf.py_func` validation ops
    (`check_split`, `check_string_to_number`) are attached as control
    dependencies of the corresponding split / conversion ops.

    Args:
      line: raw text handed directly to `tf.string_split` (presumably a 1-D
        string tensor holding a batch of lines -- confirm against the caller).

    Returns:
      A dict mapping every name in `self._effective_fields` and every name in
      `self._label_fields` to its parsed tensor.
    """

    def _check_ops(check_fn, check_args):
      # Validation ops are only added to the graph in check mode.
      if not self._check_mode:
        return []
      return [tf.py_func(check_fn, check_args, Tout=tf.bool)]

    num_cols = self._num_cols
    feature_sep = self._data_config.separator

    # Stringified defaults of the feature (non-label) fields. Computed once
    # and reused below as the per-field default for string_to_number.
    feature_defaults = [
        str(self.get_type_defaults(t, v))
        for x, t, v in zip(self._input_fields, self._input_field_types,
                           self._input_field_defaults)
        if x not in self._label_fields
    ]

    # Split every line into its top-level columns.
    with tf.control_dependencies(
        _check_ops(check_split, [line, self._rtp_separator, num_cols])):
      columns = tf.string_split(line, self._rtp_separator, skip_empty=False)
    columns = tf.reshape(columns.values, [-1, num_cols])

    # Every selected column except the last one is parsed as a label; the
    # idx-th selected column is typed by the idx-th input field.
    labels = []
    for idx, col_id in enumerate(self._selected_cols[:-1]):
      field = columns[:, col_id]
      fname = self._input_fields[idx]
      tf_type = get_tf_type(self._input_field_types[idx])
      if field.dtype == tf.string:
        with tf.control_dependencies(
            _check_ops(check_string_to_number, [field, fname])):
          field = tf.string_to_number(field, tf_type)
      labels.append(field)

    # only for features, labels excluded
    record_types = [
        t for x, t in zip(self._input_fields, self._input_field_types)
        if x not in self._label_fields
    ]
    num_features = len(record_types)

    # The packed feature column is taken from self._feature_col_id
    # (presumably the last selected column -- confirm against __init__).
    print('field_delim = %s' % feature_sep)
    feature_str = columns[:, self._feature_col_id]
    with tf.control_dependencies(
        _check_ops(check_split, [feature_str, feature_sep, num_features])):
      feature_tokens = str_split_by_chr(
          feature_str, feature_sep, skip_empty=False)
    feature_values = tf.reshape(feature_tokens.values, [-1, num_features])

    # Indexing feature_defaults (instead of zipping) keeps a length mismatch
    # between types and defaults loud rather than silently truncating.
    features = [
        string_to_number(feature_values[:, i], rtype, feature_defaults[i], i)
        for i, rtype in enumerate(record_types)
    ]

    field_keys = [x for x in self._input_fields if x not in self._label_fields]
    effective_fids = [field_keys.index(x) for x in self._effective_fields]
    inputs = {field_keys[fid]: features[fid] for fid in effective_fids}

    # Labels are matched to label names by position.
    for i, label_name in enumerate(self._label_fields):
      inputs[label_name] = labels[i]
    return inputs