def _parse_csv()

in easy_rec/python/input/hive_rtp_input.py [0:0]


  def _parse_csv(self, line):
    """Decode delimited records and expand the packed feature column.

    Each record is first split on `self._rtp_separator` into the table
    columns with tf.decode_csv.  Only the selected columns are kept; the last
    kept column is split again on `self._data_config.separator` into one value
    per feature, and every value is converted with string_to_number.

    Args:
      line: input passed unchanged to tf.decode_csv.

    Returns:
      dict keyed by the names in self._effective_fields and
      self._label_fields, holding the corresponding parsed tensors.

    Raises:
      ValueError: if self._selected_cols is unset or empty.
    """
    # Local import so this method does not depend on the module-level imports.
    import logging

    # Without selected columns there is no feature column to split; the code
    # below would otherwise fail later with an opaque TypeError / IndexError.
    if not self._selected_cols:
      raise ValueError(
          '_parse_csv requires non-empty selected_cols: the last selected '
          'column holds the packed features')
    selected_cols = self._selected_cols
    # Every selected column except the last one is decoded directly from the
    # record instead of being unpacked from the feature string.
    non_feature_cols = selected_cols[:-1]

    # One default per table column: a typed default for the directly decoded
    # columns, '' (i.e. string) for everything else, including the feature
    # column.
    record_defaults = []
    for field_name in self._input_table_col_names:
      if field_name in non_feature_cols:
        idx = self._input_fields.index(field_name)
        record_defaults.append(
            self.get_type_defaults(self._input_field_types[idx],
                                   self._input_field_defaults[idx]))
      else:
        record_defaults.append('')
    logging.info('record_defaults: %s', record_defaults)
    tmp_fields = tf.decode_csv(
        line,
        field_delim=self._rtp_separator,
        record_defaults=record_defaults,
        name='decode_csv')
    logging.info('tmp_fields: %s', tmp_fields)

    # NOTE(review): columns are collected in table order, not in
    # selected_cols order, so this assumes the feature column is the last
    # selected column of the table and the label columns come first - confirm
    # against the callers' table layout.
    fields = [
        tmp_fields[idx]
        for idx, field_name in enumerate(self._input_table_col_names)
        if field_name in selected_cols
    ]
    logging.info('fields: %s', fields)
    labels = fields[:-1]
    packed_features = fields[-1]

    # only for features, labels and sample_weight excluded
    record_types = [
        t for x, t in zip(self._input_fields, self._input_field_types)
        if x not in non_feature_cols
    ]
    feature_num = len(record_types)

    # In check mode, check_split is run on the packed column before it is
    # split.
    check_list = [
        tf.py_func(
            check_split,
            [packed_features, self._data_config.separator,
             len(record_types)],
            Tout=tf.bool)
    ] if self._check_mode else []
    with tf.control_dependencies(check_list):
      split_features = tf.string_split(
          packed_features, self._data_config.separator, skip_empty=False)
    # One row per record, one column per feature.
    tmp_fields = tf.reshape(split_features.values, [-1, feature_num])

    rtp_record_defaults = [
        str(self.get_type_defaults(t, v))
        for x, t, v in zip(self._input_fields, self._input_field_types,
                           self._input_field_defaults)
        if x not in non_feature_cols
    ]

    # Directly decoded non-label columns first, then the unpacked features.
    # NOTE(review): the positions must line up with field_keys below, i.e.
    # with the order of self._input_fields minus the labels - confirm.
    fields = labels[len(self._label_fields):]
    for i in range(feature_num):
      fields.append(
          string_to_number(tmp_fields[:, i], record_types[i],
                           rtp_record_defaults[i], i))

    field_keys = [x for x in self._input_fields if x not in self._label_fields]
    inputs = {
        name: fields[field_keys.index(name)] for name in self._effective_fields
    }

    for i, label_name in enumerate(self._label_fields):
      inputs[label_name] = labels[i]
    return inputs