def _parse_features()

in easy_rec/python/tools/create_config_from_excel.py [0:0]


  def _parse_features(self):
    """Load the 'features' sheet and build one config dict per feature row.

    For every row of the sheet this method:
      * records the feature name in ``self._feature_names``;
      * remembers the name of the row whose type is ``label`` in
        ``self._label``;
      * inherits settings from ``self._dict_global`` when the row points at
        a known global entry (or is itself named like one);
      * registers the feature with ``self._add_to_tower`` under the row's
        ``group`` unless the type marks it as not needed;
      * stores the resulting dict in ``self._feature_details``.

    Raises:
      ValueError: if a ``tags`` feature names a ``weights`` field that is not
        one of the parsed features.
    """

    def _is_good(v):
      # A global setting is usable unless it is empty or NaN.
      return str(v) not in ['nan', '']

    df = pd.read_excel(self._excel_path, sheet_name='features')
    for _, row in df.iterrows():
      field = {}
      name = field['name'] = row['name'].strip()
      self._feature_names.append(name)
      field['data_type'] = row['data_type'].strip()
      field['type'] = row['type'].strip()

      # Empty cells are read back as NaN, which str() renders as 'nan'.
      g = str(row['global']).strip()
      if g and g != 'nan':
        field['global'] = g

      field['field_name'] = name

      if field['type'] == 'label':
        self._label = name

      global_name = field.get('global')
      if global_name is not None and global_name in self._dict_global:
        # Inherit the shared settings of the referenced global entry,
        # ignoring any of its values that are empty or NaN.
        shared = self._dict_global[global_name]
        for key in ('default_value', 'hash_bucket_size', 'embedding_dim'):
          if _is_good(shared[key]):
            field[key] = shared[key]
        field['embedding_name'] = global_name

      # Values given on the row itself override the inherited ones.
      for t in ('type', 'global', 'hash_bucket_size', 'embedding_dim',
                'default_value', 'weights', 'boundaries'):
        if t not in row:
          continue
        v = row[t]
        if self._is_str(v):
          # Strip before comparing so that any whitespace-only cell counts
          # as missing, not only the exact strings '' and ' '.
          text = v.strip()
          if text not in ('', 'NaN', 'nan'):
            field[t] = text
        elif not math.isnan(v):
          # NaN cells are skipped by the isnan test; this avoids relying on
          # the np.NaN / np.NAN aliases, which NumPy 2.0 removed.
          # NOTE(review): int() truncates fractional numbers (e.g. a float
          # default_value or boundary) - confirm this is intended.
          field[t] = int(v)

        if t == 'default_value' and t not in field:
          field[t] = 0.0 if field['type'] == 'dense' else ''

      if field['type'] == 'weights':
        field['default_value'] = '1'

      tower_name = row['group']
      if name in self._dict_global:
        # A feature named like a global entry takes its settings verbatim.
        shared = self._dict_global[name]
        field['type'] = 'category'
        field['hash_bucket_size'] = shared['hash_bucket_size']
        field['embedding_dim'] = shared['embedding_dim']
        field['default_value'] = shared['default_value']

      # Numeric data types always get a zero default, whatever was set above.
      if field['data_type'] == 'bigint':
        field['default_value'] = 0
      elif field['data_type'] == 'double':
        field['default_value'] = 0.0

      if field['type'] not in ['notneed', 'not_need', 'not_needed']:
        self._add_to_tower(str(tower_name).strip(), field)
      self._feature_details[name] = field

    # check that tag features weights are one of the fields
    for name, config in self._feature_details.items():
      if config['type'] != 'tags' or 'weights' not in config:
        continue
      if config['weights'] not in self._feature_details:
        # str() keeps this a ValueError even when the weights cell was
        # numeric and therefore stored as an int.
        raise ValueError(str(config['weights']) + ' not in field names')