in easy_rec/python/tools/create_config_from_excel.py [0:0]
def _parse_features(self):
  """Build per-feature config dicts from the 'features' sheet of the Excel file.

  Each row of the sheet becomes a ``field`` dict that is:
    * seeded from the row's ``name``, ``data_type``, ``type`` and ``global``
      cells,
    * completed from ``self._dict_global`` when the row's ``global`` value (or
      the feature name itself) is a key of that mapping,
    * overlaid with the row's own optional cells (``hash_bucket_size``,
      ``embedding_dim``, ``default_value``, ``weights``, ``boundaries``, ...),
    * registered in ``self._feature_details`` and, unless its type is one of
      the "not needed" markers, passed to ``self._add_to_tower`` together with
      the row's ``group`` value.

  The feature name is appended to ``self._feature_names`` and a row of type
  'label' sets ``self._label``.

  Raises:
    ValueError: if a feature of type 'tags' has a ``weights`` entry that is
      not a key of ``self._feature_details``.
  """

  def _is_good(v):
    # A shared setting is usable unless it renders as 'nan' or ''.
    return str(v) not in ['nan', '']

  # Use the canonical lowercase np.nan: the np.NaN / np.NAN aliases were
  # removed in NumPy 2.0 and raise AttributeError there.
  blank_values = ['', ' ', 'NaN', np.nan, 'nan']
  shared_keys = ['default_value', 'hash_bucket_size', 'embedding_dim']
  row_keys = [
      'type', 'global', 'hash_bucket_size', 'embedding_dim', 'default_value',
      'weights', 'boundaries'
  ]
  skipped_types = ['notneed', 'not_need', 'not_needed']

  df = pd.read_excel(self._excel_path, sheet_name='features')
  for _, row in df.iterrows():
    field = {}
    name = field['name'] = row['name'].strip()
    self._feature_names.append(name)
    field['data_type'] = row['data_type'].strip()
    field['type'] = row['type'].strip()
    g = str(row['global']).strip()
    # An empty cell is read as NaN, which str() renders as 'nan'.
    if g and g != 'nan':
      field['global'] = g
    field['field_name'] = name
    if field['type'] == 'label':
      self._label = name

    if 'global' in field and field['global'] in self._dict_global:
      # Copy every setting that the shared (global) entry actually defines,
      # and share the embedding under the global's name.
      shared = self._dict_global[field['global']]
      for key in shared_keys:
        if _is_good(shared[key]):
          field[key] = shared[key]
      field['embedding_name'] = field['global']

    # Values present in the row itself are applied on top of the above.
    for t in row_keys:
      if t not in row:
        continue
      v = row[t]
      if v not in blank_values:
        if self._is_str(v):
          field[t] = v.strip()
        elif not math.isnan(v):
          # NOTE(review): int() truncates fractional numbers (a cell holding
          # 0.5 is stored as 0) -- confirm this is intended for every key.
          field[t] = int(v)
      if t == 'default_value' and t not in field:
        # No default given anywhere: '' in general, 0.0 for dense features.
        field[t] = ''
        if field['type'] == 'dense':
          field[t] = 0.0

    if field['type'] == 'weights':
      field['default_value'] = '1'

    tower_name = row['group']
    if name in self._dict_global:
      # The feature's own name is a global entry: it becomes a category
      # feature configured entirely from that entry.
      field['type'] = 'category'
      field['hash_bucket_size'] = self._dict_global[name]['hash_bucket_size']
      field['embedding_dim'] = self._dict_global[name]['embedding_dim']
      field['default_value'] = self._dict_global[name]['default_value']

    # NOTE(review): this replaces any default_value chosen above for
    # bigint/double columns -- confirm that is intended.
    if field['data_type'] == 'bigint':
      field['default_value'] = 0
    elif field['data_type'] == 'double':
      field['default_value'] = 0.0

    if field['type'] not in skipped_types:
      tower_name = str(tower_name).strip()
      self._add_to_tower(tower_name, field)
    self._feature_details[name] = field

  # check that tag features weights are one of the fields
  for name, config in self._feature_details.items():
    if config['type'] == 'tags':
      if 'weights' in config and config[
          'weights'] not in self._feature_details:
        raise ValueError(config['weights'] + ' not in field names')