in utilities/feature_store_helper.py [0:0]
def get_latest_featureset_values(self, id_dict: Dict[str,Union[str,int]], features: List[str],
                                 verbose: bool=False) -> Dict[str, Union[str, int, float]]:
    """Retrieve a "feature set" for identified records from one or more online feature groups.

    A feature set is a list of fully-qualified feature names, each made of a feature group
    name and a feature name ('fg-name:feature-name'). The feature name can be the wildcard
    '*' to indicate that all features of that feature group should be returned.

    Since multiple feature groups are involved, an identifier dictionary must also be
    specified. For each unique record identifier feature name across the feature set, a
    single identifier value is given, and that same value is used for every feature group
    whose record identifier feature has that name. For example, 'customer_id' may be the
    identifier of both a 'customer' and a 'customer-demographics' feature group; it is
    specified only once in 'id_dict'.

    All feature groups are fetched with a single `batch_get_record` call.

    Args:
        id_dict (Dict[str,Union[str,int]]): Record identifiers of the records to retrieve. Key is
            the record identifier feature name (can be different for each feature group), and
            value is the actual record identifier. Values are sent to the service as strings.
        features (List[str]): List of named features to retrieve. Features are fully-qualified
            as 'fg-name:feature-name', or 'fg-name:*' for all features.
        verbose (bool): When True, print the intermediate requests, responses and partial
            results for debugging. Defaults to False.

    Returns:
        Dict[str, Union[str, int, float]]: Dictionary of named feature values, converted by
            `_record_to_dict` using the feature types of the feature group definitions. An
            empty dictionary is returned when no features are requested.

    Raises:
        KeyError: If 'id_dict' has no entry for the record identifier feature name of one of
            the requested feature groups.
        LookupError: If the response contains no record for one of the requested feature groups.
    """
    # Nothing requested: there is nothing to look up, and the result is still a dictionary.
    if not features:
        return {}

    ## TODO: BatchGetRecord does not honor the order of the features requested, so this function
    ##       should enforce reordering of results to match the requested order. This is important
    ##       when mapping to a feature vector to feed a model prediction.
    ## NOTE: feature types are merged across feature groups by bare feature name, so a feature
    ##       name defined in several groups keeps the type of the last group processed.
    _feature_types = {}

    _features_df = self._feature_list_to_df(features)
    if verbose:
        print(_features_df.head())

    # Build one request entry per feature group. With pandas' default settings, groupby yields
    # the groups sorted by feature group name, not in the order they appear in 'features'.
    _fg_requests = []
    for _fg_name, _fg_rows in _features_df.groupby('fg_name'):
        _curr_features = _fg_rows['feature_name'].tolist()

        # Describe the feature group once and reuse the answer for both the feature types
        # and the record identifier feature name.
        _fg_description = self.describe_feature_group(_fg_name)
        for _fd in _fg_description['FeatureDefinitions']:
            _feature_types[_fd['FeatureName']] = _fd['FeatureType']

        _id_feature_name = _fg_description['RecordIdentifierFeatureName']
        try:
            _id_value = id_dict[_id_feature_name]
        except KeyError:
            raise KeyError(f"id_dict has no value for record identifier '{_id_feature_name}' "
                           f"required by feature group '{_fg_name}'") from None

        if verbose:
            print(f'fg name: {_fg_name}, id: {_id_feature_name}, id val: {_id_value}, features: {_curr_features}')

        _fg_requests.append({'FeatureGroupName': _fg_name,
                             'RecordIdentifiersValueAsString': [str(_id_value)],
                             'FeatureNames': _curr_features})

    # The frame had no rows to group, so there is nothing to ask the online store for.
    if not _fg_requests:
        return {}

    if verbose:
        print(_fg_requests)
        print(_feature_types)

    _resp = self._featurestore_runtime.batch_get_record(Identifiers=_fg_requests)
    _all_records = _resp['Records']

    if verbose:
        _num_recs = len(_all_records)
        print(f'got back {_num_recs} records')

    # Index the returned records by feature group so each request is matched with a single
    # lookup. setdefault keeps the first record seen for a feature group.
    _records_by_fg = {}
    for _item in _all_records:
        _records_by_fg.setdefault(_item['FeatureGroupName'], _item['Record'])

    _results_dict = {}
    for _req in _fg_requests:
        _fg_name = _req['FeatureGroupName']

        # Walk the requests (not the response) so results follow the order of the requests.
        if _fg_name not in _records_by_fg:
            # Fail with a descriptive error instead of leaking a bare StopIteration.
            _missing_id = _req['RecordIdentifiersValueAsString'][0]
            raise LookupError(f"No record returned for feature group '{_fg_name}' "
                              f"with record identifier '{_missing_id}'")
        _curr_record = _records_by_fg[_fg_name]
        if verbose:
            print(_curr_record)

        ## TODO: extend _record_to_dict to take a feature name list and enforce that order in the return value
        _curr_record_dict = self._record_to_dict(_curr_record, _feature_types)

        if verbose:
            print(_results_dict)
            print(_curr_record_dict)

        # Later feature groups overwrite earlier ones when feature names collide.
        _results_dict.update(_curr_record_dict)

    return _results_dict