src/google/appengine/api/search/stub/simple_facet.py (227 lines of code) (raw):

#!/usr/bin/env python
#
# Copyright 2007 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""A simple working model of faceted search backend used in Search API stub."""

from google.appengine.datastore import document_pb2


def _GetRangeBounds(facet_range):
  """Returns the (start, end) bounds of a facet range as floats.

  Args:
    facet_range: a range message exposing optional 'start' and 'end' fields
      (checked with HasField) whose values can be converted with float().

  Returns:
    A (start, end) pair. A bound that is not set on the message is None.

  Raises:
    ValueError: if a bound that is set cannot be converted to float.
  """
  start = float(facet_range.start) if facet_range.HasField('start') else None
  end = float(facet_range.end) if facet_range.HasField('end') else None
  return (start, end)


def _IsInRange(value, bounds):
  """Returns True if value lies in the half-open interval [start, end).

  Args:
    value: the number to test.
    bounds: a (start, end) pair; a None bound means that side is unbounded.
  """
  start, end = bounds
  return ((start is None or value >= start) and
          (end is None or value < end))


class SimpleFacet(object):
  """A simple facet analyzer.

  This is a simple working model of the faceted search backend used in the
  Search API stub. It has two parts: one aggregates facet information and adds
  it to the response (FillFacetResponse), the other refines the search results
  based on the requested facet refinements (RefineResults).
  """

  def __init__(self, params):
    """Initializer.

    Args:
      params: A SearchParams protocol buffer object contain facet request
        parameters.
    """
    self._params = params
    # Aggregation state. FillFacetResponse rebuilds all three on every call;
    # they are created here so the helpers never see a missing attribute.
    self._manual_facets = {}
    self._manual_facet_map = {}
    self._discovered_facets = {}

  def FillFacetResponse(self, results, response):
    """Extract facet results and add them to the response.

    This method goes through all facets and add aggregated facet information
    to the search response according to facet request parameters.

    Args:
      results: Search Query result set.
      response: Search Query response protocol buffer objects. Facet results
        will be added to this response.

    Raises:
      ValueError: if a facet type is invalid or facet request has invalid
        values.
    """
    # Nothing to do when neither manual facets nor auto-discovery is requested.
    if (not self._params.include_facet and
        not self._params.auto_discover_facet_count):
      return

    self._PreprocessManualFacets()
    self._discovered_facets = {}

    # Only the first facet_depth results contribute to the aggregation.
    for result in results[:self._params.facet_depth]:
      for facet in result.document.facet:
        if facet.value.type == document_pb2.FacetValue.ATOM:
          self._ProcessAtomFacet(facet)
        elif facet.value.type == document_pb2.FacetValue.NUMBER:
          self._ProcessNumberFacet(facet)
        else:
          raise ValueError('Facet type %d is not supported' % facet.value.type)

    # Manual facets are always reported; discovered facets are limited to the
    # auto_discover_facet_count ones with the highest count.
    for facet in self._manual_facets.values():
      self._FillResponseForSingleFacet(facet, response.facet_result.add())
    for facet in _GetTopN(self._discovered_facets.values(),
                          self._params.auto_discover_facet_count):
      self._FillResponseForSingleFacet(facet, response.facet_result.add())

  def _PreprocessManualFacets(self):
    """Create a map for manual facets to be accessed easier by name later.

    Rebuilds self._manual_facet_map (facet name -> request params) and
    self._manual_facets (facet name -> _Facet). Every requested value
    constraint and range is pre-registered on its _Facet with a count of 0.

    Raises:
      ValueError: if a manual facet request specifies both ranges and value
        constraints, or contains an empty value constraint.
    """
    self._manual_facets = {}
    self._manual_facet_map = {}
    for manual_facet in self._params.include_facet:
      facet_params = manual_facet.params
      self._manual_facet_map[manual_facet.name] = facet_params
      if facet_params.range and facet_params.value_constraint:
        raise ValueError('Manual facet request should either specify range '
                         'or value constraint, not both')
      for constraint in facet_params.value_constraint:
        if not constraint:
          raise ValueError('Facet value is empty')

      # A per-facet value_limit overrides the auto-detect default.
      if facet_params.HasField('value_limit'):
        value_limit = facet_params.value_limit
      else:
        value_limit = self._params.facet_auto_detect_param.value_limit
      facet_obj = _Facet(manual_facet.name, value_limit)
      self._manual_facets[manual_facet.name] = facet_obj

      for value in facet_params.value_constraint:
        facet_obj.AddValue(value, 0)
      for range_request in facet_params.range:
        range_pair = _GetRangeBounds(range_request)
        facet_obj.AddValue(self._GetFacetLabel(range_request), 0,
                           refinement=range_pair)

  def _GetDiscoveredFacet(self, name):
    """Returns the auto-discovered _Facet for name, creating it if needed."""
    facet_obj = self._discovered_facets.get(name)
    if facet_obj is None:
      facet_obj = self._discovered_facets[name] = _Facet(
          name, self._params.facet_auto_detect_param.value_limit)
    return facet_obj

  def _ProcessAtomFacet(self, facet):
    """Aggregate an atom facet values for manual or auto-discovery facets.

    Args:
      facet: a document facet whose value type is ATOM.
    """
    if facet.name in self._manual_facet_map:
      manual_facet_req = self._manual_facet_map[facet.name]
      facet_obj = self._manual_facets[facet.name]
      # An atom never falls in a numeric range; otherwise it is counted when
      # there are no value constraints or when it is one of them.
      if not manual_facet_req.range and (
          not manual_facet_req.value_constraint or
          facet.value.string_value in manual_facet_req.value_constraint):
        facet_obj.AddValue(facet.value.string_value)
    elif self._params.auto_discover_facet_count:
      self._GetDiscoveredFacet(facet.name).AddValue(facet.value.string_value)

  def _ProcessNumberFacet(self, facet):
    """Aggregate a number facet values for manual or auto-discovery facets.

    Args:
      facet: a document facet whose value type is NUMBER.

    Raises:
      ValueError: if the facet value, a range bound or a value constraint
        cannot be converted to float.
    """
    facet_value = float(facet.value.string_value)
    if facet.name in self._manual_facet_map:
      manual_facet_req = self._manual_facet_map[facet.name]
      facet_obj = self._manual_facets[facet.name]
      if manual_facet_req.range:
        # The value is counted once for every requested range containing it.
        for range_request in manual_facet_req.range:
          range_pair = _GetRangeBounds(range_request)
          if _IsInRange(facet_value, range_pair):
            facet_obj.AddValue(self._GetFacetLabel(range_request),
                               refinement=range_pair)
      elif manual_facet_req.value_constraint:
        # Constraints are compared numerically, but counted under the label
        # exactly as it was requested.
        for constraint in manual_facet_req.value_constraint:
          if facet_value == float(constraint):
            facet_obj.AddValue(constraint)
      else:
        facet_obj.AddNumericValue(facet_value)
    elif self._params.auto_discover_facet_count:
      self._GetDiscoveredFacet(facet.name).AddNumericValue(facet_value)

  def _FillResponseForSingleFacet(self, facet, facet_response):
    """Convert a single _Facet to a SearchResponse.facet_result.

    Note that when the facet has collected numeric values, this adds the
    '[min,max)' range value to `facet` itself before reading its top values.

    Args:
      facet: the _Facet to convert.
      facet_response: the facet result message to fill in.
    """
    # Discovered numeric values are reported as one range from min to max.
    if isinstance(facet.min, float) and isinstance(facet.max, float):
      facet.AddValue('[%r,%r)' % (facet.min, facet.max), facet.min_max_count,
                     (facet.min, facet.max))
    facet_response.name = facet.name
    for value in facet.GetTopValues(facet.value_limit):
      resp_value = facet_response.value.add()
      resp_ref = resp_value.refinement
      # Range values carry a (start, end) pair; plain values have None and are
      # refined by their label.
      if value.refinement:
        if value.refinement[0] is not None:
          resp_ref.range.start = repr(value.refinement[0])
        if value.refinement[1] is not None:
          resp_ref.range.end = repr(value.refinement[1])
      else:
        resp_ref.value = value.label
      resp_ref.name = facet.name
      resp_value.name = value.label
      resp_value.count = value.count

  def _GetFacetLabel(self, facet_range):
    """Creates a forced (by the backend) label for facet ranges.

    Args:
      facet_range: a range request with optional 'name', 'start' and 'end'
        fields.

    Returns:
      The range name if it has one, otherwise '[start,end)' where a missing
      start is written as '-Infinity' and a missing end as 'Infinity'.
    """
    if facet_range.HasField('name'):
      return facet_range.name
    # 'start' and 'end' are plain fields, read as attributes like everywhere
    # else in this module; calling them would raise TypeError.
    start, end = _GetRangeBounds(facet_range)
    return '[%s,%s)' % ('-Infinity' if start is None else repr(start),
                        'Infinity' if end is None else repr(end))

  def RefineResults(self, results):
    """Returns refined results using facet refinement parameters.

    Refinements with the same facet name are alternatives (any of them may
    match); refinements with different names must all be satisfied.

    Args:
      results: Search Query result set.

    Returns:
      The filtered result.

    Raises:
      ValueError: for bad facet refinement parameters.
    """
    if not self._params.facet_refinement:
      return results

    # Group the refinements by facet name.
    ref_groups = {}
    for refinement in self._params.facet_refinement:
      if not refinement.value and not refinement.HasField('range'):
        raise ValueError('Facet value is empty')
      ref_groups.setdefault(refinement.name, []).append(refinement)

    return [doc for doc in results
            if self._MatchFacetRefinements(doc, ref_groups)]

  def _MatchFacetRefinements(self, doc, ref_groups):
    """Returns True if doc matches every group of same-name refinements."""
    return all(self._MatchFacetRefinementSameName(doc, ref_same_names)
               for ref_same_names in ref_groups.values())

  def _MatchFacetRefinementSameName(self, doc, ref_same_names):
    """Returns True if doc matches at least one of the given refinements."""
    return any(self._MatchFacetRefinement(doc, ref) for ref in ref_same_names)

  def _MatchFacetRefinement(self, doc, refinement):
    """Returns True if any facet of doc named like refinement matches it."""
    return any(self._MatchSingleFacetRefinement(doc_facet, refinement)
               for doc_facet in doc.document.facet
               if doc_facet.name == refinement.name)

  def _MatchSingleFacetRefinement(self, doc_facet, refinement):
    """Matches a single document facet with a single refinement.

    Args:
      doc_facet: a facet of a document.
      refinement: a facet refinement holding either a value or a range.

    Returns:
      True if the facet value satisfies the refinement.

    Raises:
      ValueError: if the refinement specifies both a value and a range, or
        neither of them.
    """
    if refinement.HasField('value'):
      if refinement.HasField('range'):
        raise ValueError('Refinement request for facet %s should either '
                         'specify range or value constraint, '
                         'not both.' % refinement.name)
      facet_value = doc_facet.value.string_value
      # Number facets are compared numerically, so '1' matches '1.0'.
      if doc_facet.value.type == document_pb2.FacetValue.NUMBER:
        return float(facet_value) == float(refinement.value)
      return facet_value == refinement.value

    if not refinement.HasField('range'):
      raise ValueError('Refinement request for facet %s should specify '
                       'range or value constraint.' % refinement.name)

    # Only number facets can satisfy a range refinement.
    if doc_facet.value.type != document_pb2.FacetValue.NUMBER:
      return False
    facet_value = float(doc_facet.value.string_value)
    return _IsInRange(facet_value, _GetRangeBounds(refinement.range))


class _FacetValue(object):
  """A representation of a single facet value."""

  def __init__(self, label, count=0, refinement=None):
    """Initializer.

    Args:
      label: label (of string type) of this value. can be the actual value or
        a custom label for ranges. If this is a custom label, refinement
        should be set.
      count: Initial number of facets with this value. This number can be
        increased later.
      refinement: If this value does not need a custom refinement, this value
        should be None. If the value needs a range refinement, this value
        should be a pair representing start and end value for the range.
    """
    self._label = label
    self._count = count
    self._refinement = refinement

  @property
  def label(self):
    return self._label

  @property
  def count(self):
    return self._count

  @property
  def refinement(self):
    return self._refinement

  def IncCount(self, value):
    """Increases the count of this value by the given amount."""
    self._count += value

  def __repr__(self):
    return '_FacetValue(label=%s, count=%d, refinement=%s)' % (
        self.label, self.count, self.refinement)


class _Facet(object):
  """Simple facet implementation that holds values and overall count."""

  def __init__(self, name, value_limit):
    """Initializer.

    Args:
      name: The name of the facet.
      value_limit: Maximum number of values for this facet.
    """
    self._name = name
    self._value_limit = value_limit
    self._values = {}
    self._count = 0
    self._min = self._max = None
    self._min_max_count = 0

  @property
  def name(self):
    return self._name

  @property
  def value_limit(self):
    return self._value_limit

  @property
  def count(self):
    # Labelled values and min/max-tracked numeric values both contribute.
    return self._count + self._min_max_count

  @property
  def min(self):
    return self._min

  @property
  def max(self):
    return self._max

  @property
  def min_max_count(self):
    return self._min_max_count

  def AddNumericValue(self, value):
    """Add value for discovered numeric facets.

    For numeric facets, we only keep minimum and maximum values not the
    actual value.

    Args:
      value: numeric value.
    """
    if self._min is None or self._min > value:
      self._min = value
    if self._max is None or self._max < value:
      self._max = value
    self._min_max_count += 1

  def AddValue(self, label, count=1, refinement=None):
    """Adds count occurrences of the value with the given label.

    Args:
      label: label of the value.
      count: number of occurrences to add.
      refinement: refinement stored when the label is seen for the first
        time; it is ignored for a label that already exists.
    """
    if label in self._values:
      self._values[label].IncCount(count)
    else:
      self._values[label] = _FacetValue(label, count, refinement)
    self._count += count

  def GetTopValues(self, n):
    """Returns at most n values of this facet with the highest count."""
    return _GetTopN(self._values.values(), n)

  def __repr__(self):
    return '_Facet(name=%s, count=%d, values=%s)' % (
        self.name, self.count, self._values)


def _GetTopN(objects, n):
  """Returns top n objects with maximum count.

  Args:
    objects: an iterable of objects that have a count property.
    n: number of top elements to return.

  Returns:
    A list of the top n elements ordered by descending count, or all the
    elements in that order if there are fewer than n.
  """
  return sorted(objects, key=lambda o: o.count, reverse=True)[:n]