elasticsearch/dsl/response/__init__.py (232 lines of code) (raw):
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generic,
Iterator,
List,
Mapping,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from ..utils import _R, AttrDict, AttrList, _wrap
from .hit import Hit, HitMeta
if TYPE_CHECKING:
from .. import types
from ..aggs import Agg
from ..faceted_search_base import FacetedSearchBase
from ..search_base import Request, SearchBase
from ..update_by_query_base import UpdateByQueryBase
# Public names of this module; this list is what ``from <module> import *``
# exports. ``Hit`` and ``HitMeta`` are re-exported from the ``.hit`` submodule.
__all__ = [
"Response",
"AggResponse",
"UpdateByQueryResponse",
"Hit",
"HitMeta",
"AggregateResponseType",
]
class Response(AttrDict[Any], Generic[_R]):
    """An Elasticsearch search response.

    Iteration, ``len()``, truth-testing and indexing with an integer or a
    slice all operate on :attr:`hits`; any other key is looked up in the
    wrapped response body.

    :arg took: (required) The number of milliseconds it took Elasticsearch
        to run the request. This value is calculated by measuring the time
        elapsed between receipt of a request on the coordinating node and
        the time at which the coordinating node is ready to send the
        response. It includes: communication time between the
        coordinating node and data nodes; time the request spends in the
        search thread pool, queued for execution; and actual run time. It
        does not include: time needed to send the request to
        Elasticsearch; time needed to serialize the JSON response; or
        time needed to send the response to a client.
    :arg timed_out: (required) If `true`, the request timed out before
        completion; returned results may be partial or empty.
    :arg _shards: (required) A count of shards used for the request.
    :arg hits: search results
    :arg aggregations: aggregation results
    :arg _clusters:
    :arg fields:
    :arg max_score:
    :arg num_reduce_phases:
    :arg profile:
    :arg pit_id:
    :arg _scroll_id: The identifier for the search and its search context.
        You can use this scroll ID with the scroll API to retrieve the
        next batch of search results for the request. This property is
        returned only if the `scroll` query parameter is specified in the
        request.
    :arg suggest:
    :arg terminated_early:
    """

    _search: "SearchBase[_R]"
    _faceted_search: "FacetedSearchBase[_R]"
    _doc_class: Optional[_R]
    _hits: List[_R]

    took: int
    timed_out: bool
    _shards: "types.ShardStatistics"
    _clusters: "types.ClusterStatistics"
    fields: Mapping[str, Any]
    max_score: float
    num_reduce_phases: int
    profile: "types.Profile"
    pit_id: str
    _scroll_id: str
    suggest: Mapping[
        str,
        Sequence[
            Union["types.CompletionSuggest", "types.PhraseSuggest", "types.TermSuggest"]
        ],
    ]
    terminated_early: bool

    def __init__(
        self,
        search: "Request[_R]",
        response: Dict[str, Any],
        doc_class: Optional[_R] = None,
    ):
        """Wrap a raw search response.

        :arg search: the request object that produced this response
        :arg response: the response body to wrap
        :arg doc_class: optional document class, stored as ``_doc_class``
        """
        # Bypass AttrDict.__setattr__ so these bookkeeping attributes are
        # stored on the instance rather than in the wrapped response data.
        super(AttrDict, self).__setattr__("_search", search)
        super(AttrDict, self).__setattr__("_doc_class", doc_class)
        super().__init__(response)

    def __iter__(self) -> Iterator[_R]:  # type: ignore[override]
        """Iterate over the hits of this response."""
        return iter(self.hits)

    def __getitem__(self, key: Union[slice, int, str]) -> Any:
        """Return hit(s) for an integer/slice key, else the response field."""
        if isinstance(key, (slice, int)):
            # for slicing etc
            return self.hits[key]
        return super().__getitem__(key)

    def __nonzero__(self) -> bool:
        """Return the truth value of :attr:`hits`."""
        return bool(self.hits)

    __bool__ = __nonzero__

    def __repr__(self) -> str:
        return f"<Response: {(self.hits or self.aggregations)!r}>"

    def __len__(self) -> int:
        """Return the number of hits contained in this response."""
        return len(self.hits)

    def __getstate__(self) -> Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]:  # type: ignore[override]
        """Return the ``(data, search, doc_class)`` triple used for pickling."""
        return self._d_, self._search, self._doc_class

    def __setstate__(
        self, state: Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]  # type: ignore[override]
    ) -> None:
        """Restore the instance from the triple built by ``__getstate__``."""
        super(AttrDict, self).__setattr__("_d_", state[0])
        super(AttrDict, self).__setattr__("_search", state[1])
        super(AttrDict, self).__setattr__("_doc_class", state[2])

    def success(self) -> bool:
        """Return ``True`` if every shard succeeded and the request did not time out."""
        return self._shards.total == self._shards.successful and not self.timed_out

    @property
    def hits(self) -> List[_R]:
        """The search hits, built on first access and then cached.

        Each raw hit is converted with the originating request's
        ``_get_result``; the remaining keys of the ``hits`` section are set
        as attributes on the returned list.

        :raises TypeError: if an ``AttributeError`` occurs while converting
            the hits (the original error is kept as the cause).
        """
        if not hasattr(self, "_hits"):
            h = cast(AttrDict[Any], self._d_["hits"])

            try:
                hits = AttrList(list(map(self._search._get_result, h["hits"])))
            except AttributeError as e:
                # avoid raising AttributeError since it will be hidden by the property
                raise TypeError("Could not parse hits.", e) from e

            # avoid assigning _hits into self._d_
            super(AttrDict, self).__setattr__("_hits", hits)
            for k in h:
                setattr(self._hits, k, _wrap(h[k]))
        return self._hits

    @property
    def aggregations(self) -> "AggResponse[_R]":
        """Alias of :attr:`aggs`."""
        return self.aggs

    @property
    def aggs(self) -> "AggResponse[_R]":
        """The aggregation results, built on first access and then cached.

        A response without an ``aggregations`` section yields an
        ``AggResponse`` over an empty dict.
        """
        if not hasattr(self, "_aggs"):
            aggs = AggResponse[_R](
                cast("Agg[_R]", self._search.aggs),
                self._search,
                cast(Dict[str, Any], self._d_.get("aggregations", {})),
            )

            # avoid assigning _aggs into self._d_
            super(AttrDict, self).__setattr__("_aggs", aggs)
        return cast("AggResponse[_R]", self._aggs)

    def search_after(self) -> "SearchBase[_R]":
        """
        Return a ``Search`` instance that retrieves the next page of results.

        This method provides an easy way to paginate a long list of results using
        the ``search_after`` option. For example::

            page_size = 20
            s = Search()[:page_size].sort("date")

            while True:
                # get a page of results
                r = await s.execute()

                # do something with this page of results

                # exit the loop if we reached the end
                if len(r.hits) < page_size:
                    break

                # get a search object with the next page of results
                s = r.search_after()

        Note that the ``search_after`` option requires the search to have an
        explicit ``sort`` order.

        :raises ValueError: if the response has no hits, or if the last hit
            carries no ``sort`` value in its metadata.
        """
        if len(self.hits) == 0:
            raise ValueError("Cannot use search_after when there are no search results")
        if not hasattr(self.hits[-1].meta, "sort"):  # type: ignore[attr-defined]
            raise ValueError("Cannot use search_after when results are not sorted")
        # The sort values of the last hit are the cursor for the next page.
        return self._search.extra(search_after=self.hits[-1].meta.sort)  # type: ignore[attr-defined]
# Union of the aggregate result types; used below as the return annotation of
# ``AggResponse.__getitem__`` and ``AggResponse.__iter__``. Members are written
# as string forward references because ``types`` is imported only under
# ``TYPE_CHECKING``.
AggregateResponseType = Union[
"types.CardinalityAggregate",
"types.HdrPercentilesAggregate",
"types.HdrPercentileRanksAggregate",
"types.TDigestPercentilesAggregate",
"types.TDigestPercentileRanksAggregate",
"types.PercentilesBucketAggregate",
"types.MedianAbsoluteDeviationAggregate",
"types.MinAggregate",
"types.MaxAggregate",
"types.SumAggregate",
"types.AvgAggregate",
"types.WeightedAvgAggregate",
"types.ValueCountAggregate",
"types.SimpleValueAggregate",
"types.DerivativeAggregate",
"types.BucketMetricValueAggregate",
"types.StatsAggregate",
"types.StatsBucketAggregate",
"types.ExtendedStatsAggregate",
"types.ExtendedStatsBucketAggregate",
"types.GeoBoundsAggregate",
"types.GeoCentroidAggregate",
"types.HistogramAggregate",
"types.DateHistogramAggregate",
"types.AutoDateHistogramAggregate",
"types.VariableWidthHistogramAggregate",
"types.StringTermsAggregate",
"types.LongTermsAggregate",
"types.DoubleTermsAggregate",
"types.UnmappedTermsAggregate",
"types.LongRareTermsAggregate",
"types.StringRareTermsAggregate",
"types.UnmappedRareTermsAggregate",
"types.MultiTermsAggregate",
"types.MissingAggregate",
"types.NestedAggregate",
"types.ReverseNestedAggregate",
"types.GlobalAggregate",
"types.FilterAggregate",
"types.ChildrenAggregate",
"types.ParentAggregate",
"types.SamplerAggregate",
"types.UnmappedSamplerAggregate",
"types.GeoHashGridAggregate",
"types.GeoTileGridAggregate",
"types.GeoHexGridAggregate",
"types.RangeAggregate",
"types.DateRangeAggregate",
"types.GeoDistanceAggregate",
"types.IpRangeAggregate",
"types.IpPrefixAggregate",
"types.FiltersAggregate",
"types.AdjacencyMatrixAggregate",
"types.SignificantLongTermsAggregate",
"types.SignificantStringTermsAggregate",
"types.UnmappedSignificantTermsAggregate",
"types.CompositeAggregate",
"types.FrequentItemSetsAggregate",
"types.TimeSeriesAggregate",
"types.ScriptedMetricAggregate",
"types.TopHitsAggregate",
"types.InferenceAggregate",
"types.StringStatsAggregate",
"types.BoxPlotAggregate",
"types.TopMetricsAggregate",
"types.TTestAggregate",
"types.RateAggregate",
"types.CumulativeCardinalityAggregate",
"types.MatrixStatsAggregate",
"types.GeoLineAggregate",
]
class AggResponse(AttrDict[Any], Generic[_R]):
    """Wrapper around the aggregation results of a search response.

    Results whose name is declared on the originating aggregation object are
    passed through that aggregation's ``result()`` when accessed; any other
    key is served by ``AttrDict``.
    """

    _meta: Dict[str, Any]

    def __init__(self, aggs: "Agg[_R]", search: "Request[_R]", data: Dict[str, Any]):
        """Store the aggregation definitions and request, then wrap ``data``."""
        bookkeeping = {"search": search, "aggs": aggs}
        # Set through the class after AttrDict in the MRO, bypassing
        # AttrDict.__setattr__.
        super(AttrDict, self).__setattr__("_meta", bookkeeping)
        super().__init__(data)

    def __getitem__(self, attr_name: str) -> AggregateResponseType:
        """Return the result stored under ``attr_name``."""
        declared = self._meta["aggs"]
        if attr_name not in declared:
            # Not a declared aggregation: plain AttrDict lookup.
            return super().__getitem__(attr_name)  # type: ignore[no-any-return]

        # go through ``.aggs`` rather than indexing the aggregation object
        # itself, to avoid copying
        definition = declared.aggs[attr_name]
        return cast(
            AggregateResponseType,
            definition.result(self._meta["search"], self._d_[attr_name]),
        )

    def __iter__(self) -> Iterator[AggregateResponseType]:  # type: ignore[override]
        """Yield the result of every aggregation declared on the request."""
        yield from (self[agg_name] for agg_name in self._meta["aggs"])
class UpdateByQueryResponse(AttrDict[Any], Generic[_R]):
    """Response returned by an Elasticsearch update by query request.

    :arg batches: Number of scroll responses pulled back by the update by
        query.
    :arg failures: Array of failures, if any unrecoverable errors occurred
        during the process. A non-empty array means the request ended
        because of those failures. Update by query is implemented using
        batches: any failure ends the whole process, but all failures in
        the current batch are collected into the array. The `conflicts`
        option can be used to prevent reindex from ending when version
        conflicts occur.
    :arg noops: Number of documents that were ignored because the script
        used for the update by query returned a noop value for `ctx.op`.
    :arg deleted: Number of documents that were successfully deleted.
    :arg requests_per_second: Number of requests per second effectively
        run during the update by query.
    :arg retries: Number of retries attempted by update by query. `bulk`
        is the number of bulk actions retried. `search` is the number of
        search actions retried.
    :arg task:
    :arg timed_out: If true, some requests timed out during the update by
        query.
    :arg took: Number of milliseconds from start to end of the whole
        operation.
    :arg total: Number of documents that were successfully processed.
    :arg updated: Number of documents that were successfully updated.
    :arg version_conflicts: Number of version conflicts that the update by
        query hit.
    :arg throttled:
    :arg throttled_millis: Number of milliseconds the request slept to
        conform to `requests_per_second`.
    :arg throttled_until:
    :arg throttled_until_millis: This field should always be equal to zero
        in an _update_by_query response. It only has meaning when using
        the task API, where it indicates the next time (in milliseconds
        since epoch) a throttled request will be run again in order to
        conform to `requests_per_second`.
    """

    _search: "UpdateByQueryBase[_R]"

    batches: int
    failures: Sequence["types.BulkIndexByScrollFailure"]
    noops: int
    deleted: int
    requests_per_second: float
    retries: "types.Retries"
    task: Union[str, int]
    timed_out: bool
    took: Any
    total: int
    updated: int
    version_conflicts: int
    throttled: Any
    throttled_millis: Any
    throttled_until: Any
    throttled_until_millis: Any

    def __init__(
        self,
        search: "Request[_R]",
        response: Dict[str, Any],
        doc_class: Optional[_R] = None,
    ):
        """Remember the originating request and document class, then wrap ``response``."""
        # Setter taken from the class after AttrDict in the MRO, so it
        # bypasses AttrDict.__setattr__.
        store_raw = super(AttrDict, self).__setattr__
        store_raw("_search", search)
        store_raw("_doc_class", doc_class)
        super().__init__(response)

    def success(self) -> bool:
        """Return ``True`` when the request neither timed out nor reported failures."""
        return not (self.timed_out or self.failures)