elasticapm/instrumentation/packages/elasticsearch.py

# BSD 3-Clause License # # Copyright (c) 2019, Elasticsearch BV # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # * Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import absolute_import import re from typing import Optional from urllib.parse import parse_qs, urlparse import elasticapm from elasticapm.instrumentation.packages.base import AbstractInstrumentedModule from elasticapm.traces import DroppedSpan, execution_context from elasticapm.utils.logging import get_logger logger = get_logger("elasticapm.instrument") should_capture_body_re = re.compile("/(_search|_msearch|_count|_async_search|_sql|_eql)(/|$)") class ElasticsearchConnectionInstrumentation(AbstractInstrumentedModule): name = "elasticsearch_connection" def get_instrument_list(self): try: import elastic_transport # noqa: F401 return [ ("elastic_transport._node._http_urllib3", "Urllib3HttpNode.perform_request"), ("elastic_transport._node._http_requests", "RequestsHttpNode.perform_request"), ] except ImportError: return [ ("elasticsearch.connection.http_urllib3", "Urllib3HttpConnection.perform_request"), ("elasticsearch.connection.http_requests", "RequestsHttpConnection.perform_request"), ] def call(self, module, method, wrapped, instance, args, kwargs): span = execution_context.get_span() if not span or isinstance(span, DroppedSpan): return wrapped(*args, **kwargs) self._update_context_by_request_data(span.context, instance, args, kwargs) result = wrapped(*args, **kwargs) if hasattr(result, "meta"): # elasticsearch-py 8.x+ status_code = result.meta.status cluster = result.meta.headers.get("x-found-handling-cluster") else: status_code = result[0] cluster = result[1].get("x-found-handling-cluster") span.update_context("http", {"status_code": status_code}) if cluster: span.update_context("db", {"instance": cluster}) return result def _update_context_by_request_data(self, context, instance, args, kwargs) -> None: args_len = len(args) url = args[1] if args_len > 1 else kwargs.get("url") params = args[2] if args_len > 2 else kwargs.get("params") body_serialized = args[3] if args_len > 3 else kwargs.get("body") if "?" in url and not params: url, qs = url.split("?", 1) params = {k: v[0] for k, v in parse_qs(qs).items()} should_capture_body = bool(should_capture_body_re.search(url)) context["db"] = {"type": "elasticsearch"} if should_capture_body: query = [] # using both q AND body is allowed in some API endpoints / ES versions, # but not in others. We simply capture both if they are there so the # user can see it. if params and "q" in params: # 'q' may already be encoded to a byte string at this point. # We assume utf8, which is the default q = params["q"] if isinstance(q, bytes): q = q.decode("utf-8", errors="replace") query.append("q=" + q) if body_serialized: if isinstance(body_serialized, bytes): query.append(body_serialized.decode("utf-8", errors="replace")) else: query.append(body_serialized) if query: context["db"]["statement"] = "\n\n".join(query) # ES5: `host` is URL, no `port` attribute # ES6, ES7: `host` URL, `hostname` is host, `port` is port # ES8: `host` is hostname, no `hostname` attribute, `port` is `port` if not hasattr(instance, "port"): # ES5, parse hostname and port from URL stored in `host` parsed_url = urlparse(instance.host) host = parsed_url.hostname port = parsed_url.port elif not hasattr(instance, "hostname"): # ES8 (and up, one can hope) host = instance.host port = instance.port else: # ES6, ES7 host = instance.hostname port = instance.port context["destination"] = {"address": host, "port": port} class ElasticsearchTransportInstrumentation(AbstractInstrumentedModule): name = "elasticsearch_connection" def get_instrument_list(self): try: import elastic_transport # noqa: F401 return [ ("elastic_transport", "Transport.perform_request"), ] except ImportError: return [ ("elasticsearch.transport", "Transport.perform_request"), ] def call(self, module, method, wrapped, instance, args, kwargs): with elasticapm.capture_span( self._get_signature(args, kwargs), span_type="db", span_subtype="elasticsearch", span_action="query", extra={}, skip_frames=2, leaf=True, ) as span: result_data = wrapped(*args, **kwargs) hits = self._get_hits(result_data) if hits: span.update_context("db", {"rows_affected": hits}) return result_data def _get_signature(self, args, kwargs): args_len = len(args) http_method = args[0] if args_len else kwargs.get("method") http_path = args[1] if args_len > 1 else kwargs.get("url") http_path = http_path.split("?", 1)[0] # we don't want to capture a potential query string in the span name return "ES %s %s" % (http_method, http_path) def _get_hits(self, result) -> Optional[int]: if getattr(result, "body", None) and "hits" in result.body: # ES >= 8 return result.body["hits"].get("total", {}).get("value") elif isinstance(result, dict) and "hits" in result and "total" in result["hits"]: return ( result["hits"]["total"]["value"] if isinstance(result["hits"]["total"], dict) else result["hits"]["total"] )

elasticapm/instrumentation/packages/elasticsearch.py (117 lines of code) (raw):