elasticapm/processors.py (167 lines of code) (raw):

# BSD 3-Clause License # # Copyright (c) 2012, the Sentry Team, see AUTHORS for more details # Copyright (c) 2019, Elasticsearch BV # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # * Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE import warnings from collections import defaultdict from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, ERROR, MASK, SPAN, TRANSACTION from elasticapm.utils import varmap from elasticapm.utils.encoding import force_text from elasticapm.utils.stacks import get_lines_from_file def for_events(*events): """ :param events: list of event types Only calls wrapped function if given event_type is in list of events """ events = set(events) def wrap(func): func.event_types = events return func return wrap @for_events(ERROR, TRANSACTION) def remove_http_request_body(client, event): """ Removes request.body from context :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ if "context" in event and "request" in event["context"]: event["context"]["request"].pop("body", None) return event @for_events(ERROR, SPAN) def remove_stacktrace_locals(client, event): """ Removes local variables from any frames. :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ func = lambda frame: frame.pop("vars", None) return _process_stack_frames(event, func) @for_events(ERROR, SPAN) def sanitize_stacktrace_locals(client, event): """ Sanitizes local variables in all frames :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ def func(frame) -> None: if "vars" in frame: frame["vars"] = varmap(_sanitize, frame["vars"], sanitize_field_names=client.config.sanitize_field_names) return _process_stack_frames(event, func) @for_events(ERROR, TRANSACTION) def sanitize_http_request_cookies(client, event): """ Sanitizes http request cookies :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ # sanitize request.cookies dict try: cookies = event["context"]["request"]["cookies"] event["context"]["request"]["cookies"] = varmap( _sanitize, cookies, sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): pass # sanitize request.header.cookie string try: cookie_string = force_text(event["context"]["request"]["headers"]["cookie"], errors="replace") event["context"]["request"]["headers"]["cookie"] = _sanitize_string( cookie_string, "; ", "=", sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): try: # Sometimes it's Cookie, not cookie cookie_string = force_text(event["context"]["request"]["headers"]["Cookie"], errors="replace") event["context"]["request"]["headers"]["Cookie"] = _sanitize_string( cookie_string, "; ", "=", sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): pass return event @for_events(ERROR, TRANSACTION) def sanitize_http_response_cookies(client, event): """ Sanitizes the set-cookie header of the response :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ try: cookie_string = force_text(event["context"]["response"]["headers"]["set-cookie"], errors="replace") event["context"]["response"]["headers"]["set-cookie"] = _sanitize_string( cookie_string, ";", "=", sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): pass return event @for_events(ERROR, TRANSACTION) def sanitize_http_headers(client, event): """ Sanitizes http request/response headers :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ # request headers try: headers = event["context"]["request"]["headers"] event["context"]["request"]["headers"] = varmap( _sanitize, headers, sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): pass # response headers try: headers = event["context"]["response"]["headers"] event["context"]["response"]["headers"] = varmap( _sanitize, headers, sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): pass return event @for_events(ERROR, TRANSACTION) def sanitize_http_wsgi_env(client, event): """ Sanitizes WSGI environment variables :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ try: env = event["context"]["request"]["env"] event["context"]["request"]["env"] = varmap( _sanitize, env, sanitize_field_names=client.config.sanitize_field_names ) except (KeyError, TypeError): pass return event @for_events(ERROR, TRANSACTION) def sanitize_http_request_body(client, event): """ Sanitizes http request body. This only works if the request body is a query-encoded string. Other types (e.g. JSON) are not handled by this sanitizer. :param client: an ElasticAPM client :param event: a transaction or error event :return: The modified event """ try: body = force_text(event["context"]["request"]["body"], errors="replace") except (KeyError, TypeError): return event if "=" in body: sanitized_query_string = _sanitize_string( body, "&", "=", sanitize_field_names=client.config.sanitize_field_names ) event["context"]["request"]["body"] = sanitized_query_string return event @for_events(ERROR, SPAN) def add_context_lines_to_frames(client, event): # divide frames up into source files before reading from disk. This should help # with utilizing the disk cache better # # TODO: further optimize by only opening each file once and reading all needed source # TODO: blocks at once. per_file = defaultdict(list) _process_stack_frames( event, lambda frame: per_file[frame["context_metadata"][0]].append(frame) if "context_metadata" in frame else None, ) for frames in per_file.values(): for frame in frames: # context_metadata key has been set in elasticapm.utils.stacks.get_frame_info for # all frames for which we should gather source code context lines fname, lineno, context_lines, loader, module_name = frame.pop("context_metadata") pre_context, context_line, post_context = get_lines_from_file( fname, lineno, context_lines, loader, module_name ) if context_line: frame["pre_context"] = pre_context frame["context_line"] = context_line frame["post_context"] = post_context return event @for_events(ERROR, SPAN) def mark_in_app_frames(client, event): warnings.warn( "The mark_in_app_frames processor is deprecated and can be removed from your PROCESSORS setting", DeprecationWarning, ) return event def _sanitize(key, value, **kwargs): if "sanitize_field_names" in kwargs: sanitize_field_names = kwargs["sanitize_field_names"] else: sanitize_field_names = BASE_SANITIZE_FIELD_NAMES if value is None: return if isinstance(value, dict): # varmap will call _sanitize on each k:v pair of the dict, so we don't # have to do anything with dicts here return value if not key: # key can be a NoneType return value key = key.lower() for field in sanitize_field_names: if field.match(key.strip()): # store mask as a fixed length for security return MASK return value def _sanitize_string(unsanitized, itemsep, kvsep, sanitize_field_names=BASE_SANITIZE_FIELD_NAMES): """ sanitizes a string that contains multiple key/value items :param unsanitized: the unsanitized string :param itemsep: string that separates items :param kvsep: string that separates key from value :param sanitize_field_names: field names to pass to _sanitize :return: a sanitized string """ sanitized = [] kvs = unsanitized.split(itemsep) for kv in kvs: kv = kv.split(kvsep) if len(kv) == 2: sanitized.append((kv[0], _sanitize(kv[0], kv[1], sanitize_field_names=sanitize_field_names))) else: sanitized.append(kv) return itemsep.join(kvsep.join(kv) for kv in sanitized) def _process_stack_frames(event, func): if "stacktrace" in event: for frame in event["stacktrace"]: func(frame) # an error can have two stacktraces, one in "exception", one in "log" if "exception" in event and "stacktrace" in event["exception"]: for frame in event["exception"]["stacktrace"]: func(frame) # check for chained exceptions cause = event["exception"].get("cause", None) while cause: if "stacktrace" in cause[0]: for frame in cause[0]["stacktrace"]: func(frame) cause = cause[0].get("cause", None) if "log" in event and "stacktrace" in event["log"]: for frame in event["log"]["stacktrace"]: func(frame) return event