prod/native/extension/code/InternalFunctionInstrumentation.cpp (371 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch B.V. licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #include "ModuleGlobals.h" #include "InternalFunctionInstrumentation.h" #include "AutoZval.h" #include "PhpBridge.h" #include "LoggerInterface.h" #include "Zend/zend.h" #include "Zend/zend_exceptions.h" #include "Zend/zend_hash.h" #include "Zend/zend_globals.h" #include <Zend/zend_observer.h> #include "InternalFunctionInstrumentationStorage.h" #include "RequestScope.h" #include "InstrumentedFunctionHooksStorage.h" #include <array> #include <algorithm> namespace elasticapm::php { using namespace std::literals; using InternalStorage_t = InternalFunctionInstrumentationStorage<zend_ulong, zif_handler>; void handleAndReleaseHookException(zend_object *exception) { if (!exception || !instanceof_function(exception->ce, zend_ce_throwable)) { return; } ELOGF_ERROR(EAPM_GL(logger_), INSTRUMENTATION, "Instrumentation hook error: %s", exceptionToString(exception).c_str()); OBJ_RELEASE(exception); } uint32_t getFunctionArgumentIndex(zend_string *name, zend_function *function) { uint32_t numArgs = function->common.num_args; if (function->type == ZEND_USER_FUNCTION || (function->common.fn_flags & ZEND_ACC_USER_ARG_INFO)) { for (uint32_t i = 0; i < numArgs; i++) { if (zend_string_equals(name, function->op_array.arg_info[i].name)) { return i; } } } else { std::string_view nameSv(ZSTR_VAL(name), ZSTR_LEN(name)); for (uint32_t i = 0; i < numArgs; i++) { std::string_view argName(function->internal_function.arg_info[i].name); if (nameSv == argName) { return i; } } } throw std::runtime_error("argument not found"); } void argsPostProcessing(AutoZval &functionArgs, AutoZval &returnValue) { if (!returnValue.isArray()) { return; } if (zend_is_identical(returnValue.get(), functionArgs.get())) { return; } zend_ulong argIndex = 0; zend_string *argStrKey = nullptr; zval *argValue = nullptr; zend_execute_data *execute_data = EG(current_execute_data); uint32_t requiredArgsCount = execute_data->func->type == ZEND_INTERNAL_FUNCTION ? ZEND_CALL_NUM_ARGS(execute_data) : execute_data->func->op_array.last_var; uint32_t initalCallNumArgs = ZEND_CALL_NUM_ARGS(execute_data); ELOGF_DEBUG(EAPM_GL(logger_), INSTRUMENTATION, "argsPostProcessing requiredArgsCount: %d initialCallNumArgs: %d", requiredArgsCount, initalCallNumArgs); uint32_t highestArgIdx = 0; ZEND_HASH_FOREACH_KEY_VAL(Z_ARR_P(returnValue.get()), argIndex, argStrKey, argValue) { if (!argStrKey) { highestArgIdx = std::max(highestArgIdx, (uint32_t)argIndex); } } ZEND_HASH_FOREACH_END(); ELOGF_DEBUG(EAPM_GL(logger_), INSTRUMENTATION, "argsPostProcessing highestArgIdx: %d vm_stack free: %d", highestArgIdx, EG(vm_stack_end) - EG(vm_stack_top)); // extending stack and undefining potential gaps if (highestArgIdx + 1 > initalCallNumArgs) { uint32_t howManyArgsToAdd = highestArgIdx + 1 - initalCallNumArgs; ELOGF_DEBUG(EAPM_GL(logger_), INSTRUMENTATION, "postProcessing trying extend stack frame with %d arguments", howManyArgsToAdd); zend_vm_stack_extend_call_frame(&execute_data, initalCallNumArgs, howManyArgsToAdd); for (uint32_t idx = 0; idx < howManyArgsToAdd; ++idx) { zval *target = ZEND_CALL_ARG(execute_data, execute_data->func->type == ZEND_INTERNAL_FUNCTION ? initalCallNumArgs + idx + 1 : initalCallNumArgs + idx + 1 + execute_data->func->op_array.T); ZVAL_UNDEF(target); } ZEND_CALL_NUM_ARGS(execute_data) += howManyArgsToAdd; ZEND_ADD_CALL_FLAG(execute_data, ZEND_CALL_FREE_EXTRA_ARGS); ZEND_ADD_CALL_FLAG(execute_data, ZEND_CALL_MAY_HAVE_UNDEF); } ZEND_HASH_FOREACH_KEY_VAL(Z_ARR_P(returnValue.get()), argIndex, argStrKey, argValue) { if (argStrKey) { ELOGF_DEBUG(EAPM_GL(logger_), INSTRUMENTATION, "argsPostProcessing str: %s", ZSTR_VAL(argStrKey)); try { argIndex = getFunctionArgumentIndex(argStrKey, execute_data->func); } catch (std::exception const &e) { ELOGF_WARNING(EAPM_GL(logger_), INSTRUMENTATION, "postProcessing argument index not found for: '%s'", ZSTR_VAL(argStrKey)); continue; } } ELOGF_DEBUG(EAPM_GL(logger_), INSTRUMENTATION, "argsPostProcessing idx: %d", argIndex); zval *target = nullptr; if (argIndex < requiredArgsCount) { target = ZEND_CALL_ARG(execute_data, argIndex + 1); } else { target = ZEND_CALL_ARG(execute_data, execute_data->func->type == ZEND_INTERNAL_FUNCTION ? argIndex + 1 : argIndex + 1 + execute_data->func->op_array.T); } //TODO consider refs zval_ptr_dtor(target); ZVAL_COPY(target, argValue); } ZEND_HASH_FOREACH_END(); } void callPreHook(AutoZval &prehook) { zend_fcall_info fci = empty_fcall_info; zend_fcall_info_cache fcc = empty_fcall_info_cache; if (zend_fcall_info_init(const_cast<zval *>(prehook.get()), 0, &fci, &fcc, nullptr, nullptr) == ZEND_RESULT_CODE::FAILURE) { throw std::runtime_error("Unable to initialize prehook fcall"); } std::array<AutoZval, 6> parameters; getScopeNameOrThis(parameters[0].get(), EG(current_execute_data)); getCallArguments(parameters[1].get(), EG(current_execute_data)); getFunctionDeclaringScope(parameters[2].get(), EG(current_execute_data)); getFunctionName(parameters[3].get(), EG(current_execute_data)); getFunctionDeclarationFileName(parameters[4].get(), EG(current_execute_data)); getFunctionDeclarationLineNo(parameters[5].get(), EG(current_execute_data)); AutoZval ret; fci.param_count = parameters.size(); fci.params = parameters[0].get(); fci.named_params = nullptr; fci.retval = ret.get(); if (zend_call_function(&fci, &fcc) != SUCCESS) { throw std::runtime_error("Unable to call prehook function"); } argsPostProcessing(parameters[1], ret); } void callPostHook(AutoZval &hook, zval *return_value, zend_object *exception, zend_execute_data *execute_data) { zend_fcall_info fci = empty_fcall_info; zend_fcall_info_cache fcc = empty_fcall_info_cache; if (zend_fcall_info_init(const_cast<zval *>(hook.get()), 0, &fci, &fcc, nullptr, nullptr) == ZEND_RESULT_CODE::FAILURE) { throw std::runtime_error("Unable to initialize posthook fcall"); } std::array<AutoZval, 8> parameters; getScopeNameOrThis(parameters[0].get(), EG(current_execute_data)); getCallArguments(parameters[1].get(), EG(current_execute_data)); getFunctionReturnValue(parameters[2].get(), return_value); getCurrentException(parameters[3].get(), exception); getFunctionDeclaringScope(parameters[4].get(), EG(current_execute_data)); getFunctionName(parameters[5].get(), EG(current_execute_data)); getFunctionDeclarationFileName(parameters[6].get(), EG(current_execute_data)); getFunctionDeclarationLineNo(parameters[7].get(), EG(current_execute_data)); AutoZval hookRv; fci.param_count = parameters.size(); fci.params = parameters[0].get(); fci.named_params = nullptr; fci.retval = hookRv.get(); if (zend_call_function(&fci, &fcc) != SUCCESS) { throw std::runtime_error("Unable to call posthook function"); } if (Z_TYPE_P(hookRv.get()) == IS_UNDEF) { return; } if (!return_value) { return; } // thre is no way to distinguish if posthook returned NULL, becuase in PHP functions are always returning NULL, even if there is no return keyword // in that case we can only try to overwrite return value for posthooks with return value type specified explicitly if (!(fcc.function_handler->op_array.fn_flags & ZEND_ACC_HAS_RETURN_TYPE) || (ZEND_TYPE_PURE_MASK(fcc.function_handler->common.arg_info[-1].type) & MAY_BE_VOID)) { ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "callPostHook hook doesn't explicitly specify return type other than void"); return; } if (execute_data->func->op_array.fn_flags & ZEND_ACC_HAS_RETURN_TYPE) { // uncomment if want to block possibility of adding rv to instrumented void-rv function // if ((ZEND_TYPE_PURE_MASK(execute_data->func->common.arg_info[-1].type) & MAY_BE_VOID)) { // return; // } bool sameType = ZEND_TYPE_CONTAINS_CODE(execute_data->func->common.arg_info[-1].type, Z_TYPE_P(hookRv.get())); ELOGF_DEBUG(EAPM_GL(logger_), INSTRUMENTATION, "callPostHook hasRvType: %d, isVoid: %d sameType: %d, hookRvType: %d", execute_data->func->op_array.fn_flags & ZEND_ACC_HAS_RETURN_TYPE, static_cast<bool>(ZEND_TYPE_PURE_MASK(execute_data->func->common.arg_info[-1].type) & MAY_BE_VOID), sameType, hookRv.getType()); } zval_ptr_dtor(return_value); ZVAL_COPY(return_value, hookRv.get()); } inline void callOriginalHandler(zif_handler handler, INTERNAL_FUNCTION_PARAMETERS) { zend_try { handler(INTERNAL_FUNCTION_PARAM_PASSTHRU); } zend_catch { if (*EG(bailout)) { LONGJMP(*EG(bailout), FAILURE); } else { zend_bailout(); } } zend_end_try(); } void ZEND_FASTCALL internal_function_handler(INTERNAL_FUNCTION_PARAMETERS) { auto hash = getClassAndFunctionHashFromExecuteData(execute_data); auto originalHandler = InternalStorage_t::getInstance().get(hash); if (!originalHandler) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_CRITICAL(EAPM_GL(logger_), INSTRUMENTATION, "Unable to find function handler " PRsv "::" PRsv, PRsvArg(cls), PRsvArg(func)); return; } if (!EAPM_GL(requestScope_)->isFunctional()) { callOriginalHandler(originalHandler, INTERNAL_FUNCTION_PARAM_PASSTHRU); return; } auto callbacks = reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->find(hash); if (!callbacks) { callOriginalHandler(originalHandler, INTERNAL_FUNCTION_PARAM_PASSTHRU); ELOGF_WARNING(EAPM_GL(logger_), INSTRUMENTATION, "Unable to find function callbacks"); return; } for (auto &callback : *callbacks) { if (callback.first.isNull() || callback.first.isUndef()) { continue; } try { AutomaticExceptionStateRestorer restorer; callPreHook(callback.first); handleAndReleaseHookException(EG(exception)); } catch (std::exception const &e) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_CRITICAL(EAPM_GL(logger_), INSTRUMENTATION, "%s hash: 0x%X " PRsv "::" PRsv, e.what(), hash, PRsvArg(cls), PRsvArg(func)); } } callOriginalHandler(originalHandler, INTERNAL_FUNCTION_PARAM_PASSTHRU); for (auto &callback : *callbacks) { if (callback.second.isNull() || callback.second.isUndef()) { continue; } try { AutomaticExceptionStateRestorer restorer; callPostHook(callback.second, return_value, restorer.getException(), execute_data); handleAndReleaseHookException(EG(exception)); } catch (std::exception const &e) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_CRITICAL(EAPM_GL(logger_), INSTRUMENTATION, "%s hash: 0x%X " PRsv "::" PRsv, e.what(), hash, PRsvArg(cls), PRsvArg(func)); } } } bool instrumentFunction(LoggerInterface *log, std::string_view cName, std::string_view fName, zval *callableOnEntry, zval *callableOnExit) { //TODO if called from other place that MINIT - make it thread safe in ZTS //TODO use hash struct instead of combined to prevent collisions std::string className{cName.data(), cName.length()}; std::string functionName{fName.data(), fName.length()}; std::transform(className.begin(), className.end(), className.begin(), [](unsigned char c){ return std::tolower(c); }); std::transform(functionName.begin(), functionName.end(), functionName.begin(), [](unsigned char c){ return std::tolower(c); }); HashTable *table = nullptr; zend_ulong classHash = 0; if (className.empty()) { // looking for function table = EG(function_table); } else { if (!EG(class_table)) { ELOGF_DEBUG(log, INSTRUMENTATION, "instrumentFunction Class table is empty. Function " PRsv "::" PRsv " not found and cannot be instrumented.", PRsvArg(className), PRsvArg(functionName)); return false; } auto ce = static_cast<zend_class_entry *>(zend_hash_str_find_ptr(EG(class_table), className.data(), className.length())); if (!ce) { ELOGF_DEBUG(log, INSTRUMENTATION, "instrumentFunction Class not found. Function " PRsv "::" PRsv " not found and cannot be instrumented.", PRsvArg(className), PRsvArg(functionName)); if (log->doesMeetsLevelCondition(logLevel_trace)) { zend_string *argStrKey = nullptr; ZEND_HASH_FOREACH_STR_KEY(EG(class_table), argStrKey) { if (argStrKey) { ELOGF_DEBUG(log, INSTRUMENTATION, "instrumentFunction Class not found. Function " PRsv "::" PRsv " not found and cannot be instrumented. %s", PRsvArg(className), PRsvArg(functionName), ZSTR_VAL(argStrKey)); } } ZEND_HASH_FOREACH_END(); } return false; } table = &ce->function_table; classHash = ZSTR_HASH(ce->name); } if (!table) { return false; } zend_function *func = reinterpret_cast<zend_function *>(zend_hash_str_find_ptr(table, functionName.data(), functionName.length())); if (!func) { ELOGF_DEBUG(log, INSTRUMENTATION, "instrumentFunction " PRsv "::" PRsv " not found and cannot be instrumented.", PRsvArg(className), PRsvArg(functionName)); return false; } zend_ulong funcHash = ZSTR_HASH(func->common.function_name); zend_ulong hash = classHash ^ (funcHash << 1); ELOGF_DEBUG(log, INSTRUMENTATION, "instrumentFunction 0x%X " PRsv "::" PRsv " type: %s is marked to be instrumented", hash, PRsvArg(className), PRsvArg(functionName), func->common.type == ZEND_INTERNAL_FUNCTION ? "internal" : "user"); reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->store(hash, AutoZval{callableOnEntry}, AutoZval{callableOnExit}); // we only keep original handler for internal (native) functions if (func->common.type == ZEND_INTERNAL_FUNCTION) { if (func->internal_function.handler != internal_function_handler) { InternalStorage_t::getInstance().store(hash, func->internal_function.handler); func->internal_function.handler = internal_function_handler; } ELOGF_DEBUG(log, INSTRUMENTATION, PRsv "::" PRsv " instrumented, key: 0x%X", PRsvArg(className), PRsvArg(functionName), hash); } return true; } void elasticObserverFcallBeginHandler(zend_execute_data *execute_data) { auto hash = getClassAndFunctionHashFromExecuteData(execute_data); ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "elasticObserverFcallBeginHandler hash 0x%X", hash); auto callbacks = reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->find(hash); if (!callbacks) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_ERROR(EAPM_GL(logger_), INSTRUMENTATION, "Unable to find prehook handler for 0x%X " PRsv "::" PRsv, hash, PRsvArg(cls), PRsvArg(func)); return; } for (auto &callback : *callbacks) { try { AutomaticExceptionStateRestorer restorer; callPreHook(callback.first); handleAndReleaseHookException(EG(exception)); } catch (std::exception const &e) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_CRITICAL(EAPM_GL(logger_), INSTRUMENTATION, "elasticObserverFcallBeginHandler. Unable to call prehook for 0x%X " PRsv "::" PRsv ": '%s'", hash, PRsvArg(cls), PRsvArg(func), e.what()); } } } void elasticObserverFcallEndHandler(zend_execute_data *execute_data, zval *retval) { auto hash = getClassAndFunctionHashFromExecuteData(execute_data); ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "elasticObserverFcallEndHandler hash 0x%X", hash); auto callbacks = reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->find(hash); if (!callbacks) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_ERROR(EAPM_GL(logger_), INSTRUMENTATION, "Unable to find posthook handler for 0x%X " PRsv "::" PRsv, hash, PRsvArg(cls), PRsvArg(func)); return; } for (auto &callback : *callbacks) { try { AutomaticExceptionStateRestorer restorer; callPostHook(callback.second, retval, restorer.getException(), execute_data); handleAndReleaseHookException(EG(exception)); } catch (std::exception const &e) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_CRITICAL(EAPM_GL(logger_), INSTRUMENTATION, "elasticObserverFcallEndHandler. Unable to call posthook for 0x%X " PRsv "::" PRsv ": '%s'", hash, PRsvArg(cls), PRsvArg(func), e.what()); } } } zend_observer_fcall_handlers elasticRegisterObserver(zend_execute_data *execute_data) { if (execute_data->func->common.type == ZEND_INTERNAL_FUNCTION) { return {nullptr, nullptr}; } auto hash = getClassAndFunctionHashFromExecuteData(execute_data); if (hash == 0) { ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "elasticRegisterObserver main scope"); return {nullptr, nullptr}; } auto callbacks = reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->find(hash); if (!callbacks) { if (EAPM_GL(logger_)->doesMeetsLevelCondition(LogLevel::logLevel_trace)) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "elasticRegisterObserver hash: 0x%X " PRsv "::" PRsv ", not marked to be instrumented", hash, PRsvArg(cls), PRsvArg(func)); } // lookup for class interfaces auto ce = execute_data->func->common.scope; if (ce) { for (uint32_t i = 0; i < ce->num_interfaces; ++i) { auto classHash = ZSTR_HASH(ce->interfaces[i]->name); zend_ulong funcHash = ZSTR_HASH(execute_data->func->common.function_name); zend_ulong ifaceHash = classHash ^ (funcHash << 1); callbacks = reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->find(ifaceHash); if (callbacks) { if (EAPM_GL(logger_)->doesMeetsLevelCondition(LogLevel::logLevel_trace)) { auto [cls, func] = getClassAndFunctionName(execute_data); ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "elasticRegisterObserver hash: 0x%X " PRsv "::" PRsv ", will be instrumented because interface 0x%X '" PRsv "' was marked to be instrumented", hash, PRsvArg(cls), PRsvArg(func), ifaceHash, PRzsArg(ce->interfaces[i]->name)); } // copy callbacks from interface storage hash to implementation hash for (auto &item : *callbacks) { reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->store(hash, AutoZval(item.first.get()), AutoZval(item.second.get())); } break; } } } } if (EAPM_GL(config_)->get().debug_instrument_all && EAPM_GL(requestScope_)->isFunctional()) { std::string_view filename(ZSTR_VAL(execute_data->func->op_array.filename), ZSTR_LEN(execute_data->func->op_array.filename)); if (!(execute_data->func->common.fn_flags & ZEND_ACC_CLOSURE) && filename.find("/ElasticOTel/") == std::string_view::npos && filename.find("/open-telemetry/") == std::string_view::npos) { callbacks = reinterpret_cast<InstrumentedFunctionHooksStorage_t *>(EAPM_GL(hooksStorage_).get())->storeFront(hash, AutoZval("Elastic\\OTel\\PhpPartFacade::debugPreHook"sv), AutoZval("Elastic\\OTel\\PhpPartFacade::debugPostHook")); } } if (!callbacks) { return {nullptr, nullptr}; } bool havePreHook = false; bool havePostHook = false; for (auto const &item : *callbacks) { if (!item.first.isNull()) { havePreHook = true; } if (!item.second.isNull()) { havePostHook = true; } if (havePreHook && havePostHook) { break; } } ELOGF_TRACE(EAPM_GL(logger_), INSTRUMENTATION, "elasticRegisterObserver hash: 0x%X, havePreHooks: %d havePostHooks: %d", hash, havePreHook, havePostHook); return {havePreHook ? elasticObserverFcallBeginHandler : nullptr, havePostHook ? elasticObserverFcallEndHandler : nullptr}; } }