Jit/pyjit.cpp (1,614 lines of code) (raw):

// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) #include "Jit/pyjit.h" #include "Python.h" //#include "internal/pycore_pystate.h" #include "Include/internal/pycore_pystate.h" #include "internal/pycore_shadow_frame.h" #include "Jit/code_allocator.h" #include "Jit/codegen/gen_asm.h" #include "Jit/containers.h" #include "Jit/frame.h" #include "Jit/hir/builder.h" #include "Jit/hir/preload.h" #include "Jit/inline_cache.h" #include "Jit/jit_context.h" #include "Jit/jit_flag_processor.h" #include "Jit/jit_gdb_support.h" #include "Jit/jit_list.h" #include "Jit/jit_time_log.h" #include "Jit/lir/inliner.h" #include "Jit/log.h" #include "Jit/perf_jitdump.h" #include "Jit/profile_data.h" #include "Jit/ref.h" #include "Jit/runtime.h" #include "Jit/type_profiler.h" #include "Jit/util.h" #include <dis-asm.h> #include <atomic> #include <chrono> #include <climits> #include <cstddef> #include <cstdio> #include <cstdlib> #include <memory> #include <thread> #include <unordered_set> #include <utility> #define DEFAULT_CODE_SIZE 2 * 1024 * 1024 using namespace jit; int64_t __strobe_CodeRuntime_py_code = CodeRuntime::kPyCodeOffset; struct JitConfig { InitStateJitConfig init_state{JIT_NOT_INITIALIZED}; int is_enabled{0}; FrameModeJitConfig frame_mode{PY_FRAME}; int are_type_slots_enabled{1}; int allow_jit_list_wildcards{0}; int compile_all_static_functions{0}; size_t batch_compile_workers{0}; int multithreaded_compile_test{0}; bool use_huge_pages{true}; int hir_inliner_enabled{0}; }; static JitConfig jit_config; void initJitConfig_() { jit_config = JitConfig(); } int _PyJIT_IsJitConfigAllow_jit_list_wildcards() { return jit_config.allow_jit_list_wildcards; } int _PyJIT_IsJitConfigCompile_all_static_functions() { return jit_config.compile_all_static_functions; } size_t _PyJIT_GetJitConfigBatch_compile_workers() { return jit_config.batch_compile_workers; } int _PyJIT_IsJitConfigMultithreaded_compile_test() { return jit_config.multithreaded_compile_test; } 
namespace {
// Extra information needed to compile a PyCodeObject.
struct CodeData {
  CodeData(PyObject* m, PyObject* g) : module{m}, globals{g} {}

  // Owning references: keeps the defining module and its globals dict alive
  // until the code object is compiled.
  Ref<> module;
  Ref<PyDictObject> globals;
};

// Amount of time taken to batch compile everything when disable_jit is called
long g_batch_compilation_time_ms = 0;
} // namespace

static _PyJITContext* jit_ctx;
static JITList* g_jit_list{nullptr};

// Function and code objects ("units") registered for compilation.
static std::unordered_set<BorrowedRef<>> jit_reg_units;

// Every unit that is a code object has corresponding entry in jit_code_data.
static std::unordered_map<BorrowedRef<PyCodeObject>, CodeData> jit_code_data;

// Every unit has an entry in preloaders if we are doing multithreaded compile.
static std::unordered_map<BorrowedRef<>, hir::Preloader> jit_preloaders;

namespace jit {

// True if a preloader was built for this function during batch compilation.
bool isPreloaded(BorrowedRef<PyFunctionObject> func) {
  return jit_preloaders.find(func) != jit_preloaders.end();
}

// Look up the preloader for a function, falling back to one keyed by its code
// object; the code-object lookup must succeed (map_get_strict asserts).
const jit::hir::Preloader& getPreloader(BorrowedRef<PyFunctionObject> func) {
  auto it = jit_preloaders.find(func);
  if (it != jit_preloaders.end()) {
    return it->second;
  }
  return map_get_strict(jit_preloaders, func->func_code);
}

} // namespace jit

// Strong references to every function and code object that were ever
// registered, to keep them alive for batch testing.
static std::vector<Ref<>> test_multithreaded_units;

// Per-function compilation times, recorded by CompilationTimer.
static std::unordered_map<PyFunctionObject*, std::chrono::duration<double>>
    jit_time_functions;

// If non-empty, profile information will be written to this filename at
// shutdown.
static std::string g_write_profile_file;

// Frequently-used strings that we intern at JIT startup and hold references to.
// X-macro listing every interned string; each expands to a static
// `s_str_<name>` PyObject* below, initialized in _PyJIT_Initialize().
#define INTERNED_STRINGS(X) \
  X(bc_offset) \
  X(code_hash) \
  X(count) \
  X(description) \
  X(filename) \
  X(firstlineno) \
  X(func_qualname) \
  X(guilty_type) \
  X(int) \
  X(lineno) \
  X(normal) \
  X(normvector) \
  X(opname) \
  X(reason) \
  X(types)

#define DECLARE_STR(s) static PyObject* s_str_##s{nullptr};
INTERNED_STRINGS(DECLARE_STR)
#undef DECLARE_STR

// Interned opcode-name strings, indexed by opcode number.
static std::array<PyObject*, 256> s_opnames;

// NOTE(review): "compliation" is a long-standing typo in this identifier;
// renaming would touch every use site, so it is kept as-is here.
static double total_compliation_time = 0.0;

int g_profile_new_interp_threads = 0;

// RAII timer: measures one compilation and, on destruction, adds the elapsed
// time to the global total and to the per-function map (under the threaded-
// compile lock, since worker threads also record times).
struct CompilationTimer {
  explicit CompilationTimer(BorrowedRef<PyFunctionObject> f)
      : start(std::chrono::steady_clock::now()), func(f) {}

  ~CompilationTimer() {
    auto end = std::chrono::steady_clock::now();
    std::chrono::duration<double> time_span =
        std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
    double time = time_span.count();
    total_compliation_time += time;
    jit::ThreadedCompileSerialize guard;
    jit_time_functions.emplace(func, time_span);
  }

  std::chrono::steady_clock::time_point start;
  BorrowedRef<PyFunctionObject> func{nullptr};
};

// Batch-compile counters reported by multithreaded_compile_test.
static std::atomic<int> g_compile_workers_attempted;
static int g_compile_workers_retries;

// Point JIT logging at the given file; "{pid}" in the name is replaced with
// the current process id. Falls back to stderr if the file cannot be opened.
void setJitLogFile(string log_filename) {
  // Redirect logging to a file if configured.
  const char* kPidMarker = "{pid}";
  std::string pid_filename = log_filename;
  auto marker_pos = pid_filename.find(kPidMarker);
  if (marker_pos != std::string::npos) {
    pid_filename.replace(
        marker_pos, std::strlen(kPidMarker), fmt::format("{}", getpid()));
  }
  FILE* file = fopen(pid_filename.c_str(), "w");
  if (file == NULL) {
    JIT_LOG(
        "Couldn't open log file %s (%s), logging to stderr",
        pid_filename,
        strerror(errno));
  } else {
    g_log_file = file;
  }
}

// Select the disassembly syntax used in dumps; aborts on unknown values.
void setASMSyntax(string asm_syntax) {
  if (asm_syntax.compare("intel") == 0) {
    set_intel_syntax();
  } else if (asm_syntax.compare("att") == 0) {
    set_att_syntax();
  } else {
    JIT_CHECK(false, "unknown asm syntax '%s'", asm_syntax);
  }
}

static jit::FlagProcessor xarg_flag_processor;

// Raw flag values captured here, then consumed by _PyJIT_Initialize().
static int use_jit = 0;
static int jit_help = 0;
static string write_profile_file;
static int jit_profile_interp = 0;
static string jl_fn;

// Register every -X / environment flag the JIT understands, then parse the
// process's X-options. Resets the captured values first so re-initialization
// (e.g. in tests) starts from a clean slate.
void initFlagProcessor() {
  use_jit = 0;
  write_profile_file = "";
  jit_profile_interp = 0;
  jl_fn = "";
  jit_help = 0;
  if (!xarg_flag_processor.hasOptions()) {
    // flags are inspected in order of definition below
    xarg_flag_processor.addOption(
        "jit", "PYTHONJIT", use_jit, "Enable the JIT");

    xarg_flag_processor.addOption(
        "jit-debug",
        "PYTHONJITDEBUG",
        [](string) {
          g_debug = 1;
          g_debug_verbose = 1;
        },
        "JIT debug and extra logging");

    xarg_flag_processor
        .addOption(
            "jit-log-file",
            "PYTHONJITLOGFILE",
            [](string log_filename) { setJitLogFile(log_filename); },
            "write log entries to <filename> rather than stderr")
        .withFlagParamName("filename");

    xarg_flag_processor
        .addOption(
            "jit-asm-syntax",
            "PYTHONJITASMSYNTAX",
            [](string asm_syntax) { setASMSyntax(asm_syntax); },
            "set the assembly syntax used in log files")
        .withFlagParamName("intel|att")
        .withDebugMessageOverride(
            "Sets the assembly syntax used in log files");

    xarg_flag_processor
        .addOption(
            "jit-debug-refcount",
            "PYTHONJITDEBUGREFCOUNT",
            g_debug_refcount,
            "JIT refcount insertion debug mode")
        .withDebugMessageOverride("Enabling");

    xarg_flag_processor
        .addOption(
            "jit-dump-hir",
            "PYTHONJITDUMPHIR",
            g_dump_hir,
            "log the HIR representation of all functions after initial "
            "lowering from bytecode")
        .withDebugMessageOverride("Dump initial HIR of JITted functions");

    xarg_flag_processor
        .addOption(
            "jit-dump-hir-passes",
            "PYTHONJITDUMPHIRPASSES",
            g_dump_hir_passes,
            "log the HIR after each optimization pass")
        .withDebugMessageOverride(
            "Dump HIR of JITted functions after each individual optimization "
            "pass");

    xarg_flag_processor
        .addOption(
            "jit-dump-final-hir",
            "PYTHONJITDUMPFINALHIR",
            g_dump_final_hir,
            "log the HIR after all optimizations")
        .withDebugMessageOverride(
            "Dump final HIR of JITted functions after all optimizations");

    xarg_flag_processor
        .addOption(
            "jit-dump-lir",
            "PYTHONJITDUMPLIR",
            g_dump_lir,
            "log the LIR representation of all functions after lowering from "
            "HIR")
        .withDebugMessageOverride("Dump initial LIR of JITted functions");

    xarg_flag_processor.addOption(
        "jit-dump-lir-no-origin",
        "PYTHONJITDUMPLIRNOORIGIN",
        [](string) {
          g_dump_lir = 1;
          g_dump_lir_no_origin = 1;
        },
        "JIT dump-lir mode without origin data");

    xarg_flag_processor.addOption(
        "jit-dump-c-helper",
        "PYTHONJITDUMPCHELPER",
        g_dump_c_helper,
        "dump all c invocations");

    // Deprecated alias for jit-dump-asm; both write to g_dump_asm.
    xarg_flag_processor.addOption(
        "jit-disas-funcs",
        "PYTHONJITDISASFUNCS",
        g_dump_asm,
        "jit-disas-funcs/PYTHONJITDISASFUNCS are deprecated and will soon be "
        "removed. Use jit-dump-asm and PYTHONJITDUMPASM instead");

    xarg_flag_processor
        .addOption(
            "jit-dump-asm",
            "PYTHONJITDUMPASM",
            g_dump_asm,
            "log the final compiled code, annotated with HIR instructions")
        .withDebugMessageOverride("Dump asm of JITted functions");

    xarg_flag_processor.addOption(
        "jit-gdb-support",
        "PYTHONJITGDBSUPPORT",
        [](string) {
          g_debug = 1;
          g_gdb_support = 1;
        },
        "GDB support and JIT debug mode");

    xarg_flag_processor.addOption(
        "jit-gdb-stubs-support",
        "PYTHONJITGDBSTUBSSUPPORT",
        g_gdb_stubs_support,
        "GDB support for stubs");

    xarg_flag_processor.addOption(
        "jit-gdb-write-elf",
        "PYTHONJITGDBWRITEELF",
        [](string) {
          g_debug = 1;
          g_gdb_support = 1;
          g_gdb_write_elf_objects = 1;
        },
        "Debugging aid, GDB support with ELF output");

    xarg_flag_processor.addOption(
        "jit-dump-stats",
        "PYTHONJITDUMPSTATS",
        g_dump_stats,
        "Dump JIT runtime stats at shutdown");

    xarg_flag_processor.addOption(
        "jit-disable-lir-inliner",
        "PYTHONJITDISABLELIRINLINER",
        g_disable_lir_inliner,
        "disable JIT lir inlining");

    xarg_flag_processor.addOption(
        "jit-disable-huge-pages",
        "PYTHONJITDISABLEHUGEPAGES",
        [](string) { jit_config.use_huge_pages = false; },
        "disable huge page support");

    xarg_flag_processor.addOption(
        "jit-enable-jit-list-wildcards",
        "PYTHONJITENABLEJITLISTWILDCARDS",
        jit_config.allow_jit_list_wildcards,
        "allow wildcards in JIT list");

    xarg_flag_processor.addOption(
        "jit-all-static-functions",
        "PYTHONJITALLSTATICFUNCTIONS",
        jit_config.compile_all_static_functions,
        "JIT-compile all static functions");

    // Supplying a jit-list implicitly turns the JIT on.
    xarg_flag_processor
        .addOption(
            "jit-list-file",
            "PYTHONJITLISTFILE",
            [](string listFile) {
              jl_fn = listFile;
              use_jit = 1;
            },
            "Load list of functions to compile from <filename>")
        .withFlagParamName("filename");

    xarg_flag_processor
        .addOption(
            "jit-read-profile",
            "PYTHONJITREADPROFILE",
            [](string read_profile_file) {
              JIT_LOG("Loading profile data from %s", read_profile_file);
              readProfileData(read_profile_file);
            },
            "Load profile data from <filename>")
        .withFlagParamName("filename");

    xarg_flag_processor
        .addOption(
            "jit-write-profile",
            "PYTHONJITWRITEPROFILE",
            write_profile_file,
            "Write profiling data to <filename>")
        .withFlagParamName("filename");

    xarg_flag_processor.addOption(
        "jit-profile-interp",
        "PYTHONJITPROFILEINTERP",
        jit_profile_interp,
        "interpreter profiling");

    xarg_flag_processor.addOption(
        "jit-disable",
        "PYTHONJITDISABLE",
        [](int val) { use_jit = !val; },
        "disable the JIT");

    // these are only set if use_jit == 1
    xarg_flag_processor.addOption(
        "jit-shadow-frame",
        "PYTHONJITSHADOWFRAME",
        [](int val) {
          if (use_jit) {
            jit_config.frame_mode = val ? SHADOW_FRAME : PY_FRAME;
          }
        },
        "enable shadow frame mode");

    xarg_flag_processor.addOption(
        "jit-no-type-slots",
        "PYTHONJITNOTYPESLOTS",
        [](int val) {
          if (use_jit) {
            jit_config.are_type_slots_enabled = !val;
          }
        },
        "turn off type slots");

    xarg_flag_processor
        .addOption(
            "jit-batch-compile-workers",
            "PYTHONJITBATCHCOMPILEWORKERS",
            jit_config.batch_compile_workers,
            "set the number of batch compile workers to <COUNT>")
        .withFlagParamName("COUNT");

    xarg_flag_processor
        .addOption(
            "jit-multithreaded-compile-test",
            "PYTHONJITMULTITHREADEDCOMPILETEST",
            [](int val) {
              if (use_jit) {
                jit_config.multithreaded_compile_test = val;
              }
            },
            "JIT multithreaded compile test")
        .isHiddenFlag(true);

    xarg_flag_processor.addOption(
        "jit-list-match-line-numbers",
        "PYTHONJITLISTMATCHLINENUMBERS",
        [](int val) {
          if (use_jit) {
            jitlist_match_line_numbers(val);
          }
        },
        "JIT list match line numbers");

    // NOTE: the help text below contains typos ("approperiate", "seperated")
    // that are preserved verbatim; they are user-visible strings and fixing
    // them is out of scope for a documentation pass.
    xarg_flag_processor
        .addOption(
            "jit-time",
            "",
            [](string flag_value) { parseAndSetFuncList(flag_value); },
            "Measure time taken in compilation phases and output summary to "
            "stderr or approperiate logfile. Only functions in comma seperated "
            "<function_list> list will be included. Comma seperated list may "
            "include wildcards, * and ?. Wildcards are processed in glob "
            "fashion and not as regex.")
        .withFlagParamName("function_list")
        .withDebugMessageOverride(
            "Will capture time taken in compilation phases and output summary");
    ;

    xarg_flag_processor.addOption(
        "jit-enable-hir-inliner",
        "PYTHONJITENABLEHIRINLINER",
        [](int val) {
          if (use_jit && val) {
            _PyJIT_EnableHIRInliner();
          }
        },
        "Enable the JIT's HIR inliner");

    xarg_flag_processor.addOption(
        "jit-dump-hir-passes-json",
        "PYTHONJITDUMPHIRPASSESJSON",
        [](string json_output_dir) {
          g_dump_hir_passes_json = ::strdup(json_output_dir.c_str());
          int mkdir_result = ::mkdir(g_dump_hir_passes_json, 0755);
          JIT_CHECK(
              mkdir_result == 0 || errno == EEXIST,
              "could not make JSON directory");
        },
        "Dump IR passes as JSON to the directory specified by this flag's "
        "value");

    xarg_flag_processor.addOption(
        "jit-help", "", jit_help, "print all available JIT flags and exits");
  }

  xarg_flag_processor.setFlags(PySys_GetXOptions());
}

// Compile the given compilation unit, returning the result code.
static _PyJIT_Result compileUnit(BorrowedRef<> unit) {
  if (PyFunction_Check(unit)) {
    BorrowedRef<PyFunctionObject> func(unit);
    CompilationTimer t{func};
    return _PyJITContext_CompileFunction(jit_ctx, func);
  }
  JIT_CHECK(PyCode_Check(unit), "Expected function or code object");
  BorrowedRef<PyCodeObject> code(unit);
  const CodeData& data = map_get(jit_code_data, code);
  return _PyJITContext_CompileCode(jit_ctx, data.module, code, data.globals);
}

// Compile the given function or code object with preloader from jit_preloaders
// map.
static _PyJIT_Result compilePreloaded(BorrowedRef<> unit) { return _PyJITContext_CompilePreloader(jit_ctx, map_get(jit_preloaders, unit)); } static void compile_worker_thread() { JIT_DLOG("Started compile worker in thread %d", std::this_thread::get_id()); BorrowedRef<> unit; while ((unit = g_threaded_compile_context.nextUnit()) != nullptr) { g_compile_workers_attempted++; if (compilePreloaded(unit) == PYJIT_RESULT_RETRY) { ThreadedCompileSerialize guard; g_compile_workers_retries++; g_threaded_compile_context.retryUnit(unit); } } JIT_DLOG("Finished compile worker in thread %d", std::this_thread::get_id()); } static void multithread_compile_all(std::vector<BorrowedRef<>>&& work_units) { JIT_CHECK(jit_ctx, "JIT not initialized"); // first we have to preload everything we are going to compile for (auto unit : work_units) { if (PyFunction_Check(unit)) { BorrowedRef<PyFunctionObject> func(unit); jit_preloaders.emplace(unit, func); } else { JIT_CHECK(PyCode_Check(unit), "Expected function or code object"); BorrowedRef<PyCodeObject> code(unit); const CodeData& data = map_get(jit_code_data, code); jit_preloaders.emplace( std::piecewise_construct, std::forward_as_tuple(unit), std::forward_as_tuple( code, data.globals, codeFullname(data.module, code))); } } // Disable checks for using GIL protected data across threads. // Conceptually what we're doing here is saying we're taking our own // responsibility for managing locking of CPython runtime data structures. // Instead of holding the GIL to serialize execution to one thread, we're // holding the GIL for a group of co-operating threads which are aware of each // other. We still need the GIL as this protects the cooperating threads from // unknown other threads. Within our group of cooperating threads we can // safely do any read-only operations in parallel, but we grab our own lock if // we do a write (e.g. an incref). 
int old_gil_check_enabled = _PyGILState_check_enabled; _PyGILState_check_enabled = 0; g_threaded_compile_context.startCompile(std::move(work_units)); std::vector<std::thread> worker_threads; JIT_CHECK(jit_config.batch_compile_workers, "Zero workers for compile"); { // Hold a lock while we create threads because IG production has magic to // wrap pthread_create() and run Python code before threads are created. ThreadedCompileSerialize guard; for (size_t i = 0; i < jit_config.batch_compile_workers; i++) { worker_threads.emplace_back(compile_worker_thread); } } for (std::thread& worker_thread : worker_threads) { worker_thread.join(); } std::vector<BorrowedRef<>> retry_list{ g_threaded_compile_context.endCompile()}; for (auto unit : retry_list) { compilePreloaded(unit); } _PyGILState_check_enabled = old_gil_check_enabled; jit_preloaders.clear(); } static PyObject* multithreaded_compile_test(PyObject*, PyObject*) { if (!jit_config.multithreaded_compile_test) { PyErr_SetString( PyExc_NotImplementedError, "multithreaded_compile_test not enabled"); return NULL; } g_compile_workers_attempted = 0; g_compile_workers_retries = 0; JIT_LOG("(Re)compiling %d units", test_multithreaded_units.size()); _PyJITContext_ClearCache(jit_ctx); std::chrono::time_point time_start = std::chrono::steady_clock::now(); multithread_compile_all( {test_multithreaded_units.begin(), test_multithreaded_units.end()}); std::chrono::time_point time_end = std::chrono::steady_clock::now(); JIT_LOG( "Took %d ms, compiles attempted: %d, compiles retried: %d", std::chrono::duration_cast<std::chrono::milliseconds>( time_end - time_start) .count(), g_compile_workers_attempted, g_compile_workers_retries); test_multithreaded_units.clear(); Py_RETURN_NONE; } static PyObject* is_multithreaded_compile_test_enabled(PyObject*, PyObject*) { if (jit_config.multithreaded_compile_test) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } static PyObject* disable_jit(PyObject* /* self */, PyObject* const* args, Py_ssize_t nargs) { if 
(nargs > 1) { PyErr_SetString(PyExc_TypeError, "disable expects 0 or 1 arg"); return NULL; } else if (nargs == 1 && !PyBool_Check(args[0])) { PyErr_SetString( PyExc_TypeError, "disable expects bool indicating to compile pending functions"); return NULL; } if (nargs == 0 || args[0] == Py_True) { // Compile all of the pending functions/codes before shutting down std::chrono::time_point start = std::chrono::steady_clock::now(); if (jit_config.batch_compile_workers > 0) { multithread_compile_all({jit_reg_units.begin(), jit_reg_units.end()}); jit_reg_units.clear(); } else { std::unordered_set<BorrowedRef<>> units; units.swap(jit_reg_units); for (auto unit : units) { compileUnit(unit); } } std::chrono::time_point end = std::chrono::steady_clock::now(); g_batch_compilation_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start) .count(); jit_code_data.clear(); } _PyJIT_Disable(); Py_RETURN_NONE; } static PyObject* get_batch_compilation_time_ms(PyObject*, PyObject*) { return PyLong_FromLong(g_batch_compilation_time_ms); } static PyObject* force_compile(PyObject* /* self */, PyObject* func) { if (!PyFunction_Check(func)) { PyErr_SetString(PyExc_TypeError, "force_compile expected a function"); return NULL; } if (jit_reg_units.count(func)) { _PyJIT_CompileFunction((PyFunctionObject*)func); Py_RETURN_TRUE; } Py_RETURN_FALSE; } int _PyJIT_IsCompiled(PyObject* func) { if (jit_ctx == nullptr) { return 0; } JIT_DCHECK( PyFunction_Check(func), "Expected PyFunctionObject, got '%.200s'", Py_TYPE(func)->tp_name); return _PyJITContext_DidCompile(jit_ctx, func); } static PyObject* is_jit_compiled(PyObject* /* self */, PyObject* func) { int st = _PyJIT_IsCompiled(func); PyObject* res = NULL; if (st == 1) { res = Py_True; } else if (st == 0) { res = Py_False; } Py_XINCREF(res); return res; } static PyObject* print_hir(PyObject* /* self */, PyObject* func) { if (!PyFunction_Check(func)) { PyErr_SetString(PyExc_TypeError, "arg 1 must be a function"); return NULL; } int 
st = _PyJITContext_DidCompile(jit_ctx, func); if (st == -1) { return NULL; } else if (st == 0) { PyErr_SetString(PyExc_ValueError, "function is not jit compiled"); return NULL; } if (_PyJITContext_PrintHIR(jit_ctx, func) < 0) { return NULL; } else { Py_RETURN_NONE; } } static PyObject* disassemble(PyObject* /* self */, PyObject* func) { if (!PyFunction_Check(func)) { PyErr_SetString(PyExc_TypeError, "arg 1 must be a function"); return NULL; } int st = _PyJITContext_DidCompile(jit_ctx, func); if (st == -1) { return NULL; } else if (st == 0) { PyErr_SetString(PyExc_ValueError, "function is not jit compiled"); return NULL; } if (_PyJITContext_Disassemble(jit_ctx, func) < 0) { return NULL; } else { Py_RETURN_NONE; } } static PyObject* get_jit_list(PyObject* /* self */, PyObject*) { if (g_jit_list == nullptr) { Py_RETURN_NONE; } else { auto jit_list = Ref<>::steal(g_jit_list->getList()); return jit_list.release(); } } static PyObject* jit_list_append(PyObject* /* self */, PyObject* line) { if (g_jit_list == nullptr) { g_jit_list = JITList::create().release(); } Py_ssize_t line_len; const char* line_str = PyUnicode_AsUTF8AndSize(line, &line_len); if (line_str == NULL) { return NULL; } g_jit_list->parseLine( {line_str, static_cast<std::string::size_type>(line_len)}); Py_RETURN_NONE; } static PyObject* get_compiled_functions(PyObject* /* self */, PyObject*) { return _PyJITContext_GetCompiledFunctions(jit_ctx); } static PyObject* get_compilation_time(PyObject* /* self */, PyObject*) { PyObject* res = PyLong_FromLong(static_cast<long>(total_compliation_time * 1000)); return res; } static PyObject* get_function_compilation_time( PyObject* /* self */, PyObject* func) { auto iter = jit_time_functions.find(reinterpret_cast<PyFunctionObject*>(func)); if (iter == jit_time_functions.end()) { Py_RETURN_NONE; } PyObject* res = PyLong_FromLong(iter->second.count() * 1000); return res; } namespace { // Simple wrapper functions to turn NULL or -1 return values from C-API // functions 
into a thrown exception. Meant for repetitive runs of C-API calls // and not intended for use in public APIs. class CAPIError : public std::exception {}; PyObject* check(PyObject* obj) { if (obj == nullptr) { throw CAPIError(); } return obj; } int check(int ret) { if (ret < 0) { throw CAPIError(); } return ret; } Ref<> make_deopt_stats() { Runtime* runtime = codegen::NativeGeneratorFactory::runtime(); auto stats = Ref<>::steal(check(PyList_New(0))); for (auto& pair : runtime->deoptStats()) { const DeoptMetadata& meta = runtime->getDeoptMetadata(pair.first); const DeoptFrameMetadata& frame_meta = meta.frame_meta[meta.inline_depth]; const DeoptStat& stat = pair.second; BorrowedRef<PyCodeObject> code = frame_meta.code; auto func_qualname = code->co_qualname; int lineno_raw = code->co_lnotab != nullptr ? PyCode_Addr2Line(code, frame_meta.next_instr_offset) : -1; auto lineno = Ref<>::steal(check(PyLong_FromLong(lineno_raw))); auto reason = Ref<>::steal(check(PyUnicode_FromString(deoptReasonName(meta.reason)))); auto description = Ref<>::steal(check(PyUnicode_FromString(meta.descr))); // Helper to create an event dict with a given count value. 
auto append_event = [&](size_t count_raw, const char* type) { auto event = Ref<>::steal(check(PyDict_New())); auto normals = Ref<>::steal(check(PyDict_New())); auto ints = Ref<>::steal(check(PyDict_New())); check(PyDict_SetItem(event, s_str_normal, normals)); check(PyDict_SetItem(event, s_str_int, ints)); check(PyDict_SetItem(normals, s_str_func_qualname, func_qualname)); check(PyDict_SetItem(normals, s_str_filename, code->co_filename)); check(PyDict_SetItem(ints, s_str_lineno, lineno)); check(PyDict_SetItem(normals, s_str_reason, reason)); check(PyDict_SetItem(normals, s_str_description, description)); auto count = Ref<>::steal(check(PyLong_FromSize_t(count_raw))); check(PyDict_SetItem(ints, s_str_count, count)); auto type_str = Ref<>::steal(check(PyUnicode_InternFromString(type))); check(PyDict_SetItem(normals, s_str_guilty_type, type_str) < 0); check(PyList_Append(stats, event)); }; // For deopts with type profiles, add a copy of the dict with counts for // each type, including "other". 
if (!stat.types.empty()) { for (size_t i = 0; i < stat.types.size && stat.types.types[i] != nullptr; ++i) { append_event(stat.types.counts[i], stat.types.types[i]->tp_name); } if (stat.types.other > 0) { append_event(stat.types.other, "<other>"); } } else { append_event(stat.count, "<none>"); } } runtime->clearDeoptStats(); return stats; } } // namespace static PyObject* get_and_clear_runtime_stats(PyObject* /* self */, PyObject*) { auto stats = Ref<>::steal(PyDict_New()); if (stats == nullptr) { return nullptr; } try { Ref<> deopt_stats = make_deopt_stats(); check(PyDict_SetItemString(stats, "deopt", deopt_stats)); } catch (const CAPIError&) { return nullptr; } return stats.release(); } static PyObject* clear_runtime_stats(PyObject* /* self */, PyObject*) { codegen::NativeGeneratorFactory::runtime()->clearDeoptStats(); Py_RETURN_NONE; } static PyObject* get_compiled_size(PyObject* /* self */, PyObject* func) { if (jit_ctx == NULL) { return PyLong_FromLong(0); } long size = _PyJITContext_GetCodeSize(jit_ctx, func); PyObject* res = PyLong_FromLong(size); return res; } static PyObject* get_compiled_stack_size(PyObject* /* self */, PyObject* func) { if (jit_ctx == NULL) { return PyLong_FromLong(0); } long size = _PyJITContext_GetStackSize(jit_ctx, func); PyObject* res = PyLong_FromLong(size); return res; } static PyObject* get_compiled_spill_stack_size( PyObject* /* self */, PyObject* func) { if (jit_ctx == NULL) { return PyLong_FromLong(0); } long size = _PyJITContext_GetSpillStackSize(jit_ctx, func); PyObject* res = PyLong_FromLong(size); return res; } static PyObject* jit_frame_mode(PyObject* /* self */, PyObject*) { return PyLong_FromLong(jit_config.frame_mode); } static PyObject* get_supported_opcodes(PyObject* /* self */, PyObject*) { auto set = Ref<>::steal(PySet_New(nullptr)); if (set == nullptr) { return nullptr; } for (auto op : hir::kSupportedOpcodes) { auto op_obj = Ref<>::steal(PyLong_FromLong(op)); if (op_obj == nullptr) { return nullptr; } if 
(PySet_Add(set, op_obj) < 0) { return nullptr; } } return set.release(); } static PyObject* jit_force_normal_frame(PyObject*, PyObject* func_obj) { if (!PyFunction_Check(func_obj)) { PyErr_SetString(PyExc_TypeError, "Input must be a function"); return NULL; } PyFunctionObject* func = reinterpret_cast<PyFunctionObject*>(func_obj); reinterpret_cast<PyCodeObject*>(func->func_code)->co_flags |= CO_NORMAL_FRAME; Py_INCREF(func_obj); return func_obj; } static PyObject* jit_suppress(PyObject*, PyObject* func_obj) { if (!PyFunction_Check(func_obj)) { PyErr_SetString(PyExc_TypeError, "Input must be a function"); return NULL; } PyFunctionObject* func = reinterpret_cast<PyFunctionObject*>(func_obj); reinterpret_cast<PyCodeObject*>(func->func_code)->co_flags |= CO_SUPPRESS_JIT; Py_INCREF(func_obj); return func_obj; } static PyObject* get_allocator_stats(PyObject*, PyObject*) { if (!_PyJIT_UseHugePages()) { Py_RETURN_NONE; } auto stats = Ref<>::steal(PyDict_New()); if (stats == NULL) { return NULL; } auto used_bytes = Ref<>::steal(PyLong_FromLong(CodeAllocatorCinder::usedBytes())); if (used_bytes == NULL || PyDict_SetItemString(stats, "used_bytes", used_bytes) < 0) { return NULL; } auto lost_bytes = Ref<>::steal(PyLong_FromLong(CodeAllocatorCinder::lostBytes())); if (lost_bytes == NULL || PyDict_SetItemString(stats, "lost_bytes", lost_bytes) < 0) { return NULL; } auto fragmented_allocs = Ref<>::steal(PyLong_FromLong(CodeAllocatorCinder::fragmentedAllocs())); if (fragmented_allocs == NULL || PyDict_SetItemString(stats, "fragmented_allocs", fragmented_allocs) < 0) { return NULL; } auto huge_allocs = Ref<>::steal(PyLong_FromLong(CodeAllocatorCinder::hugeAllocs())); if (huge_allocs == NULL || PyDict_SetItemString(stats, "huge_allocs", huge_allocs) < 0) { return NULL; } return stats.release(); } static PyObject* is_hir_inliner_enabled(PyObject* /* self */, PyObject*) { int result = _PyJIT_IsHIRInlinerEnabled(); if (result) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } static PyObject* 
enable_hir_inliner(PyObject* /* self */, PyObject*) { _PyJIT_EnableHIRInliner(); Py_RETURN_NONE; } static PyObject* disable_hir_inliner(PyObject* /* self */, PyObject*) { _PyJIT_DisableHIRInliner(); Py_RETURN_NONE; } static PyMethodDef jit_methods[] = { {"disable", (PyCFunction)(void*)disable_jit, METH_FASTCALL, "Disable the jit."}, {"disassemble", disassemble, METH_O, "Disassemble JIT compiled functions"}, {"is_jit_compiled", is_jit_compiled, METH_O, "Check if a function is jit compiled."}, {"force_compile", force_compile, METH_O, "Force a function to be JIT compiled if it hasn't yet"}, {"jit_frame_mode", jit_frame_mode, METH_NOARGS, "Get JIT frame mode (0 = normal frames, 1 = no frames, 2 = shadow frames"}, {"get_jit_list", get_jit_list, METH_NOARGS, "Get the JIT-list"}, {"jit_list_append", jit_list_append, METH_O, "Parse a JIT-list line"}, {"print_hir", print_hir, METH_O, "Print the HIR for a jitted function to stdout."}, {"get_supported_opcodes", get_supported_opcodes, METH_NOARGS, "Return a set of all supported opcodes, as ints."}, {"get_compiled_functions", get_compiled_functions, METH_NOARGS, "Return a list of functions that are currently JIT-compiled."}, {"get_compilation_time", get_compilation_time, METH_NOARGS, "Return the total time used for JIT compiling functions in milliseconds."}, {"get_function_compilation_time", get_function_compilation_time, METH_O, "Return the time used for JIT compiling a given function in " "milliseconds."}, {"get_and_clear_runtime_stats", get_and_clear_runtime_stats, METH_NOARGS, "Returns information about the runtime behavior of JIT-compiled code."}, {"clear_runtime_stats", clear_runtime_stats, METH_NOARGS, "Clears runtime stats about JIT-compiled code without returning a value."}, {"get_compiled_size", get_compiled_size, METH_O, "Return code size in bytes for a JIT-compiled function."}, {"get_compiled_stack_size", get_compiled_stack_size, METH_O, "Return stack size in bytes for a JIT-compiled function."}, 
{"get_compiled_spill_stack_size", get_compiled_spill_stack_size, METH_O, "Return stack size in bytes used for register spills for a JIT-compiled " "function."}, {"jit_force_normal_frame", jit_force_normal_frame, METH_O, "Decorator forcing a function to always use normal frame mode when JIT."}, {"jit_suppress", jit_suppress, METH_O, "Decorator to disable the JIT for the decorated function."}, {"multithreaded_compile_test", multithreaded_compile_test, METH_NOARGS, "Force multi-threaded recompile of still existing JIT functions for test"}, {"is_multithreaded_compile_test_enabled", is_multithreaded_compile_test_enabled, METH_NOARGS, "Return True if multithreaded_compile_test mode is enabled"}, {"get_batch_compilation_time_ms", get_batch_compilation_time_ms, METH_NOARGS, "Return the number of milliseconds spent in batch compilation when " "disabling the JIT."}, {"get_allocator_stats", get_allocator_stats, METH_NOARGS, "Return stats from the code allocator as a dictionary."}, {"is_hir_inliner_enabled", is_hir_inliner_enabled, METH_NOARGS, "Return True if the HIR inliner is enabled and False otherwise."}, {"enable_hir_inliner", enable_hir_inliner, METH_NOARGS, "Enable the HIR inliner."}, {"disable_hir_inliner", disable_hir_inliner, METH_NOARGS, "Disable the HIR inliner."}, {NULL, NULL, 0, NULL}}; static PyModuleDef jit_module = { PyModuleDef_HEAD_INIT, .m_name = "cinderjit", .m_doc = NULL, .m_size = -1, .m_methods = jit_methods, .m_slots = nullptr, .m_traverse = nullptr, .m_clear = nullptr, .m_free = nullptr}; static int onJitListImpl( BorrowedRef<PyCodeObject> code, BorrowedRef<> mod, BorrowedRef<> qualname) { bool is_static = code->co_flags & CO_STATICALLY_COMPILED; if (g_jit_list == nullptr || (is_static && jit_config.compile_all_static_functions)) { // There's no jit list or the function is static. 
return 1; } if (g_jit_list->lookupCO(code) != 1) { return g_jit_list->lookupFO(mod, qualname); } return 1; } int _PyJIT_OnJitList(PyFunctionObject* func) { return onJitListImpl(func->func_code, func->func_module, func->func_qualname); } int _PyJIT_Initialize() { if (jit_config.init_state == JIT_INITIALIZED) { return 0; } initJitConfig_(); // Initialize some interned strings that can be used even when the JIT is // off. #define INTERN_STR(s) \ s_str_##s = PyUnicode_InternFromString(#s); \ if (s_str_##s == nullptr) { \ return -1; \ } INTERNED_STRINGS(INTERN_STR) #undef INTERN_STR #define MAKE_OPNAME(opname, opnum) \ if ((s_opnames.at(opnum) = PyUnicode_InternFromString(#opname)) == \ nullptr) { \ return -1; \ } PY_OPCODES(MAKE_OPNAME) #undef MAKE_OPNAME initFlagProcessor(); if (jit_help) { std::cout << xarg_flag_processor.jitXOptionHelpMessage() << endl; return -2; } std::unique_ptr<JITList> jit_list; if (!jl_fn.empty()) { if (jit_config.allow_jit_list_wildcards) { jit_list = jit::WildcardJITList::create(); } else { jit_list = jit::JITList::create(); } if (jit_list == nullptr) { JIT_LOG("Failed to allocate JIT list"); return -1; } if (!jit_list->parseFile(jl_fn.c_str())) { JIT_LOG("Could not parse jit-list, disabling JIT."); return 0; } } if (!write_profile_file.empty() || jit_profile_interp == 1) { if (use_jit) { use_jit = 0; JIT_LOG("Keeping JIT disabled to enable interpreter profiling."); } g_profile_new_interp_threads = 1; _PyThreadState_SetProfileInterpAll(1); if (!write_profile_file.empty()) { g_write_profile_file = write_profile_file; } } if (use_jit) { JIT_DLOG("Enabling JIT."); } else { return 0; } CodeAllocator::makeGlobalCodeAllocator(); jit_ctx = new _PyJITContext(); PyObject* mod = PyModule_Create(&jit_module); if (mod == NULL) { return -1; } PyObject* modname = PyUnicode_InternFromString("cinderjit"); if (modname == NULL) { return -1; } PyObject* modules = PyImport_GetModuleDict(); int st = _PyImport_FixupExtensionObject(mod, modname, modname, modules); 
Py_DECREF(modname); if (st == -1) { return -1; } jit_config.init_state = JIT_INITIALIZED; jit_config.is_enabled = 1; g_jit_list = jit_list.release(); // Unconditionally set this, since we might have shadow frames from // CO_SHADOW_FRAME or inlined functions. _PyThreadState_GetFrame = reinterpret_cast<PyThreadFrameGetter>(materializeShadowCallStack); total_compliation_time = 0.0; return 0; } bool _PyJIT_UseHugePages() { return jit_config.use_huge_pages; } int _PyJIT_IsEnabled() { return (jit_config.init_state == JIT_INITIALIZED) && jit_config.is_enabled; } void _PyJIT_AfterFork_Child() { perf::afterForkChild(); } int _PyJIT_AreTypeSlotsEnabled() { return (jit_config.init_state == JIT_INITIALIZED) && jit_config.are_type_slots_enabled; } void _PyJIT_EnableHIRInliner() { jit_config.hir_inliner_enabled = 1; } void _PyJIT_DisableHIRInliner() { jit_config.hir_inliner_enabled = 0; } int _PyJIT_IsHIRInlinerEnabled() { return jit_config.hir_inliner_enabled; } int _PyJIT_Enable() { if (jit_config.init_state != JIT_INITIALIZED) { return 0; } jit_config.is_enabled = 1; return 0; } int _PyJIT_EnableTypeSlots() { if (!_PyJIT_IsEnabled()) { return 0; } jit_config.are_type_slots_enabled = 1; return 1; } void _PyJIT_Disable() { jit_config.is_enabled = 0; jit_config.are_type_slots_enabled = 0; } _PyJIT_Result _PyJIT_SpecializeType( PyTypeObject* type, _PyJIT_TypeSlots* slots) { return _PyJITContext_SpecializeType(jit_ctx, type, slots); } _PyJIT_Result _PyJIT_CompileFunction(PyFunctionObject* func) { if (jit_ctx == nullptr) { return PYJIT_NOT_INITIALIZED; } if (g_threaded_compile_context.compileRunning()) { // we were called recursively (by emitInvokeFunction); // find preloader in global map and compile it. 
    // (continuation of _PyJIT_CompileFunction's recursive-compile branch)
    auto it = jit_preloaders.find(func->func_code);
    if (it == jit_preloaders.end()) {
      return PYJIT_RESULT_CANNOT_SPECIALIZE;
    }
    return _PyJITContext_CompilePreloader(jit_ctx, it->second);
  }
  if (!_PyJIT_OnJitList(func)) {
    return PYJIT_RESULT_CANNOT_SPECIALIZE;
  }
  CompilationTimer timer(func);
  // The function is being compiled now, so it no longer needs to be in the
  // pending-registration set.
  jit_reg_units.erase(reinterpret_cast<PyObject*>(func));
  return _PyJITContext_CompileFunction(jit_ctx, func);
}

// Recursively search the given co_consts tuple for any code objects that are
// on the current jit-list, using the given module name to form a
// fully-qualified function name.
//
// Breadth-first over nested co_consts; `visited` guards against scanning the
// same code object twice. Code objects with no co_qualname are skipped.
static std::vector<BorrowedRef<PyCodeObject>> findNestedCodes(
    BorrowedRef<> module,
    BorrowedRef<> root_consts) {
  std::queue<PyObject*> consts_tuples;
  std::unordered_set<PyCodeObject*> visited;
  std::vector<BorrowedRef<PyCodeObject>> result;
  consts_tuples.push(root_consts);

  while (!consts_tuples.empty()) {
    PyObject* consts = consts_tuples.front();
    consts_tuples.pop();

    for (size_t i = 0, size = PyTuple_GET_SIZE(consts); i < size; ++i) {
      BorrowedRef<PyCodeObject> code = PyTuple_GET_ITEM(consts, i);
      if (!PyCode_Check(code) || !visited.insert(code).second ||
          code->co_qualname == nullptr ||
          !onJitListImpl(code, module, code->co_qualname)) {
        continue;
      }

      result.emplace_back(code);
      // Nested functions can themselves contain nested code objects.
      consts_tuples.emplace(code->co_consts);
    }
  }

  return result;
}

// Called when a function object is created. Returns 1 if the function was
// attached to existing compiled code or queued for compilation, 0 otherwise.
int _PyJIT_RegisterFunction(PyFunctionObject* func) {
  // Attempt to attach already-compiled code even if the JIT is disabled, as
  // long as it hasn't been finalized.
  if (jit_ctx != nullptr &&
      _PyJITContext_AttachCompiledCode(jit_ctx, func) == PYJIT_RESULT_OK) {
    return 1;
  }

  if (!_PyJIT_IsEnabled()) {
    return 0;
  }

  JIT_CHECK(
      !g_threaded_compile_context.compileRunning(),
      "Not intended for using during threaded compilation");
  int result = 0;

  // Queue a unit (function or code object) for later compilation; also feed
  // the multithreaded-compile test list when that test mode is on.
  auto register_unit = [](BorrowedRef<> unit) {
    if (jit_config.multithreaded_compile_test) {
      test_multithreaded_units.emplace_back(unit);
    }
    jit_reg_units.emplace(unit);
  };

  if (_PyJIT_OnJitList(func)) {
    register_unit(reinterpret_cast<PyObject*>(func));
    result = 1;
  }

  // If we have an active jit-list, scan this function's code object for any
  // nested functions that might be on the jit-list, and register them as
  // well.
  if (g_jit_list != nullptr) {
    PyObject* module = func->func_module;
    PyObject* globals = func->func_globals;
    for (auto code : findNestedCodes(
             module,
             reinterpret_cast<PyCodeObject*>(func->func_code)->co_consts)) {
      register_unit(reinterpret_cast<PyObject*>(code.get()));
      // Remember (module, globals) so the nested code object can be preloaded
      // later without its function object.
      jit_code_data.emplace(
          std::piecewise_construct,
          std::forward_as_tuple(code),
          std::forward_as_tuple(module, globals));
    }
  }

  return result;
}

// Runtime notification: a new type was created.
void _PyJIT_TypeCreated(PyTypeObject* type) {
  registerProfiledType(type);
}

// Runtime notification: a type was mutated; invalidate dependent compiled
// code and inline caches.
void _PyJIT_TypeModified(PyTypeObject* type) {
  if (jit_ctx) {
    _PyJITContext_TypeModified(jit_ctx, type);
  }
  jit::notifyICsTypeChanged(type);
}

void _PyJIT_TypeNameModified(PyTypeObject* type) {
  // We assume that this is a very rare case, and simply give up on tracking
  // the type if it happens.
  // (continuation of _PyJIT_TypeNameModified: drop the type from profiling)
  unregisterProfiledType(type);
}

// Runtime notification: a type is being destroyed; drop all JIT-side
// references to it.
void _PyJIT_TypeDestroyed(PyTypeObject* type) {
  if (jit_ctx) {
    _PyJITContext_TypeDestroyed(jit_ctx, type);
  }
  unregisterProfiledType(type);
}

void _PyJIT_FuncModified(PyFunctionObject* func) {
  if (jit_ctx) {
    _PyJITContext_FuncModified(jit_ctx, func);
  }
}

// Function object destroyed: remove it from the pending-compile set and from
// the JIT context.
void _PyJIT_FuncDestroyed(PyFunctionObject* func) {
  if (_PyJIT_IsEnabled()) {
    jit_reg_units.erase(reinterpret_cast<PyObject*>(func));
  }
  if (jit_ctx) {
    _PyJITContext_FuncDestroyed(jit_ctx, func);
  }
}

// Code object destroyed: drop its pending-compile registration and its
// cached (module, globals) preload data.
void _PyJIT_CodeDestroyed(PyCodeObject* code) {
  if (_PyJIT_IsEnabled()) {
    jit_reg_units.erase(reinterpret_cast<PyObject*>(code));
    jit_code_data.erase(code);
  }
}

// Log accumulated runtime stats (best-effort; silently returns on C-API
// failure).
static void dump_jit_stats() {
  auto stats = get_and_clear_runtime_stats(nullptr, nullptr);
  if (stats == nullptr) {
    return;
  }
  auto stats_str = PyObject_Str(stats);
  if (stats_str == nullptr) {
    return;
  }
  JIT_LOG("JIT runtime stats:\n%s", PyUnicode_AsUTF8(stats_str));
  Py_DECREF(stats_str);
}

// Tear down the JIT. Safe to call even if _PyJIT_Initialize() never ran to
// completion: profile/Runtime cleanup happens unconditionally, and the
// initialized-only teardown is gated on init_state. Always returns 0.
int _PyJIT_Finalize() {
  if (g_dump_stats) {
    dump_jit_stats();
  }

  if (!g_write_profile_file.empty()) {
    writeProfileData(g_write_profile_file.c_str());
    g_write_profile_file.clear();
  }
  clearProfileData();

  // Always release references from Runtime objects: C++ clients may have
  // invoked the JIT directly without initializing a full _PyJITContext.
  jit::codegen::NativeGeneratorFactory::runtime()->clearDeoptStats();
  jit::codegen::NativeGeneratorFactory::runtime()->releaseReferences();

  if (jit_config.init_state != JIT_INITIALIZED) {
    return 0;
  }

  delete g_jit_list;
  g_jit_list = nullptr;

  jit_config.init_state = JIT_FINALIZED;

  JIT_CHECK(jit_ctx != nullptr, "jit_ctx not initialized");
  delete jit_ctx;
  jit_ctx = nullptr;

  // Release the interned strings created in _PyJIT_Initialize().
#define CLEAR_STR(s) Py_CLEAR(s_str_##s);
  INTERNED_STRINGS(CLEAR_STR)
#undef CLEAR_STR

  for (PyObject*& opname : s_opnames) {
    if (opname != nullptr) {
      Py_DECREF(opname);
      opname = nullptr;
    }
  }

  jit::codegen::NativeGeneratorFactory::shutdown();

  CodeAllocator::freeGlobalCodeAllocator();

  return 0;
}

// Nonzero iff the JIT is configured to use shadow frames instead of full
// Python frames.
int _PyJIT_ShadowFrame() {
  return jit_config.frame_mode == SHADOW_FRAME;
}

// Resume a JIT-compiled generator. `arg` is the value to send (NULL after the
// exc-normalization below means "inject an exception"); `f` is an optional
// materialized PyFrameObject to link in; `finish_yield_from` is forwarded to
// the generated resume entry.
PyObject* _PyJIT_GenSend(
    PyGenObject* gen,
    PyObject* arg,
    int exc,
    PyFrameObject* f,
    PyThreadState* tstate,
    int finish_yield_from) {
  auto gen_footer = reinterpret_cast<GenDataFooter*>(gen->gi_jit_data);

  // state should be valid and the generator should not be completed
  JIT_DCHECK(
      gen_footer->state == _PyJitGenState_JustStarted ||
          gen_footer->state == _PyJitGenState_Running,
      "Invalid JIT generator state");

  gen_footer->state = _PyJitGenState_Running;

  // JIT generators use NULL arg to indicate an exception
  if (exc) {
    JIT_DCHECK(
        arg == Py_None, "Arg should be None when injecting an exception");
    arg = NULL;
  } else {
    if (arg == NULL) {
      arg = Py_None;
    }
  }

  if (f) {
    // Setup tstate/frame as would be done in PyEval_EvalFrameEx() or
    // prologue of a JITed function.
    tstate->frame = f;
    f->f_executing = 1;
    // This compensates for the decref which occurs in JITRT_UnlinkFrame().
    Py_INCREF(f);
    // This satisfies code which uses f_lasti == -1 or < 0 to check if a
    // generator is not yet started, but still provides a garbage value in case
    // anything tries to actually use f_lasti.
    f->f_lasti = std::numeric_limits<int>::max();
  }

  // Enter generated code.
  // (continuation of _PyJIT_GenSend: enter the compiled resume entry)
  JIT_DCHECK(
      gen_footer->yieldPoint != nullptr,
      "Attempting to resume a generator with no yield point");
  PyObject* result =
      gen_footer->resumeEntry((PyObject*)gen, arg, tstate, finish_yield_from);

  if (!result && (gen->gi_jit_data != nullptr)) {
    // Generator jit data (gen_footer) will be freed if the generator
    // deopts
    gen_footer->state = _PyJitGenState_Completed;
  }

  return result;
}

// Materialize a full PyFrameObject for a JIT generator (e.g. for
// introspection).
PyFrameObject* _PyJIT_GenMaterializeFrame(PyGenObject* gen) {
  PyThreadState* tstate = PyThreadState_Get();
  PyFrameObject* frame = jit::materializePyFrameForGen(tstate, gen);
  return frame;
}

// GC tp_traverse support: visit references held live across the generator's
// current yield point (only while the generator is suspended, not completed).
int _PyJIT_GenVisitRefs(PyGenObject* gen, visitproc visit, void* arg) {
  auto gen_footer = reinterpret_cast<GenDataFooter*>(gen->gi_jit_data);
  JIT_DCHECK(gen_footer, "Generator missing JIT data");
  if (gen_footer->state != _PyJitGenState_Completed && gen_footer->yieldPoint) {
    return reinterpret_cast<GenYieldPoint*>(gen_footer->yieldPoint)
        ->visitRefs(gen, visit, arg);
  }
  return 0;
}

// Dealloc support: release references held at the yield point, then free the
// generator's JIT data.
void _PyJIT_GenDealloc(PyGenObject* gen) {
  auto gen_footer = reinterpret_cast<GenDataFooter*>(gen->gi_jit_data);
  JIT_DCHECK(gen_footer, "Generator missing JIT data");
  if (gen_footer->state != _PyJitGenState_Completed && gen_footer->yieldPoint) {
    reinterpret_cast<GenYieldPoint*>(gen_footer->yieldPoint)->releaseRefs(gen);
  }
  JITRT_GenJitDataFree(gen);
}

// Returns a NEW reference to the sub-iterator the generator is currently
// `yield from`-ing, or NULL if there is none.
PyObject* _PyJIT_GenYieldFromValue(PyGenObject* gen) {
  auto gen_footer = reinterpret_cast<GenDataFooter*>(gen->gi_jit_data);
  JIT_DCHECK(gen_footer, "Generator missing JIT data");
  PyObject* yf = NULL;
  if (gen_footer->state != _PyJitGenState_Completed && gen_footer->yieldPoint) {
    yf = gen_footer->yieldPoint->yieldFromValue(gen_footer);
    Py_XINCREF(yf);
  }
  return yf;
}

// Return a borrowed reference to the globals dict of the innermost Python
// call, resolved through the shadow-frame stack. The shadow frame's pointer
// kind determines where globals live:
//   PYSF_PYFRAME -> real PyFrameObject
//   PYSF_RTFS    -> jit::RuntimeFrameState
//   PYSF_CODE_RT -> jit::CodeRuntime's frame state
// Returns nullptr when there is no shadow frame at all.
PyObject* _PyJIT_GetGlobals(PyThreadState* tstate) {
  _PyShadowFrame* shadow_frame = tstate->shadow_frame;
  if (shadow_frame == nullptr) {
    JIT_CHECK(
        tstate->frame == nullptr,
        "py frame w/out corresponding shadow frame\n");
    return nullptr;
  }
  if (_PyShadowFrame_GetPtrKind(shadow_frame) == PYSF_PYFRAME) {
    return _PyShadowFrame_GetPyFrame(shadow_frame)->f_globals;
  }
  if (_PyShadowFrame_GetPtrKind(shadow_frame) == PYSF_RTFS) {
    return static_cast<jit::RuntimeFrameState*>(
               _PyShadowFrame_GetPtr(shadow_frame))
        ->globals();
  }
  // TODO(T110700318): Collapse into RTFS case
  JIT_DCHECK(
      _PyShadowFrame_GetPtrKind(shadow_frame) == PYSF_CODE_RT,
      "Unexpected shadow frame type");
  jit::CodeRuntime* code_rt =
      static_cast<jit::CodeRuntime*>(_PyShadowFrame_GetPtr(shadow_frame));
  return code_rt->frameState()->globals();
}

// Record the types of the operands of the instruction the interpreter is
// about to execute. `stack_top` points one past the top of the value stack;
// the opcode switch below decides how many operands (and at which stack
// offsets) to sample.
void _PyJIT_ProfileCurrentInstr(
    PyFrameObject* frame,
    PyObject** stack_top,
    int opcode,
    int oparg) {
  // Sample the types at the given stack offsets (0 == top of stack) into the
  // per-(code, bytecode-offset) TypeProfiler, creating it on first hit.
  auto profile_stack = [&](auto... stack_offsets) {
    CodeProfile& code_profile =
        jit::codegen::NativeGeneratorFactory::runtime()
            ->typeProfiles()[Ref<PyCodeObject>{frame->f_code}];
    int opcode_offset = frame->f_lasti;

    auto pair = code_profile.typed_hits.emplace(opcode_offset, nullptr);
    if (pair.second) {
      // Track up to 4 distinct type combinations per instruction.
      constexpr int kProfilerRows = 4;
      pair.first->second =
          TypeProfiler::create(kProfilerRows, sizeof...(stack_offsets));
    }
    auto get_type = [&](int offset) {
      PyObject* obj = stack_top[-(offset + 1)];
      return obj != nullptr ?
          // (continuation of get_type: NULL stack slots profile as nullptr)
          Py_TYPE(obj) : nullptr;
    };
    pair.first->second->recordTypes(get_type(stack_offsets)...);
  };

  // Group opcodes by how many stack operands they consume/inspect.
  switch (opcode) {
    // One-operand opcodes: profile top-of-stack only.
    case BEFORE_ASYNC_WITH:
    case DELETE_ATTR:
    case END_ASYNC_FOR:
    case END_FINALLY:
    case FOR_ITER:
    case GET_AITER:
    case GET_ANEXT:
    case GET_AWAITABLE:
    case GET_ITER:
    case GET_YIELD_FROM_ITER:
    case JUMP_IF_FALSE_OR_POP:
    case JUMP_IF_TRUE_OR_POP:
    case LOAD_ATTR:
    case LOAD_FIELD:
    case LOAD_METHOD:
    case POP_JUMP_IF_FALSE:
    case POP_JUMP_IF_TRUE:
    case RETURN_VALUE:
    case SETUP_WITH:
    case STORE_DEREF:
    case STORE_GLOBAL:
    case UNARY_INVERT:
    case UNARY_NEGATIVE:
    case UNARY_NOT:
    case UNARY_POSITIVE:
    case UNPACK_EX:
    case UNPACK_SEQUENCE:
    case WITH_CLEANUP_START:
    case YIELD_FROM:
    case YIELD_VALUE: {
      profile_stack(0);
      break;
    }
    // Two-operand opcodes (binary/inplace ops, compares, attribute stores):
    // profile the two topmost values.
    case BINARY_ADD:
    case BINARY_AND:
    case BINARY_FLOOR_DIVIDE:
    case BINARY_LSHIFT:
    case BINARY_MATRIX_MULTIPLY:
    case BINARY_MODULO:
    case BINARY_MULTIPLY:
    case BINARY_OR:
    case BINARY_POWER:
    case BINARY_RSHIFT:
    case BINARY_SUBSCR:
    case BINARY_SUBTRACT:
    case BINARY_TRUE_DIVIDE:
    case BINARY_XOR:
    case COMPARE_OP:
    case DELETE_SUBSCR:
    case INPLACE_ADD:
    case INPLACE_AND:
    case INPLACE_FLOOR_DIVIDE:
    case INPLACE_LSHIFT:
    case INPLACE_MATRIX_MULTIPLY:
    case INPLACE_MODULO:
    case INPLACE_MULTIPLY:
    case INPLACE_OR:
    case INPLACE_POWER:
    case INPLACE_RSHIFT:
    case INPLACE_SUBTRACT:
    case INPLACE_TRUE_DIVIDE:
    case INPLACE_XOR:
    case LIST_APPEND:
    case MAP_ADD:
    case SET_ADD:
    case STORE_ATTR:
    case STORE_FIELD:
    case WITH_CLEANUP_FINISH: {
      profile_stack(1, 0);
      break;
    }
    // STORE_SUBSCR touches value, container, and key.
    case STORE_SUBSCR: {
      profile_stack(2, 1, 0);
      break;
    }
    // Calls: the callable sits below `oparg` arguments.
    case CALL_FUNCTION: {
      profile_stack(oparg);
      break;
    };
    case CALL_METHOD: {
      profile_stack(oparg, oparg + 1);
      break;
    }
  }
}

// Add `count` interpreted-instruction hits to the code object's total.
void _PyJIT_CountProfiledInstrs(PyCodeObject* code, Py_ssize_t count) {
  jit::codegen::NativeGeneratorFactory::runtime()
      ->typeProfiles()[Ref<PyCodeObject>{code}]
      .total_hits += count;
}

namespace {

// ProfileEnv and the functions below that use it are for building the
// complicated, nested data structure returned by
// _PyJIT_GetAndClearTypeProfiles().
struct ProfileEnv {
  // These members are applicable during the whole process:
  Ref<> stats_list;
  Ref<> other_list;
  Ref<> empty_list;
  UnorderedMap<BorrowedRef<PyTypeObject>, Ref<>> type_name_cache;

  // These members vary with each code object:
  BorrowedRef<PyCodeObject> code;
  Ref<> code_hash;
  Ref<> qualname;
  Ref<> firstlineno;

  // These members vary with each instruction:
  int64_t profiled_hits;
  Ref<> bc_offset;
  Ref<> opname;
  Ref<> lineno;
};

// Allocate the session-wide objects. check() is used throughout this
// namespace to turn C-API failures into CAPIError, caught by the top-level
// caller.
void init_env(ProfileEnv& env) {
  env.stats_list = Ref<>::steal(check(PyList_New(0)));
  env.other_list = Ref<>::steal(check(PyList_New(0)));
  auto other_str = Ref<>::steal(check(PyUnicode_InternFromString("<other>")));
  check(PyList_Append(env.other_list, other_str));
  env.empty_list = Ref<>::steal(check(PyList_New(0)));
  // Map the NULL type (empty stack slot) to a printable name.
  env.type_name_cache.emplace(
      nullptr, Ref<>::steal(check(PyUnicode_InternFromString("<NULL>"))));
}

// Return a borrowed reference to an interned full name for `ty`, memoized in
// env.type_name_cache.
PyObject* get_type_name(ProfileEnv& env, PyTypeObject* ty) {
  auto pair = env.type_name_cache.emplace(ty, nullptr);
  Ref<>& cached_name = pair.first->second;
  if (pair.second) {
    cached_name = Ref<>::steal(
        check(PyUnicode_InternFromString(typeFullname(ty).c_str())));
  }
  return cached_name;
}

// Begin emitting rows for a new code object; resets the per-code hit counter.
void start_code(ProfileEnv& env, PyCodeObject* code) {
  env.code = code;
  env.code_hash =
      Ref<>::steal(check(PyLong_FromUnsignedLong(hashBytecode(code))));
  env.qualname = Ref<>::steal(
      check(PyUnicode_InternFromString(codeQualname(code).c_str())));
  env.firstlineno = Ref<>::steal(check(PyLong_FromLong(code->co_firstlineno)));
  env.profiled_hits = 0;
}

// Begin emitting rows for one bytecode offset within the current code object.
void start_instr(ProfileEnv& env, int bcoff_raw) {
  // co_lnotab can be absent; report line -1 in that case.
  int lineno_raw = env.code->co_lnotab != nullptr ?
      // (continuation of start_instr)
      PyCode_Addr2Line(env.code, bcoff_raw) : -1;
  int opcode = _Py_OPCODE(PyBytes_AS_STRING(env.code->co_code)[bcoff_raw]);
  env.bc_offset = Ref<>::steal(check(PyLong_FromLong(bcoff_raw)));
  env.lineno = Ref<>::steal(check(PyLong_FromLong(lineno_raw)));
  env.opname.reset(s_opnames.at(opcode));
}

// Append one stats row (a dict of dicts) to env.stats_list for the current
// code object/instruction. `type_names` (may be nullptr) is attached under
// the "normvector" key; when use_op is false the row is a code-level summary
// and omits lineno/bc_offset/opname. Also accumulates env.profiled_hits.
void append_item(
    ProfileEnv& env,
    long count_raw,
    PyObject* type_names,
    bool use_op = true) {
  auto item = Ref<>::steal(check(PyDict_New()));
  auto normals = Ref<>::steal(check(PyDict_New()));
  auto ints = Ref<>::steal(check(PyDict_New()));
  auto count = Ref<>::steal(check(PyLong_FromLong(count_raw)));
  check(PyDict_SetItem(item, s_str_normal, normals));
  check(PyDict_SetItem(item, s_str_int, ints));
  check(PyDict_SetItem(normals, s_str_func_qualname, env.qualname));
  check(PyDict_SetItem(normals, s_str_filename, env.code->co_filename));
  check(PyDict_SetItem(ints, s_str_code_hash, env.code_hash));
  check(PyDict_SetItem(ints, s_str_firstlineno, env.firstlineno));
  check(PyDict_SetItem(ints, s_str_count, count));
  if (use_op) {
    check(PyDict_SetItem(ints, s_str_lineno, env.lineno));
    check(PyDict_SetItem(ints, s_str_bc_offset, env.bc_offset));
    check(PyDict_SetItem(normals, s_str_opname, env.opname));
  }
  if (type_names != nullptr) {
    auto normvectors = Ref<>::steal(check(PyDict_New()));
    check(PyDict_SetItem(normvectors, s_str_types, type_names));
    check(PyDict_SetItem(item, s_str_normvector, normvectors));
  }
  check(PyList_Append(env.stats_list, item));
  env.profiled_hits += count_raw;
}

// Walk all collected TypeProfiles and emit:
//  - one row per (instruction, observed type combination),
//  - an "<other>" row for overflow hits the profiler couldn't bucket,
//  - one untyped summary row per code object for hits with no type data.
void build_profile(ProfileEnv& env, TypeProfiles& profiles) {
  for (auto& code_pair : profiles) {
    start_code(env, code_pair.first);
    const CodeProfile& code_profile = code_pair.second;

    for (auto& profile_pair : code_profile.typed_hits) {
      const TypeProfiler& profile = *profile_pair.second;
      if (profile.empty()) {
        continue;
      }
      start_instr(env, profile_pair.first);
      // Rows are populated front-to-back, so stop at the first zero count.
      for (int row = 0; row < profile.rows() && profile.count(row) != 0;
           ++row) {
        auto type_names = Ref<>::steal(check(PyList_New(0)));
        for (int col = 0; col < profile.cols(); ++col) {
          PyTypeObject* ty = profile.type(row, col);
          check(PyList_Append(type_names, get_type_name(env, ty)));
        }
        append_item(env, profile.count(row), type_names);
      }
      if (profile.other() > 0) {
        append_item(env, profile.other(), env.other_list);
      }
    }

    int64_t untyped_hits = code_profile.total_hits - env.profiled_hits;
    if (untyped_hits != 0) {
      append_item(env, untyped_hits, nullptr, false);
    }
  }
}

} // namespace

// Build and return a new list of per-instruction type-profile rows, clearing
// the collected profiles on success. Returns nullptr (with a Python error
// set by the failing C-API call) on failure; profiles are NOT cleared in
// that case.
PyObject* _PyJIT_GetAndClearTypeProfiles() {
  auto& profiles = jit::codegen::NativeGeneratorFactory::runtime()->typeProfiles();
  ProfileEnv env;

  try {
    init_env(env);
    build_profile(env, profiles);
  } catch (const CAPIError&) {
    return nullptr;
  }
  profiles.clear();

  return env.stats_list.release();
}

// Discard all collected type profiles without reporting them.
void _PyJIT_ClearTypeProfiles() {
  jit::codegen::NativeGeneratorFactory::runtime()->typeProfiles().clear();
}