
/* * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

/*
 * Some notes on how to minimize the code size.
 *
 * Instructions that use 32-bit registers (e.g. eax) are generally
 * one byte shorter than instructions that use 64-bit registers
 * (e.g. rax). This does not apply to registers r8-r15 because they'll
 * always need a rex prefix. The `and`, `or`, and `cmp` instructions
 * are even shorter when operating on the RETb (al) register. The
 * `test` instruction with an immediate second operand is shorter
 * when operating on an 8-bit register.
 *
 * On both Unix and Windows, instructions can be shortened by using
 * RETd, ARG1d, or ARG2d instead of RET, ARG1, or ARG2, respectively.
 * On Unix, but not on Windows, ARG3d and ARG4d will also result in
 * shorter instructions.
 *
 * Here are some examples. If we know that the higher 32 bits of
 * a register are uninteresting or should be zeroed, we can write:
 *
 *   a.mov(RETd, ARG1d)
 *
 * (When writing to the lower 32 bits of a register, the high 32
 * bits are zeroed.)
 *
 * Here is a tag test on the contents of ARG1:
 *
 *   a.and_(ARG1d, 15)
 *   a.cmp(ARG1d, 15)
 *
 * The same tag test on RET can be even shorter if written like this:
 *
 *   a.and_(RETb, 15)
 *   a.cmp(RETb, 15)
 *
 * An alignment test can be written like this (when unit <= 256):
 *
 *   a.test(RETb, imm(unit - 1));
 *   a.test(ARG1.r8(), imm(unit - 1));
 *
 * ASMJIT will automatically encode backward jumps (jumps to bound
 * labels) in the shortest form possible. However, forward jumps
 * (jumps to unbound labels) will by default be encoded in the long
 * form (using a 32-bit relative address).
 *
 * Within a single BEAM instruction, a `short_()` prefix can be used
 * to emit short forward jumps (using a signed byte as an offset,
 * limiting the distance to about 128 bytes).
 *
 * Example:
 *
 *   a.short_().je(next);
 *   .
 *   .
 *   .
 *   a.bind(next);
 */
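/* A further example of the `test` note above (illustrative only): the
 * non-empty-list tests later in this file use the 8-bit form
 *
 *   a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
 *
 * which encodes shorter than the same test on RETd or RET, since both
 * the register and the immediate fit in a single byte. */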
#include <algorithm>

#include "beam_asm.hpp"

extern "C"
{
#include "erl_bif_table.h"
#include "big.h"
#include "beam_catches.h"
#include "beam_common.h"
#include "code_ix.h"
}

using namespace asmjit;

/* Helpers */

void BeamModuleAssembler::emit_error(int reason) {
    a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(reason));
    emit_handle_error();
}

void BeamModuleAssembler::emit_gc_test_preserve(const ArgVal &Need,
                                                const ArgVal &Live,
                                                x86::Gp term) {
    const int32_t bytes_needed = (Need.getValue() + S_RESERVED) * sizeof(Eterm);
    Label after_gc_check = a.newLabel();

    ASSERT(term != ARG3);

    a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
    a.cmp(ARG3, E);
    a.short_().jbe(after_gc_check);

    a.mov(getXRef(Live.getValue()), term);
    mov_imm(ARG4, Live.getValue() + 1);
    fragment_call(ga->get_garbage_collect());
    a.mov(term, getXRef(Live.getValue()));

    a.bind(after_gc_check);
}

void BeamModuleAssembler::emit_gc_test(const ArgVal &Ns,
                                       const ArgVal &Nh,
                                       const ArgVal &Live) {
    const int32_t bytes_needed =
            (Ns.getValue() + Nh.getValue() + S_RESERVED) * sizeof(Eterm);
    Label after_gc_check = a.newLabel();

    a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
    a.cmp(ARG3, E);
    a.short_().jbe(after_gc_check);

    mov_imm(ARG4, Live.getValue());
    fragment_call(ga->get_garbage_collect());

    a.bind(after_gc_check);
}

#if defined(DEBUG) && defined(HARD_DEBUG)
static void validate_term(Eterm term) {
    if (is_boxed(term)) {
        Eterm header = *boxed_val(term);

        if (header_is_bin_matchstate(header)) {
            return;
        }
    }

    size_object_x(term, nullptr);
}
#endif

void BeamModuleAssembler::emit_validate(const ArgVal &arity) {
#ifdef DEBUG
    Label next = a.newLabel(), crash = a.newLabel();

    /* Crash if the Erlang heap is not word-aligned */
    a.test(HTOP, imm(sizeof(Eterm) - 1));
    a.jne(crash);

    /* Crash if the Erlang stack is not word-aligned */
    a.test(E, imm(sizeof(Eterm) - 1));
    a.jne(crash);

    /* Crash if we've overrun the stack */
    a.lea(ARG1, x86::qword_ptr(E, -(int32_t)(S_REDZONE * sizeof(Eterm))));
    a.cmp(HTOP, ARG1);
    a.ja(crash);

    a.jmp(next);

    a.bind(crash);
    a.hlt();
    a.bind(next);

#    ifdef HARD_DEBUG
    emit_enter_runtime();

    for (unsigned i = 0; i < arity.getValue(); i++) {
        a.mov(ARG1, getXRef(i));
        runtime_call<1>(validate_term);
    }

    emit_leave_runtime();
#    endif
#endif
}

/* Instrs */

void BeamModuleAssembler::emit_i_validate(const ArgVal &Arity) {
    emit_validate(Arity);
}

void BeamModuleAssembler::emit_allocate_heap(const ArgVal &NeedStack,
                                             const ArgVal &NeedHeap,
                                             const ArgVal &Live) {
    ASSERT(NeedStack.getType() == ArgVal::TYPE::u);
    ASSERT(NeedStack.getValue() <= MAX_REG);
    ArgVal needed = NeedStack;

#if !defined(NATIVE_ERLANG_STACK)
    needed = needed + CP_SIZE;
#endif

    emit_gc_test(needed, NeedHeap, Live);

    if (needed.getValue() > 0) {
        a.sub(E, imm(needed.getValue() * sizeof(Eterm)));
    }

#if !defined(NATIVE_ERLANG_STACK)
    a.mov(getCPRef(), imm(NIL));
#endif
}

void BeamModuleAssembler::emit_allocate(const ArgVal &NeedStack,
                                        const ArgVal &Live) {
    emit_allocate_heap(NeedStack, ArgVal(ArgVal::TYPE::u, 0), Live);
}
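/* Illustration (not emitted): the heap test in emit_gc_test() above boils
 * down to the following shape, using the symbolic register names from
 * beam_asm.hpp:
 *
 *     lea ARG3, [HTOP + bytes_needed]
 *     cmp ARG3, E
 *     jbe short after_gc_check    ; enough free space, skip the GC call
 *     mov ARG4, live
 *     call <garbage_collect fragment>
 *   after_gc_check:
 *
 * The heap (HTOP) grows upwards and the stack (E) downwards within the
 * same memory block, so a single unsigned comparison covers both limits. */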
void BeamModuleAssembler::emit_allocate_heap_zero(const ArgVal &NeedStack,
                                                  const ArgVal &NeedHeap,
                                                  const ArgVal &Live) {
    ASSERT(NeedStack.getType() == ArgVal::TYPE::u);
    ASSERT(NeedStack.getValue() <= MAX_REG);

    emit_allocate_heap(NeedStack, NeedHeap, Live);

    int slots = NeedStack.getValue();

    if (slots == 1) {
        a.mov(getYRef(0), imm(NIL));
    } else {
        /* `stosq` is more compact than `mov` after 2 slots. */
        mov_imm(x86::rax, NIL);

#ifdef NATIVE_ERLANG_STACK
        /* `mov` is two bytes shorter than `lea`. */
        a.mov(x86::rdi, E);
#else
        /* y(0) is at E+8. Must use `lea` here. */
        a.lea(x86::rdi, getYRef(0));
#endif

        if (slots <= 4) {
            /* Slightly more compact than `rep stosq`. */
            for (int i = 0; i < slots; i++) {
                a.stosq();
            }
        } else {
            mov_imm(x86::rcx, slots);
            a.rep().stosq();
        }
    }
}

void BeamModuleAssembler::emit_allocate_zero(const ArgVal &NeedStack,
                                             const ArgVal &Live) {
    emit_allocate_heap_zero(NeedStack, ArgVal(ArgVal::TYPE::u, 0), Live);
}

void BeamModuleAssembler::emit_deallocate(const ArgVal &Deallocate) {
    ASSERT(Deallocate.getType() == ArgVal::TYPE::u);
    ASSERT(Deallocate.getValue() <= 1023);
    ArgVal dealloc = Deallocate;

#if !defined(NATIVE_ERLANG_STACK)
    dealloc = dealloc + CP_SIZE;
#endif

    if (dealloc.getValue() > 0) {
        a.add(E, imm(dealloc.getValue() * sizeof(Eterm)));
    }
}

void BeamModuleAssembler::emit_test_heap(const ArgVal &Nh, const ArgVal &Live) {
    emit_gc_test(ArgVal(ArgVal::u, 0), Nh, Live);
}

void BeamGlobalAssembler::emit_dispatch_return() {
#ifdef NATIVE_ERLANG_STACK
    /* ARG3 should contain the place to jump to. */
    a.pop(ARG3);
#else
    /* ARG3 already contains the place to jump to. */
#endif

    a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), 0);
    a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), 1);

    a.jmp(labels[context_switch_simplified]);
}

void BeamModuleAssembler::emit_return() {
    Label dispatch_return = a.newLabel();

#ifdef HARD_DEBUG
    /* Validate return address and {x,0} */
    emit_validate(ArgVal(ArgVal::u, 1));
#endif

#if !defined(NATIVE_ERLANG_STACK)
    a.mov(ARG3, getCPRef());
    a.mov(getCPRef(), imm(NIL));
#endif

    /* The reduction test is kept in module code because moving it to a
     * shared fragment caused major performance regressions in dialyzer. */
    a.dec(FCALLS);
    a.short_().jl(dispatch_return);

#ifdef NATIVE_ERLANG_STACK
    a.ret();
#else
    a.jmp(ARG3);
#endif

    a.bind(dispatch_return);
    abs_jmp(ga->get_dispatch_return());
}
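/* Illustration (not emitted): with the native Erlang stack enabled, the
 * return sequence above costs only a handful of bytes per return site,
 * roughly:
 *
 *     dec FCALLS                 ; one reduction consumed
 *     jl short dispatch_return   ; out of reductions, go yield
 *     ret
 *   dispatch_return:
 *     jmp <dispatch_return fragment>
 *
 * FCALLS going negative is what eventually forces a context switch via
 * dispatch_return() and context_switch_simplified. */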
void BeamModuleAssembler::emit_i_call(const ArgVal &CallDest) {
    Label dest = labels[CallDest.getValue()];
    erlang_call(dest, RET);
}

void BeamModuleAssembler::emit_i_call_last(const ArgVal &CallDest,
                                           const ArgVal &Deallocate) {
    emit_deallocate(Deallocate);
    a.jmp(labels[CallDest.getValue()]);
}

void BeamModuleAssembler::emit_i_call_only(const ArgVal &CallDest) {
    a.jmp(labels[CallDest.getValue()]);
}

/* Handles save_calls. Export entry is in ARG2.
 *
 * When the active code index is ERTS_SAVE_CALLS_CODE_IX, all remote calls
 * will land here. */
void BeamGlobalAssembler::emit_dispatch_save_calls() {
    a.mov(TMP_MEM1q, ARG2);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<2>(save_calls);

    emit_leave_runtime();

    a.mov(ARG2, TMP_MEM1q);

    /* Keep going with the actual code index. */
    a.mov(ARG1, imm(&the_active_code_index));
    a.mov(ARG1d, x86::dword_ptr(ARG1));

    /* Jump to addressv[code index]; the scale of 3 indexes by 8 bytes. */
    a.jmp(x86::qword_ptr(ARG2, ARG1, 3, offsetof(Export, addressv)));
}

x86::Mem BeamModuleAssembler::emit_setup_export(const ArgVal &Exp) {
    /* Load export pointer / addressv */
    make_move_patch(ARG2, imports[Exp.getValue()].patches);

    return x86::qword_ptr(ARG2, active_code_ix, 3, offsetof(Export, addressv));
}

void BeamModuleAssembler::emit_i_call_ext(const ArgVal &Exp) {
    x86::Mem destination = emit_setup_export(Exp);
    erlang_call(destination, RET);
}

void BeamModuleAssembler::emit_i_call_ext_only(const ArgVal &Exp) {
    auto destination = emit_setup_export(Exp);
    a.jmp(destination);
}

void BeamModuleAssembler::emit_i_call_ext_last(const ArgVal &Exp,
                                               const ArgVal &Deallocate) {
    emit_deallocate(Deallocate);

    auto destination = emit_setup_export(Exp);
    a.jmp(destination);
}

void BeamModuleAssembler::emit_normal_exit() {
    /* This is implicitly global; it does not normally appear in modules and
     * doesn't require size optimization. */

    emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
    emit_proc_lc_unrequire();

    a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(EXC_NORMAL));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), imm(0));
    a.mov(ARG1, c_p);
    mov_imm(ARG2, am_normal);
    runtime_call<2>(erts_do_exit_process);

    emit_proc_lc_require();
    emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();

    abs_jmp(ga->get_do_schedule());
}

void BeamModuleAssembler::emit_continue_exit() {
    /* This is implicitly global; it does not normally appear in modules and
     * doesn't require size optimization. */

    emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
    emit_proc_lc_unrequire();

    a.mov(ARG1, c_p);
    runtime_call<1>(erts_continue_exit_process);

    emit_proc_lc_require();
    emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();

    abs_jmp(ga->get_do_schedule());
}

/* This is an alias for handle_error */
void BeamModuleAssembler::emit_error_action_code() {
    abs_jmp(ga->get_error_action_code());
}

static ErtsCodeMFA apply3_mfa = {am_erlang, am_apply, 3};

x86::Gp BeamModuleAssembler::emit_variable_apply(bool includeI) {
    Label dispatch = a.newLabel(), entry = a.newLabel();

    a.align(kAlignCode, 8);
    a.bind(entry);

    emit_enter_runtime<Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    load_x_reg_array(ARG2);

    if (includeI) {
        a.lea(ARG3, x86::qword_ptr(entry));
    } else {
        mov_imm(ARG3, 0);
    }

    mov_imm(ARG4, 0);

    runtime_call<4>(apply);

    emit_leave_runtime<Update::eStack | Update::eHeap>();

    a.test(RET, RET);
    a.short_().jne(dispatch);

    emit_handle_error(entry, &apply3_mfa);

    a.bind(dispatch);

    return RET;
}

void BeamModuleAssembler::emit_i_apply() {
    x86::Gp dest = emit_variable_apply(false);

    ASSERT(dest != ARG1);
    erlang_call(dest, ARG1);
}

void BeamModuleAssembler::emit_i_apply_last(const ArgVal &Deallocate) {
    emit_deallocate(Deallocate);
    emit_i_apply_only();
}

void BeamModuleAssembler::emit_i_apply_only() {
    x86::Gp dest = emit_variable_apply(true);
    a.jmp(dest);
}
x86::Gp BeamModuleAssembler::emit_fixed_apply(const ArgVal &Arity,
                                              bool includeI) {
    Label dispatch = a.newLabel(), entry = a.newLabel();

    a.align(kAlignCode, 8);
    a.bind(entry);

    mov_arg(ARG3, Arity);

    emit_enter_runtime<Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    load_x_reg_array(ARG2);

    if (includeI) {
        a.lea(ARG4, x86::qword_ptr(entry));
    } else {
        mov_imm(ARG4, 0);
    }

    mov_imm(ARG5, 0);

    runtime_call<5>(fixed_apply);

    emit_leave_runtime<Update::eStack | Update::eHeap>();

    a.test(RET, RET);
    a.short_().jne(dispatch);

    emit_handle_error(entry, &apply3_mfa);

    a.bind(dispatch);

    return RET;
}

void BeamModuleAssembler::emit_apply(const ArgVal &Arity) {
    x86::Gp dest = emit_fixed_apply(Arity, false);

    ASSERT(dest != ARG1);
    erlang_call(dest, ARG1);
}

void BeamModuleAssembler::emit_apply_last(const ArgVal &Arity,
                                          const ArgVal &Deallocate) {
    emit_deallocate(Deallocate);

    x86::Gp dest = emit_fixed_apply(Arity, true);
    a.jmp(dest);
}

x86::Gp BeamModuleAssembler::emit_call_fun(const ArgVal &Fun) {
    Label dispatch = a.newLabel();

    mov_arg(ARG2, Fun);

    emit_enter_runtime<Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    load_x_reg_array(ARG3);
    mov_imm(ARG4, THE_NON_VALUE);
    runtime_call<4>(call_fun);

    emit_leave_runtime<Update::eStack | Update::eHeap>();

    a.test(RET, RET);
    a.short_().jne(dispatch);

    emit_handle_error();

    a.bind(dispatch);

    return RET;
}

void BeamModuleAssembler::emit_i_call_fun(const ArgVal &Fun) {
    x86::Gp dest = emit_call_fun(Fun);

    ASSERT(dest != ARG1);
    erlang_call(dest, ARG1);
}

void BeamModuleAssembler::emit_i_call_fun_last(const ArgVal &Fun,
                                               const ArgVal &Deallocate) {
    emit_deallocate(Deallocate);

    x86::Gp dest = emit_call_fun(Fun);
    a.jmp(dest);
}

x86::Gp BeamModuleAssembler::emit_apply_fun() {
    Label dispatch = a.newLabel();

    emit_enter_runtime<Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    a.mov(ARG2, getXRef(0));
    a.mov(ARG3, getXRef(1));
    load_x_reg_array(ARG4);
    runtime_call<4>(apply_fun);

    emit_leave_runtime<Update::eStack | Update::eHeap>();

    a.test(RET, RET);
    a.short_().jne(dispatch);

    emit_handle_error();

    a.bind(dispatch);

    return RET;
}

void BeamModuleAssembler::emit_i_apply_fun() {
    x86::Gp dest = emit_apply_fun();

    ASSERT(dest != ARG1);
    erlang_call(dest, ARG1);
}

void BeamModuleAssembler::emit_i_apply_fun_last(const ArgVal &Deallocate) {
    emit_deallocate(Deallocate);
    emit_i_apply_fun_only();
}

void BeamModuleAssembler::emit_i_apply_fun_only() {
    x86::Gp dest = emit_apply_fun();
    a.jmp(dest);
}
/* Pseudo-instruction for signalling lambda load errors. Never actually
 * runs. */
void BeamModuleAssembler::emit_i_lambda_error(const ArgVal &Dummy) {
    a.hlt();
}

void BeamModuleAssembler::emit_i_make_fun(const ArgVal &Fun,
                                          const ArgVal &NumFree) {
    mov_arg(ARG4, NumFree);

    emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    load_x_reg_array(ARG2);
    make_move_patch(ARG3, lambdas[Fun.getValue()].patches);

    runtime_call<4>(new_fun);

    emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();

    a.mov(getXRef(0), RET);
}

void BeamModuleAssembler::emit_i_make_fun3(const ArgVal &Fun,
                                           const ArgVal &Dst,
                                           const ArgVal &NumFree,
                                           const std::vector<ArgVal> &env) {
    size_t num_free = env.size();

    ASSERT(NumFree.getValue() == num_free);

    mov_arg(ARG3, NumFree);

    emit_enter_runtime<Update::eHeap>();

    a.mov(ARG1, c_p);
    make_move_patch(ARG2, lambdas[Fun.getValue()].patches);

    runtime_call<3>(new_fun_thing);

    emit_leave_runtime<Update::eHeap>();

    comment("Move fun environment");
    for (unsigned i = 0; i < num_free; i++) {
        mov_arg(x86::qword_ptr(RET,
                               offsetof(ErlFunThing, env) + i * sizeof(Eterm)),
                env[i]);
    }

    comment("Create boxed ptr");
    a.or_(RETb, TAG_PRIMARY_BOXED);
    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_get_list(const x86::Gp src,
                                        const ArgVal &Hd,
                                        const ArgVal &Tl) {
    x86::Gp boxed_ptr = emit_ptr_val(src, src);

    switch (ArgVal::register_relation(Hd, Tl)) {
    case ArgVal::Relation::consecutive: {
        comment("(moving head and tail together)");
        x86::Mem dst_ptr = getArgRef(Hd, 16);
        x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
        a.movups(x86::xmm0, src_ptr);
        a.movups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        if (!hasCpuFeature(x86::Features::kAVX)) {
            goto fallback;
        }

        comment("(moving and swapping head and tail together)");
        x86::Mem dst_ptr = getArgRef(Tl, 16);
        x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
        a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
        a.vmovups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::none:
    fallback:
        a.mov(ARG2, getCARRef(boxed_ptr));
        a.mov(ARG3, getCDRRef(boxed_ptr));
        mov_arg(Hd, ARG2);
        mov_arg(Tl, ARG3);
        break;
    }
}

void BeamModuleAssembler::emit_get_list(const ArgVal &Src,
                                        const ArgVal &Hd,
                                        const ArgVal &Tl) {
    mov_arg(ARG1, Src);
    emit_get_list(ARG1, Hd, Tl);
}

void BeamModuleAssembler::emit_get_hd(const ArgVal &Src, const ArgVal &Hd) {
    mov_arg(ARG1, Src);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);

    a.mov(ARG2, getCARRef(boxed_ptr));

    mov_arg(Hd, ARG2);
}

void BeamModuleAssembler::emit_get_tl(const ArgVal &Src, const ArgVal &Tl) {
    mov_arg(ARG1, Src);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);

    a.mov(ARG2, getCDRRef(boxed_ptr));

    mov_arg(Tl, ARG2);
}

void BeamModuleAssembler::emit_is_nonempty_list_get_list(const ArgVal &Fail,
                                                         const ArgVal &Src,
                                                         const ArgVal &Hd,
                                                         const ArgVal &Tl) {
    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);
    emit_get_list(RET, Hd, Tl);
}

void BeamModuleAssembler::emit_is_nonempty_list_get_hd(const ArgVal &Fail,
                                                       const ArgVal &Src,
                                                       const ArgVal &Hd) {
    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);

    a.mov(ARG2, getCARRef(boxed_ptr));

    mov_arg(Hd, ARG2);
}

void BeamModuleAssembler::emit_is_nonempty_list_get_tl(const ArgVal &Fail,
                                                       const ArgVal &Src,
                                                       const ArgVal &Tl) {
    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);

    a.mov(ARG2, getCDRRef(boxed_ptr));

    mov_arg(Tl, ARG2);
}
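/* Illustration (not emitted): consecutive BEAM registers are adjacent
 * 8-byte slots in memory, so a pair of terms can be moved with a single
 * 16-byte `movups` instead of two `mov`s, as in emit_get_list() above.
 * For a reverse-consecutive pair, `vpermilpd xmm0, [src], 1` loads 16
 * bytes and swaps the two 64-bit lanes in the same instruction; that
 * form requires AVX, hence the hasCpuFeature(x86::Features::kAVX) guard
 * and the scalar fallback path. */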
void BeamModuleAssembler::emit_i_get(const ArgVal &Src, const ArgVal &Dst) {
    mov_arg(ARG2, Src);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<2>(erts_pd_hash_get);

    emit_leave_runtime();

    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_i_get_hash(const ArgVal &Src,
                                          const ArgVal &Hash,
                                          const ArgVal &Dst) {
    mov_arg(ARG2, Hash);
    mov_arg(ARG3, Src);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<3>(erts_pd_hash_get_with_hx);

    emit_leave_runtime();

    mov_arg(Dst, RET);
}

/* Store the pointer to a tuple in ARG2. Remove any LITERAL_PTR tag. */
void BeamModuleAssembler::emit_load_tuple_ptr(const ArgVal &Term) {
    mov_arg(ARG2, Term);
    (void)emit_ptr_val(ARG2, ARG2);
}

#ifdef DEBUG
/* Emit an assertion to ensure that tuple_reg points into the same
 * tuple as Src. */
void BeamModuleAssembler::emit_tuple_assertion(const ArgVal &Src,
                                               x86::Gp tuple_reg) {
    Label ok = a.newLabel(), fatal = a.newLabel();
    ASSERT(tuple_reg != RET);
    mov_arg(RET, Src);
    emit_is_boxed(fatal, RET, dShort);
    (void)emit_ptr_val(RET, RET);
    a.cmp(RET, tuple_reg);
    a.short_().je(ok);

    a.bind(fatal);
    {
        a.ud2();
    }
    a.bind(ok);
}
#endif

/* Fetch an element from the tuple pointed to by the boxed pointer
 * in ARG2. */
void BeamModuleAssembler::emit_i_get_tuple_element(const ArgVal &Src,
                                                   const ArgVal &Element,
                                                   const ArgVal &Dst) {
#ifdef DEBUG
    emit_tuple_assertion(Src, ARG2);
#endif

    a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
    mov_arg(Dst, ARG1);
}

/* Fetch two consecutive tuple elements from the tuple pointed to by
 * the boxed pointer in ARG2. */
void BeamModuleAssembler::emit_get_two_tuple_elements(const ArgVal &Src,
                                                      const ArgVal &Element,
                                                      const ArgVal &Dst1,
                                                      const ArgVal &Dst2) {
#ifdef DEBUG
    emit_tuple_assertion(Src, ARG2);
#endif

    x86::Mem element_ptr =
            emit_boxed_val(ARG2, Element.getValue(), 2 * sizeof(Eterm));

    switch (ArgVal::register_relation(Dst1, Dst2)) {
    case ArgVal::Relation::consecutive: {
        x86::Mem dst_ptr = getArgRef(Dst1, 16);
        a.movups(x86::xmm0, element_ptr);
        a.movups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        if (!hasCpuFeature(x86::Features::kAVX)) {
            goto fallback;
        } else {
            x86::Mem dst_ptr = getArgRef(Dst2, 16);
            a.vpermilpd(x86::xmm0, element_ptr, 1); /* Load and swap */
            a.vmovups(dst_ptr, x86::xmm0);
            break;
        }
    }
    case ArgVal::Relation::none:
    fallback:
        a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
        a.mov(ARG3, emit_boxed_val(ARG2, (Element + sizeof(Eterm)).getValue()));
        mov_arg(Dst1, ARG1);
        mov_arg(Dst2, ARG3);
        break;
    }
}

void BeamModuleAssembler::emit_init(const ArgVal &Y) {
    mov_arg(Y, NIL);
}

void BeamModuleAssembler::emit_i_trim(const ArgVal &Words) {
    ASSERT(Words.getType() == ArgVal::TYPE::u);
    ASSERT(Words.getValue() <= 1023);

    if (Words.getValue() > 0) {
        a.add(E, imm(Words.getValue() * sizeof(Eterm)));
    }
}

void BeamModuleAssembler::emit_i_move(const ArgVal &Src, const ArgVal &Dst) {
    mov_arg(Dst, Src);
}
/* Move two words at consecutive addresses to consecutive or reverse
 * consecutive destinations. */
void BeamModuleAssembler::emit_move_two_words(const ArgVal &Src1,
                                              const ArgVal &Dst1,
                                              const ArgVal &Src2,
                                              const ArgVal &Dst2) {
    x86::Mem src_ptr = getArgRef(Src1, 16);

    ASSERT(ArgVal::register_relation(Src1, Src2) ==
           ArgVal::Relation::consecutive);

    switch (ArgVal::register_relation(Dst1, Dst2)) {
    case ArgVal::Relation::consecutive: {
        x86::Mem dst_ptr = getArgRef(Dst1, 16);
        a.movups(x86::xmm0, src_ptr);
        a.movups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        x86::Mem dst_ptr = getArgRef(Dst2, 16);
        comment("(moving and swapping)");
        if (hasCpuFeature(x86::Features::kAVX)) {
            a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
            a.vmovups(dst_ptr, x86::xmm0);
        } else {
            mov_arg(ARG1, Src1);
            mov_arg(ARG2, Src2);
            mov_arg(Dst1, ARG1);
            mov_arg(Dst2, ARG2);
        }
        break;
    }
    case ArgVal::Relation::none:
        ASSERT(0);
        break;
    }
}

void BeamModuleAssembler::emit_swap(const ArgVal &R1, const ArgVal &R2) {
    if (!hasCpuFeature(x86::Features::kAVX)) {
        goto fallback;
    }

    switch (ArgVal::register_relation(R1, R2)) {
    case ArgVal::Relation::consecutive: {
        x86::Mem ptr = getArgRef(R1, 16);
        comment("(swapping using AVX)");
        a.vpermilpd(x86::xmm0, ptr, 1); /* Load and swap */
        a.vmovups(ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        x86::Mem ptr = getArgRef(R2, 16);
        comment("(swapping using AVX)");
        a.vpermilpd(x86::xmm0, ptr, 1); /* Load and swap */
        a.vmovups(ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::none:
    fallback:
        mov_arg(ARG1, R1);
        mov_arg(ARG2, R2);
        mov_arg(R2, ARG1);
        mov_arg(R1, ARG2);
        break;
    }
}

void BeamModuleAssembler::emit_node(const ArgVal &Dst) {
    a.mov(ARG1, imm(&erts_this_node));
    a.mov(ARG1, x86::qword_ptr(ARG1));
    a.mov(ARG1, x86::qword_ptr(ARG1, offsetof(ErlNode, sysname)));
    mov_arg(Dst, ARG1);
}

void BeamModuleAssembler::emit_put_cons(const ArgVal &Hd, const ArgVal &Tl) {
    switch (ArgVal::register_relation(Hd, Tl)) {
    case ArgVal::Relation::consecutive: {
        x86::Mem src_ptr = getArgRef(Hd, 16);
        x86::Mem dst_ptr = x86::xmmword_ptr(HTOP, 0);
        comment("(put head and tail together)");
        a.movups(x86::xmm0, src_ptr);
        a.movups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::reverse_consecutive: {
        if (!hasCpuFeature(x86::Features::kAVX)) {
            goto fallback;
        }

        x86::Mem src_ptr = getArgRef(Tl, 16);
        x86::Mem dst_ptr = x86::xmmword_ptr(HTOP, 0);
        comment("(putting and swapping head and tail together)");
        a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
        a.vmovups(dst_ptr, x86::xmm0);
        break;
    }
    case ArgVal::Relation::none:
    fallback:
        mov_arg(x86::qword_ptr(HTOP, 0), Hd);
        mov_arg(x86::qword_ptr(HTOP, 1 * sizeof(Eterm)), Tl);
        break;
    }
    a.lea(ARG2, x86::qword_ptr(HTOP, TAG_PRIMARY_LIST));
}

void BeamModuleAssembler::emit_append_cons(const ArgVal &index,
                                           const ArgVal &Hd) {
    size_t offset = 2 * index.getValue() * sizeof(Eterm);

    mov_arg(x86::qword_ptr(HTOP, offset), Hd);
    a.mov(x86::qword_ptr(HTOP, offset + sizeof(Eterm)), ARG2);
    a.lea(ARG2, x86::qword_ptr(HTOP, offset + TAG_PRIMARY_LIST));
}

void BeamModuleAssembler::emit_store_cons(const ArgVal &len,
                                          const ArgVal &Dst) {
    a.add(HTOP, imm(len.getValue() * 2 * sizeof(Eterm)));
    mov_arg(Dst, ARG2);
}
void BeamModuleAssembler::emit_put_tuple2(const ArgVal &Dst,
                                          const ArgVal &Arity,
                                          const std::vector<ArgVal> &args) {
    size_t size = args.size();

    ASSERT(arityval(Arity.getValue()) == size);

    comment("Move arity word");
    mov_arg(x86::qword_ptr(HTOP, 0), Arity);

    comment("Move tuple data");
    for (unsigned i = 0; i < size; i++) {
        x86::Mem dst_ptr = x86::qword_ptr(HTOP, (i + 1) * sizeof(Eterm));

        if (i + 1 == size) {
            mov_arg(dst_ptr, args[i]);
        } else {
            switch (ArgVal::register_relation(args[i], args[i + 1])) {
            case ArgVal::consecutive: {
                x86::Mem src_ptr = getArgRef(args[i], 16);

                comment("(moving two elements at once)");
                dst_ptr.setSize(16);
                a.movups(x86::xmm0, src_ptr);
                a.movups(dst_ptr, x86::xmm0);
                i++;
                break;
            }
            case ArgVal::reverse_consecutive: {
                if (!hasCpuFeature(x86::Features::kAVX)) {
                    mov_arg(dst_ptr, args[i]);
                } else {
                    x86::Mem src_ptr = getArgRef(args[i + 1], 16);

                    comment("(moving and swapping two elements at once)");
                    dst_ptr.setSize(16);
                    a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
                    a.vmovups(dst_ptr, x86::xmm0);
                    i++;
                }
                break;
            }
            case ArgVal::none:
                mov_arg(dst_ptr, args[i]);
                break;
            }
        }
    }

    comment("Create boxed ptr");
    a.lea(ARG1, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
    a.add(HTOP, imm((size + 1) * sizeof(Eterm)));

    mov_arg(Dst, ARG1);
}

void BeamModuleAssembler::emit_self(const ArgVal &Dst) {
    a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, common.id)));

    mov_arg(Dst, ARG1);
}

void BeamModuleAssembler::emit_set_tuple_element(const ArgVal &Element,
                                                 const ArgVal &Tuple,
                                                 const ArgVal &Offset) {
    mov_arg(ARG1, Tuple);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);

    mov_arg(emit_boxed_val(boxed_ptr, Offset.getValue()), Element, ARG2);
}

void BeamModuleAssembler::emit_is_nonempty_list(const ArgVal &Fail,
                                                const ArgVal &Src) {
    x86::Mem list_ptr = getArgRef(Src, 1);

    a.test(list_ptr, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_jump(const ArgVal &Fail) {
    a.jmp(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_atom(const ArgVal &Fail, const ArgVal &Src) {
    mov_arg(RET, Src);

    ERTS_CT_ASSERT(_TAG_IMMED2_MASK < 256);
    a.and_(RETb, imm(_TAG_IMMED2_MASK));
    a.cmp(RETb, imm(_TAG_IMMED2_ATOM));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_boolean(const ArgVal &Fail,
                                          const ArgVal &Src) {
    /* Since am_true and am_false differ by a single bit, we can simplify the
     * check by clearing said bit and comparing against the lesser one. */
    ERTS_CT_ASSERT(am_false == make_atom(0));
    ERTS_CT_ASSERT(am_true == make_atom(1));

    mov_arg(ARG1, Src);

    a.and_(ARG1, imm(~(am_true & ~_TAG_IMMED1_MASK)));
    a.cmp(ARG1, imm(am_false));
    a.jne(labels[Fail.getValue()]);
}
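/* Worked example for the boolean check above (illustrative; assumes the
 * standard atom tag layout where make_atom(i) is
 * (i << _TAG_IMMED2_SIZE) | _TAG_IMMED2_ATOM, with _TAG_IMMED2_SIZE == 6
 * and _TAG_IMMED2_ATOM == 0x0B):
 *
 *   am_false = make_atom(0) = 0x0B
 *   am_true  = make_atom(1) = 0x4B
 *
 * am_true & ~_TAG_IMMED1_MASK == 0x40, so the `and_` clears bit 6. That
 * maps am_true onto am_false while no other term can land on 0x0B, so a
 * single compare against am_false accepts exactly the two booleans. */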
void BeamModuleAssembler::emit_is_binary(Label fail,
                                         x86::Gp src,
                                         Label next,
                                         Label subbin) {
    ASSERT(src != RET && src != ARG2);

    emit_is_boxed(fail, src);

    x86::Gp boxed_ptr = emit_ptr_val(src, src);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_SUB_BIN));
    a.short_().je(subbin);

    ERTS_CT_ASSERT(_TAG_HEADER_REFC_BIN + 4 == _TAG_HEADER_HEAP_BIN);
    a.and_(RETb, imm(~4));
    a.cmp(RETb, imm(_TAG_HEADER_REFC_BIN));
    a.short_().je(next);

    a.jmp(fail);
}

void BeamModuleAssembler::emit_is_binary(const ArgVal &Fail,
                                         const ArgVal &Src) {
    Label next = a.newLabel(), subbin = a.newLabel();

    mov_arg(ARG1, Src);
    emit_is_binary(labels[Fail.getValue()], ARG1, next, subbin);

    a.bind(subbin);
    {
        /* emit_is_binary has already removed the literal tag from Src, if
         * applicable. */
        a.cmp(emit_boxed_val(ARG1, offsetof(ErlSubBin, bitsize), sizeof(byte)),
              imm(0));
        a.jne(labels[Fail.getValue()]);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_is_bitstring(const ArgVal &Fail,
                                            const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(ARG1, Src);
    emit_is_binary(labels[Fail.getValue()], ARG1, next, next);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_float(const ArgVal &Fail,
                                        const ArgVal &Src) {
    mov_arg(ARG1, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG1);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_function(const ArgVal &Fail,
                                           const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(RET, Src);

    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));

    /* 32-bit compare; the load above zeroed the upper half of RET. */
    a.cmp(RETd, imm(HEADER_FUN));
    a.short_().je(next);
    ERTS_CT_ASSERT(HEADER_EXPORT < 256);
    a.cmp(RETb, imm(HEADER_EXPORT));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_function2(const ArgVal &Fail,
                                            const ArgVal &Src,
                                            const ArgVal &Arity) {
    if (Arity.getType() != ArgVal::i) {
        /*
         * Non-literal arity - extremely uncommon. Generate simple code.
         */
        mov_arg(ARG2, Src);
        mov_arg(ARG3, Arity);

        emit_enter_runtime();

        a.mov(ARG1, c_p);
        runtime_call<3>(erl_is_function);

        emit_leave_runtime();

        a.cmp(RET, imm(am_true));
        a.jne(labels[Fail.getValue()]);
        return;
    }

    unsigned arity = unsigned_val(Arity.getValue());
    if (arity > MAX_ARG) {
        /* Arity is negative or too large. */
        a.jmp(labels[Fail.getValue()]);
        return;
    }

    Label next = a.newLabel(), fun = a.newLabel();

    mov_arg(ARG1, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG1);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.cmp(RETd, imm(HEADER_FUN));
    a.short_().je(fun);
    ERTS_CT_ASSERT(HEADER_EXPORT < 256);
    a.cmp(RETb, imm(HEADER_EXPORT));
    a.jne(labels[Fail.getValue()]);

    comment("Check arity of export fun");
    a.mov(ARG2, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    a.cmp(x86::qword_ptr(ARG2, offsetof(Export, info.mfa.arity)), imm(arity));
    a.jne(labels[Fail.getValue()]);
    a.short_().jmp(next);

    comment("Check arity of fun");
    a.bind(fun);
    {
        a.cmp(emit_boxed_val(boxed_ptr, offsetof(ErlFunThing, arity)),
              imm(arity));
        a.jne(labels[Fail.getValue()]);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_is_integer(const ArgVal &Fail,
                                          const ArgVal &Src) {
    Label next = a.newLabel();
    Label fail = labels[Fail.getValue()];

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().je(next);

    emit_is_boxed(fail, RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
    a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
    a.jne(fail);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_list(const ArgVal &Fail, const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(RET, Src);

    a.cmp(RET, imm(NIL));
    a.short_().je(next);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}
void BeamModuleAssembler::emit_is_map(const ArgVal &Fail, const ArgVal &Src) {
    mov_arg(RET, Src);

    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_MAP));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_nil(const ArgVal &Fail, const ArgVal &Src) {
    a.cmp(getArgRef(Src), imm(NIL));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_number(const ArgVal &Fail,
                                         const ArgVal &Src) {
    Label next = a.newLabel();
    Label fail = labels[Fail.getValue()];

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().je(next);

    emit_is_boxed(fail, RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(ARG1, emit_boxed_val(boxed_ptr));

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
    a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
    a.short_().je(next);

    a.cmp(ARG1d, imm(HEADER_FLONUM));
    a.jne(fail);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_pid(const ArgVal &Fail, const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_PID));
    a.short_().je(next);

    /* Reuse RET as the important bits are still available. */
    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PID));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_port(const ArgVal &Fail, const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_PORT));
    a.short_().je(next);

    /* Reuse RET as the important bits are still available. */
    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PORT));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_is_reference(const ArgVal &Fail,
                                            const ArgVal &Src) {
    Label next = a.newLabel();

    mov_arg(RET, Src);

    emit_is_boxed(labels[Fail.getValue()], RET);

    x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.cmp(RETb, imm(_TAG_HEADER_REF));
    a.short_().je(next);
    a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_REF));
    a.jne(labels[Fail.getValue()]);

    a.bind(next);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tagged_tuple(const ArgVal &Fail,
                                                 const ArgVal &Src,
                                                 const ArgVal &Arity,
                                                 const ArgVal &Tag) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG2);

    x86::Gp boxed_ptr = emit_ptr_val(ARG2, ARG2);

    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)), imm(Arity.getValue()));
    a.jne(labels[Fail.getValue()]);

    a.cmp(emit_boxed_val(boxed_ptr, sizeof(Eterm)), imm(Tag.getValue()));
    a.jne(labels[Fail.getValue()]);
}
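/* Illustration (not emitted; the atom name is hypothetical): for a record
 * test such as is_record(X, person, 3), the sequence above compares the
 * arity word as a 32-bit value (safe per the ERTS_CT_ASSERT) and then the
 * first element:
 *
 *     cmp dword [boxed_ptr], make_arityval(3)
 *     jne fail
 *     cmp qword [boxed_ptr + 8], am_person
 *     jne fail
 */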
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tagged_tuple_ff(const ArgVal &NotTuple,
                                                    const ArgVal &NotRecord,
                                                    const ArgVal &Src,
                                                    const ArgVal &Arity,
                                                    const ArgVal &Tag) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[NotTuple.getValue()], ARG2);

    (void)emit_ptr_val(ARG2, ARG2);
    a.mov(ARG1, emit_boxed_val(ARG2));

    ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
    a.test(ARG1.r8(), imm(_TAG_HEADER_MASK));
    a.jne(labels[NotTuple.getValue()]);

    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(ARG1d, imm(Arity.getValue()));
    a.jne(labels[NotRecord.getValue()]);

    a.cmp(emit_boxed_val(ARG2, sizeof(Eterm)), imm(Tag.getValue()));
    a.jne(labels[NotRecord.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tuple(const ArgVal &Fail,
                                          const ArgVal &Src) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG2);

    (void)emit_ptr_val(ARG2, ARG2);

    ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
    a.test(emit_boxed_val(ARG2, 0, sizeof(byte)), imm(_TAG_HEADER_MASK));

    a.jne(labels[Fail.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tuple_of_arity(const ArgVal &Fail,
                                                   const ArgVal &Src,
                                                   const ArgVal &Arity) {
    mov_arg(ARG2, Src);

    emit_is_boxed(labels[Fail.getValue()], ARG2);

    (void)emit_ptr_val(ARG2, ARG2);

    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
    a.jne(labels[Fail.getValue()]);
}

/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_test_arity(const ArgVal &Fail,
                                            const ArgVal &Src,
                                            const ArgVal &Arity) {
    mov_arg(ARG2, Src);

    (void)emit_ptr_val(ARG2, ARG2);

    ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
    a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_i_is_eq_exact_immed(const ArgVal &Fail,
                                                   const ArgVal &X,
                                                   const ArgVal &Y) {
    cmp_arg(getArgRef(X), Y);
    a.jne(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_i_is_ne_exact_immed(const ArgVal &Fail,
                                                   const ArgVal &X,
                                                   const ArgVal &Y) {
    cmp_arg(getArgRef(X), Y);
    a.je(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_eq_exact(const ArgVal &Fail,
                                           const ArgVal &X,
                                           const ArgVal &Y) {
    Label next = a.newLabel();

    mov_arg(ARG2, Y); /* May clobber ARG1 */
    mov_arg(ARG1, X);

    a.cmp(ARG1, ARG2);
    a.short_().je(next);

    /* Fancy way of checking if both are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.je(labels[Fail.getValue()]);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.je(labels[Fail.getValue()]);

    a.bind(next);
}
void BeamModuleAssembler::emit_i_is_eq_exact_literal(const ArgVal &Fail,
                                                     const ArgVal &Src,
                                                     const ArgVal &Literal,
                                                     const ArgVal &tag_test) {
    mov_arg(ARG2, Literal); /* May clobber ARG1 */
    mov_arg(ARG1, Src);

    /* Fail immediately unless Src is the same type of pointer as the
     * literal. */
    a.test(ARG1.r8(), imm(tag_test.getValue()));
    a.jne(labels[Fail.getValue()]);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jz(labels[Fail.getValue()]);
}

void BeamModuleAssembler::emit_is_ne_exact(const ArgVal &Fail,
                                           const ArgVal &X,
                                           const ArgVal &Y) {
    Label next = a.newLabel();

    mov_arg(ARG2, Y); /* May clobber ARG1 */
    mov_arg(ARG1, X);

    a.cmp(ARG1, ARG2);
    a.je(labels[Fail.getValue()]);

    /* Fancy way of checking if both are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.short_().je(next);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jnz(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamModuleAssembler::emit_i_is_ne_exact_literal(const ArgVal &Fail,
                                                     const ArgVal &Src,
                                                     const ArgVal &Literal) {
    Label next = a.newLabel();

    mov_arg(ARG2, Literal); /* May clobber ARG1 */
    mov_arg(ARG1, Src);

    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.short_().je(next);

    emit_enter_runtime();

    runtime_call<2>(eq);

    emit_leave_runtime();

    a.test(RET, RET);
    a.jnz(labels[Fail.getValue()]);

    a.bind(next);
}

void BeamGlobalAssembler::emit_arith_eq_shared() {
    Label generic_compare = a.newLabel();

    /* Are both floats? */
    a.mov(ARG3d, ARG1d);
    a.or_(ARG3d, ARG2d);
    a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
    a.short_().jne(generic_compare);

    x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
    a.mov(ARG3, emit_boxed_val(boxed_ptr));
    boxed_ptr = emit_ptr_val(ARG5, ARG2);
    a.mov(ARG5, emit_boxed_val(boxed_ptr));

    a.and_(ARG3d, imm(_TAG_HEADER_MASK));
    a.and_(ARG5d, imm(_TAG_HEADER_MASK));
    a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
    a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
    a.or_(ARG3d, ARG5d);
    a.short_().jne(generic_compare);

    boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    boxed_ptr = emit_ptr_val(ARG2, ARG2);
    a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));

    /* All float terms are finite so our caller only needs to check ZF. We
     * don't need to check for errors (PF). */
    a.comisd(x86::xmm0, x86::xmm1);
    a.ret();

    a.bind(generic_compare);
    {
        emit_enter_runtime();

        /* Generic eq-only arithmetic comparison. */
        comment("erts_cmp_compound(X, Y, 0, 1);");

        mov_imm(ARG3, 0);
        mov_imm(ARG4, 1);
        runtime_call<4>(erts_cmp_compound);

        emit_leave_runtime();

        a.test(RET, RET);

        a.ret();
    }
}

void BeamModuleAssembler::emit_is_eq(const ArgVal &Fail,
                                     const ArgVal &A,
                                     const ArgVal &B) {
    Label fail = labels[Fail.getValue()], next = a.newLabel();

    mov_arg(ARG2, B); /* May clobber ARG1 */
    mov_arg(ARG1, A);

    a.cmp(ARG1, ARG2);
    a.short_().je(next);

    /* We can skip deep comparisons when both args are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.je(fail);

    safe_fragment_call(ga->get_arith_eq_shared());
    a.jne(fail);

    a.bind(next);
}
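/* Why the "fancy" immediate check works (illustrative): the primary tag
 * lives in the two lowest bits and TAG_PRIMARY_IMMED1 == 3, so a bit
 * survives the AND only when it is set in both terms. For example, with
 * an immediate (tag 3) and a boxed pointer (tag 2):
 *
 *   (... 11) & (... 10) == (... 10)    -> tag != 3, not both immediates
 *
 * The AND of the two terms thus carries primary tag 3 exactly when both
 * operands are immediates, and two unequal immediates can never compare
 * equal, so the deep comparison can be skipped. */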
void BeamModuleAssembler::emit_is_ne(const ArgVal &Fail,
                                     const ArgVal &A,
                                     const ArgVal &B) {
    Label fail = labels[Fail.getValue()], next = a.newLabel();

    mov_arg(ARG2, B); /* May clobber ARG1 */
    mov_arg(ARG1, A);

    a.cmp(ARG1, ARG2);
    a.je(fail);

    /* We can skip deep comparisons when both args are immediates. */
    a.mov(RETd, ARG1d);
    a.and_(RETd, ARG2d);
    a.and_(RETb, imm(_TAG_PRIMARY_MASK));
    a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
    a.short_().je(next);

    safe_fragment_call(ga->get_arith_eq_shared());
    a.je(fail);

    a.bind(next);
}

void BeamGlobalAssembler::emit_arith_compare_shared() {
    Label atom_compare, generic_compare;

    atom_compare = a.newLabel();
    generic_compare = a.newLabel();

    /* Are both floats?
     *
     * This is done first as relative comparisons on atoms don't make much
     * sense. */
    a.mov(ARG3d, ARG1d);
    a.or_(ARG3d, ARG2d);
    a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
    a.short_().jne(atom_compare);

    x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
    a.mov(ARG3, emit_boxed_val(boxed_ptr));
    boxed_ptr = emit_ptr_val(ARG5, ARG2);
    a.mov(ARG5, emit_boxed_val(boxed_ptr));

    a.and_(ARG3d, imm(_TAG_HEADER_MASK));
    a.and_(ARG5d, imm(_TAG_HEADER_MASK));
    a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
    a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
    a.or_(ARG3d, ARG5d);
    a.short_().jne(generic_compare);

    boxed_ptr = emit_ptr_val(ARG1, ARG1);
    a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    boxed_ptr = emit_ptr_val(ARG2, ARG2);
    a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
    a.comisd(x86::xmm0, x86::xmm1);

    /* `comisd` doesn't set the flags the same way `test` and friends do, so
     * they need to be converted for jl/jge to work. */
    a.setae(x86::al);
    a.dec(x86::al);

    a.ret();

    a.bind(atom_compare);
    {
        /* Are both atoms? */
        a.mov(ARG3d, ARG1d);
        a.mov(ARG5d, ARG2d);
        a.and_(ARG3d, imm(_TAG_IMMED2_MASK));
        a.and_(ARG5d, imm(_TAG_IMMED2_MASK));
        a.sub(ARG3d, imm(_TAG_IMMED2_ATOM));
        a.sub(ARG5d, imm(_TAG_IMMED2_ATOM));
        a.or_(ARG3d, ARG5d);
        a.jne(generic_compare);

        emit_enter_runtime();

        runtime_call<2>(erts_cmp_atoms);

        emit_leave_runtime();

        /* !! erts_cmp_atoms returns int, not Sint !! */
        a.test(RETd, RETd);

        a.ret();
    }

    a.bind(generic_compare);
    {
        emit_enter_runtime();

        comment("erts_cmp_compound(X, Y, 0, 0);");
        mov_imm(ARG3, 0);
        mov_imm(ARG4, 0);
        runtime_call<4>(erts_cmp_compound);

        emit_leave_runtime();

        a.test(RET, RET);

        a.ret();
    }
}

void BeamModuleAssembler::emit_is_lt(const ArgVal &Fail,
                                     const ArgVal &LHS,
                                     const ArgVal &RHS) {
    Label fail = labels[Fail.getValue()];
    Label generic = a.newLabel(), next = a.newLabel();

    mov_arg(ARG2, RHS); /* May clobber ARG1 */
    mov_arg(ARG1, LHS);

    a.cmp(ARG1, ARG2);
    a.je(fail);

    /* Relative comparisons are overwhelmingly likely to be used on smalls,
     * so we'll specialize those and keep the rest in a shared fragment. */
    if (RHS.isImmed() && is_small(RHS.getValue())) {
        a.mov(RETd, ARG1d);
    } else if (LHS.isImmed() && is_small(LHS.getValue())) {
        a.mov(RETd, ARG2d);
    } else {
        a.mov(RETd, ARG1d);
        a.and_(RETd, ARG2d);
    }

    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().jne(generic);

    a.cmp(ARG1, ARG2);
    a.short_().jl(next);
    a.jmp(fail);

    a.bind(generic);
    {
        safe_fragment_call(ga->get_arith_compare_shared());
        a.jge(fail);
    }

    a.bind(next);
}
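/* Why the small/small fast path above is sound (illustrative): smalls
 * carry _TAG_IMMED1_SMALL in their low tag bits and the signed value in
 * the bits above, so two smalls with identical tags order the same way
 * as the integers they encode; a plain signed `cmp` on the tagged words
 * suffices. When one side is a small literal, testing only the other
 * operand's tag is enough, since the literal's tag bits are already
 * known to match. */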
void BeamModuleAssembler::emit_is_ge(const ArgVal &Fail,
                                     const ArgVal &LHS,
                                     const ArgVal &RHS) {
    Label fail = labels[Fail.getValue()];
    Label generic = a.newLabel(), next = a.newLabel();

    mov_arg(ARG2, RHS); /* May clobber ARG1 */
    mov_arg(ARG1, LHS);

    a.cmp(ARG1, ARG2);
    a.short_().je(next);

    /* Relative comparisons are overwhelmingly likely to be used on smalls,
     * so we'll specialize those and keep the rest in a shared fragment. */
    if (RHS.isImmed() && is_small(RHS.getValue())) {
        a.mov(RETd, ARG1d);
    } else if (LHS.isImmed() && is_small(LHS.getValue())) {
        a.mov(RETd, ARG2d);
    } else {
        a.mov(RETd, ARG1d);
        a.and_(RETd, ARG2d);
    }

    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().jne(generic);

    a.cmp(ARG1, ARG2);
    a.short_().jge(next);
    a.jmp(fail);

    a.bind(generic);
    {
        safe_fragment_call(ga->get_arith_compare_shared());
        a.jl(fail);
    }

    a.bind(next);
}

void BeamModuleAssembler::emit_bif_is_eq_ne_exact_immed(const ArgVal &Src,
                                                        const ArgVal &Immed,
                                                        const ArgVal &Dst,
                                                        Eterm fail_value,
                                                        Eterm succ_value) {
    cmp_arg(getArgRef(Src), Immed);
    mov_imm(RET, fail_value);
    mov_imm(ARG1, succ_value);
    a.cmove(RET, ARG1);
    mov_arg(Dst, RET);
}

void BeamModuleAssembler::emit_bif_is_eq_exact_immed(const ArgVal &Src,
                                                     const ArgVal &Immed,
                                                     const ArgVal &Dst) {
    emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_false, am_true);
}

void BeamModuleAssembler::emit_bif_is_ne_exact_immed(const ArgVal &Src,
                                                     const ArgVal &Immed,
                                                     const ArgVal &Dst) {
    emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_true, am_false);
}

void BeamModuleAssembler::emit_badmatch(const ArgVal &Src) {
    mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
    emit_error(BADMATCH);
}

void BeamModuleAssembler::emit_case_end(const ArgVal &Src) {
    mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
    emit_error(EXC_CASE_CLAUSE);
}

void BeamModuleAssembler::emit_system_limit_body() {
    emit_error(SYSTEM_LIMIT);
}

void BeamModuleAssembler::emit_if_end() {
    emit_error(EXC_IF_CLAUSE);
}

void BeamModuleAssembler::emit_catch(const ArgVal &Y, const ArgVal &Fail) {
    a.inc(x86::qword_ptr(c_p, offsetof(Process, catches)));

    Label patch_addr = a.newLabel();

    /*
     * Emit the following instruction:
     *
     *     b8 ff ff ff 7f    mov eax,0x7fffffff
     *        ^
     *        |
     *        |
     * offset to be patched
     * with the tagged catch
     */
    a.bind(patch_addr);
    a.mov(RETd, imm(0x7fffffff));

    mov_arg(Y, RET);

    /* Offset = 1 for `mov` payload */
    catches.push_back({{patch_addr, 0x1, 0}, labels[Fail.getValue()]});
}
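/* Illustration (not emitted): the placeholder above is resolved when the
 * module is loaded. The {patch_addr, 0x1, 0} entry directs the loader to
 * write the tagged catch term over the imm32 payload, one byte past the
 * b8 opcode, so the instruction effectively becomes:
 *
 *     b8 XX XX XX XX    mov eax, <tagged catch term for the Fail label>
 */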
void BeamGlobalAssembler::emit_catch_end_shared() {
    Label not_throw = a.newLabel(), not_error = a.newLabel(),
          after_gc = a.newLabel();

    /* Load thrown value / reason into ARG2 for add_stacktrace */
    a.mov(ARG2, getXRef(2));

    a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), imm(NIL));

    a.cmp(getXRef(1), imm(am_throw));
    a.short_().jne(not_throw);

    /* Thrown value, return it in x0 */
    a.mov(getXRef(0), ARG2);
    a.ret();

    a.bind(not_throw);
    {
        a.cmp(getXRef(1), imm(am_error));
        a.short_().jne(not_error);

        /* This is an error, attach a stacktrace to the reason. */
        emit_enter_runtime<Update::eStack | Update::eHeap>();

        a.mov(ARG1, c_p);
        /* ARG2 set above. */
        a.mov(ARG3, getXRef(3));
        runtime_call<3>(add_stacktrace);

        emit_leave_runtime<Update::eStack | Update::eHeap>();

        /* not_error assumes stacktrace/reason is in ARG2 */
        a.mov(ARG2, RET);
    }

    a.bind(not_error);
    {
        const int32_t bytes_needed = (3 + S_RESERVED) * sizeof(Eterm);

        a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
        a.cmp(ARG3, E);
        a.short_().jbe(after_gc);

        /* Preserve stacktrace / reason */
        a.mov(getXRef(0), ARG2);

        mov_imm(ARG4, 1);
        aligned_call(labels[garbage_collect]);

        a.mov(ARG2, getXRef(0));

        a.bind(after_gc);

        a.mov(x86::qword_ptr(HTOP), imm(make_arityval(2)));
        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), imm(am_EXIT));
        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 2), ARG2);

        a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
        a.add(HTOP, imm(3 * sizeof(Eterm)));

        a.mov(getXRef(0), RET);
    }

    a.ret();
}

void BeamModuleAssembler::emit_catch_end(const ArgVal &Y) {
    Label next = a.newLabel();

    emit_try_end(Y);

    a.cmp(getXRef(0), imm(THE_NON_VALUE));
    a.short_().jne(next);
    fragment_call(ga->get_catch_end_shared());
    a.bind(next);
}

void BeamModuleAssembler::emit_try_end(const ArgVal &Y) {
    a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
    emit_init(Y);
}

void BeamModuleAssembler::emit_try_case(const ArgVal &Y) {
    a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));

    mov_imm(RET, NIL);
    mov_arg(Y, RET);
    a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), RET);

    a.movups(x86::xmm0, x86::xmmword_ptr(registers, 1 * sizeof(Eterm)));
    a.mov(RET, getXRef(3));
    a.movups(x86::xmmword_ptr(registers, 0 * sizeof(Eterm)), x86::xmm0);
    a.mov(getXRef(2), RET);
}

void BeamModuleAssembler::emit_try_case_end(const ArgVal &Src) {
    mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
    emit_error(EXC_TRY_CLAUSE);
}

void BeamModuleAssembler::emit_raise(const ArgVal &Trace, const ArgVal &Value) {
    mov_arg(ARG3, Value);
    mov_arg(ARG2, Trace);

    /* This is an error, attach a stacktrace to the reason. */
    a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), ARG3);
    a.mov(x86::qword_ptr(c_p, offsetof(Process, ftrace)), ARG2);

    emit_enter_runtime();

    a.mov(ARG1, c_p);
    runtime_call<2>(erts_sanitize_freason);

    emit_leave_runtime();

    emit_handle_error();
}

void BeamModuleAssembler::emit_build_stacktrace() {
    emit_enter_runtime<Update::eStack | Update::eHeap>();

    a.mov(ARG1, c_p);
    a.mov(ARG2, getXRef(0));

    runtime_call<2>(build_stacktrace);

    emit_leave_runtime<Update::eStack | Update::eHeap>();

    a.mov(getXRef(0), RET);
}

void BeamModuleAssembler::emit_raw_raise() {
    Label next = a.newLabel();

    emit_enter_runtime();

    a.mov(ARG1, getXRef(2));
    a.mov(ARG2, getXRef(0));
    a.mov(ARG3, getXRef(1));
    a.mov(ARG4, c_p);
    runtime_call<4>(raw_raise);

    emit_leave_runtime();

    a.test(RET, RET);
    a.short_().jne(next);

    emit_handle_error();

    a.bind(next);
    a.mov(getXRef(0), imm(am_badarg));
}

void BeamGlobalAssembler::emit_i_test_yield_shared() {
    int mfa_offset = -(int)sizeof(ErtsCodeMFA) - BEAM_ASM_FUNC_PROLOGUE_SIZE;

    /* Yield address is in ARG3. */
    a.lea(ARG2, x86::qword_ptr(ARG3, mfa_offset));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG2);
    a.mov(ARG2, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, arity)));
    a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2);

    emit_discard_cp();

    a.jmp(labels[context_switch_simplified]);
}
void BeamModuleAssembler::emit_i_test_yield() {
    Label next = a.newLabel(), entry = a.newLabel();

    /* When present, this is guaranteed to be the first instruction after the
     * function entry label, so we can use `currLabel`. */
    a.align(kAlignCode, 8);
    a.bind(entry);

    a.dec(FCALLS);
    a.short_().jg(next);
    a.lea(ARG3, x86::qword_ptr(entry));
    a.call(funcYield);
    a.bind(next);
}

void BeamModuleAssembler::emit_i_yield() {
    a.mov(getXRef(0), imm(am_true));

#ifdef NATIVE_ERLANG_STACK
    fragment_call(ga->get_dispatch_return());
#else
    Label next = a.newLabel();

    a.lea(ARG3, x86::qword_ptr(next));
    abs_jmp(ga->get_dispatch_return());

    a.align(kAlignCode, 8);
    a.bind(next);
#endif
}

void BeamModuleAssembler::emit_i_perf_counter() {
    Label next = a.newLabel(), small = a.newLabel();

    emit_enter_runtime();

#ifdef WIN32
    /* Call the function pointer used by erts_sys_perf_counter */
    runtime_call<0>(erts_sys_time_data__.r.o.sys_hrtime);
#else
    runtime_call<0>(erts_sys_time_data__.r.o.perf_counter);
#endif

    emit_leave_runtime();

    a.mov(ARG1, RET);
    a.sar(ARG1, imm(SMALL_BITS - 1));
    a.add(ARG1, 1);
    a.cmp(ARG1, 1);
    a.jbe(small);
    {
        a.mov(TMP_MEM1q, RET);

        emit_gc_test(ArgVal(ArgVal::i, 0),
                     ArgVal(ArgVal::i, ERTS_MAX_UINT64_HEAP_SIZE),
                     ArgVal(ArgVal::i, 0));

        a.mov(ARG1, TMP_MEM1q);

        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 0),
              imm(make_pos_bignum_header(1)));
        a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), ARG1);
        a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
        a.add(HTOP, imm(sizeof(Eterm) * 2));

        a.short_().jmp(next);
    }

    a.bind(small);
    {
        a.shl(RET, imm(_TAG_IMMED1_SIZE));
        a.or_(RET, imm(_TAG_IMMED1_SMALL));
    }

    a.bind(next);
    a.mov(getXRef(0), RET);
}
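/* Why the range check in emit_i_perf_counter() works (illustrative): a
 * counter value fits in a small iff all bits above its SMALL_BITS-wide
 * signed field agree with the sign. After `sar ARG1, SMALL_BITS - 1`, a
 * fitting value leaves ARG1 == 0 (non-negative) or ARG1 == -1 (negative);
 * adding 1 maps those to 1 and 0, so the unsigned `cmp ARG1, 1` / `jbe`
 * catches exactly the values that can be tagged as a small. Everything
 * else is boxed as a one-digit bignum on the heap. */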