/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 2020-2020. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* %CopyrightEnd%
*/
/*
* Some notes on how to minimize the code size.
*
* Instructions that use 32-bit registers (e.g. eax) are generally
* one byte shorter than instructions that use 64-bit registers
* (e.g. rax). This does not apply to registers r8-r15 because they'll
* always need a rex prefix. The `and`, `or`, and `cmp` instructions
* are even shorter when operating on the RETb (al) register. The
* `test` instruction with an immediate second operand is shorter
* when operating on an 8-bit register.
*
* On both Unix and Windows, instructions can be shortened by using
* RETd, ARG1d, or ARG2d instead of RET, ARG1, or ARG2, respectively.
* On Unix, but not on Windows, ARG3d and ARG4d will also result in
* shorter instructions.
*
* Here are some examples. If we know that the higher 32 bits of
* a register are uninteresting or should be zeroed, we can write:
*
* a.mov(RETd, ARG1d)
*
* (When writing to the lower 32 bits of a register, the high 32
* bits are zeroed.)
*
* Here is a tag test on the contents of ARG1:
*
* a.and_(ARG1d, 15)
* a.cmp(ARG1d, 15)
*
* The same tag test on RET can be even shorter if written like this:
*
* a.and_(RETb, 15)
* a.cmp(RETb, 15)
*
* An alignment test can be written like this (when unit <= 256):
*
* a.test(RETb, imm(unit - 1));
* a.test(ARG1.r8(), imm(unit - 1));
*
* ASMJIT will automatically encode backward jumps (jumps to bound
* labels) in the shortest form possible. However, forward jumps
* (jumps to unbound labels) will by default be encoded in the long
* form (using a 32-bit relative address).
*
* Within a single BEAM instruction, a `short_()` prefix can be used
* to emit short forward jumps (using a signed byte as an offset,
* limiting the distance to about 128 bytes).
*
* Example:
*
* a.short_().je(next);
* .
* .
* .
* a.bind(next);
*/
#include <algorithm>
#include "beam_asm.hpp"
extern "C"
{
#include "erl_bif_table.h"
#include "big.h"
#include "beam_catches.h"
#include "beam_common.h"
#include "code_ix.h"
}
using namespace asmjit;
/* Helpers */
void BeamModuleAssembler::emit_error(int reason) {
a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(reason));
emit_handle_error();
}
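/* Test whether there is room for `Need` heap words (plus the reserve). If
* not, `term` is preserved in x(Live) across the garbage collection and
* reloaded afterwards. */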
void BeamModuleAssembler::emit_gc_test_preserve(const ArgVal &Need,
const ArgVal &Live,
x86::Gp term) {
const int32_t bytes_needed = (Need.getValue() + S_RESERVED) * sizeof(Eterm);
Label after_gc_check = a.newLabel();
ASSERT(term != ARG3);
a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
a.cmp(ARG3, E);
a.short_().jbe(after_gc_check);
a.mov(getXRef(Live.getValue()), term);
mov_imm(ARG4, Live.getValue() + 1);
fragment_call(ga->get_garbage_collect());
a.mov(term, getXRef(Live.getValue()));
a.bind(after_gc_check);
}
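/* Test whether `Ns` stack words and `Nh` heap words fit between HTOP and E;
* otherwise call the garbage collection fragment with `Live` live
* registers. */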
void BeamModuleAssembler::emit_gc_test(const ArgVal &Ns,
const ArgVal &Nh,
const ArgVal &Live) {
const int32_t bytes_needed =
(Ns.getValue() + Nh.getValue() + S_RESERVED) * sizeof(Eterm);
Label after_gc_check = a.newLabel();
a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
a.cmp(ARG3, E);
a.short_().jbe(after_gc_check);
mov_imm(ARG4, Live.getValue());
fragment_call(ga->get_garbage_collect());
a.bind(after_gc_check);
}
#if defined(DEBUG) && defined(HARD_DEBUG)
static void validate_term(Eterm term) {
if (is_boxed(term)) {
Eterm header = *boxed_val(term);
if (header_is_bin_matchstate(header)) {
return;
}
}
size_object_x(term, nullptr);
}
#endif
void BeamModuleAssembler::emit_validate(const ArgVal &arity) {
#ifdef DEBUG
Label next = a.newLabel(), crash = a.newLabel();
/* Crash if the Erlang heap is not word-aligned */
a.test(HTOP, imm(sizeof(Eterm) - 1));
a.jne(crash);
/* Crash if the Erlang stack is not word-aligned */
a.test(E, imm(sizeof(Eterm) - 1));
a.jne(crash);
/* Crash if we've overrun the stack */
a.lea(ARG1, x86::qword_ptr(E, -(int32_t)(S_REDZONE * sizeof(Eterm))));
a.cmp(HTOP, ARG1);
a.ja(crash);
a.jmp(next);
a.bind(crash);
a.hlt();
a.bind(next);
# ifdef HARD_DEBUG
emit_enter_runtime();
for (unsigned i = 0; i < arity.getValue(); i++) {
a.mov(ARG1, getXRef(i));
runtime_call<1>(validate_term);
}
emit_leave_runtime();
# endif
#endif
}
/* Instrs */
void BeamModuleAssembler::emit_i_validate(const ArgVal &Arity) {
emit_validate(Arity);
}
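/* Allocate `NeedStack` words on the stack and ensure that `NeedHeap` words
* are available on the heap, garbage collecting with `Live` live registers
* if needed. When the Erlang stack is not the native stack, an extra CP slot
* is reserved and initialized to NIL. */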
void BeamModuleAssembler::emit_allocate_heap(const ArgVal &NeedStack,
const ArgVal &NeedHeap,
const ArgVal &Live) {
ASSERT(NeedStack.getType() == ArgVal::TYPE::u);
ASSERT(NeedStack.getValue() <= MAX_REG);
ArgVal needed = NeedStack;
#if !defined(NATIVE_ERLANG_STACK)
needed = needed + CP_SIZE;
#endif
emit_gc_test(needed, NeedHeap, Live);
if (needed.getValue() > 0) {
a.sub(E, imm(needed.getValue() * sizeof(Eterm)));
}
#if !defined(NATIVE_ERLANG_STACK)
a.mov(getCPRef(), imm(NIL));
#endif
}
void BeamModuleAssembler::emit_allocate(const ArgVal &NeedStack,
const ArgVal &Live) {
emit_allocate_heap(NeedStack, ArgVal(ArgVal::TYPE::u, 0), Live);
}
void BeamModuleAssembler::emit_allocate_heap_zero(const ArgVal &NeedStack,
const ArgVal &NeedHeap,
const ArgVal &Live) {
ASSERT(NeedStack.getType() == ArgVal::TYPE::u);
ASSERT(NeedStack.getValue() <= MAX_REG);
emit_allocate_heap(NeedStack, NeedHeap, Live);
int slots = NeedStack.getValue();
if (slots == 1) {
a.mov(getYRef(0), imm(NIL));
} else {
/* `stosq` is more compact than `mov` after 2 slots. */
mov_imm(x86::rax, NIL);
#ifdef NATIVE_ERLANG_STACK
/* `mov` is two bytes shorter than `lea`. */
a.mov(x86::rdi, E);
#else
/* y(0) is at E+8. Must use `lea` here. */
a.lea(x86::rdi, getYRef(0));
#endif
if (slots <= 4) {
/* Slightly more compact than `rep stosq`. */
for (int i = 0; i < slots; i++) {
a.stosq();
}
} else {
mov_imm(x86::rcx, slots);
a.rep().stosq();
}
}
}
void BeamModuleAssembler::emit_allocate_zero(const ArgVal &NeedStack,
const ArgVal &Live) {
emit_allocate_heap_zero(NeedStack, ArgVal(ArgVal::TYPE::u, 0), Live);
}
void BeamModuleAssembler::emit_deallocate(const ArgVal &Deallocate) {
ASSERT(Deallocate.getType() == ArgVal::TYPE::u);
ASSERT(Deallocate.getValue() <= 1023);
ArgVal dealloc = Deallocate;
#if !defined(NATIVE_ERLANG_STACK)
dealloc = dealloc + CP_SIZE;
#endif
if (dealloc.getValue() > 0) {
a.add(E, imm(dealloc.getValue() * sizeof(Eterm)));
}
}
void BeamModuleAssembler::emit_test_heap(const ArgVal &Nh, const ArgVal &Live) {
emit_gc_test(ArgVal(ArgVal::u, 0), Nh, Live);
}
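/* Shared fragment taken when a return runs out of reductions. The return
* address ends up in ARG3, c_p->current is cleared, and c_p->arity is set to
* 1 (the return value in {x,0}) before jumping to the simplified context
* switch. */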
void BeamGlobalAssembler::emit_dispatch_return() {
#ifdef NATIVE_ERLANG_STACK
/* ARG3 should contain the place to jump to. */
a.pop(ARG3);
#else
/* ARG3 already contains the place to jump to. */
#endif
a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), imm(0));
a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), 1);
a.jmp(labels[context_switch_simplified]);
}
void BeamModuleAssembler::emit_return() {
Label dispatch_return = a.newLabel();
#ifdef HARD_DEBUG
/* Validate return address and {x,0} */
emit_validate(ArgVal(ArgVal::u, 1));
#endif
#if !defined(NATIVE_ERLANG_STACK)
a.mov(ARG3, getCPRef());
a.mov(getCPRef(), imm(NIL));
#endif
/* The reduction test is kept in module code because moving it to a shared
* fragment caused major performance regressions in dialyzer. */
a.dec(FCALLS);
a.short_().jl(dispatch_return);
#ifdef NATIVE_ERLANG_STACK
a.ret();
#else
a.jmp(ARG3);
#endif
a.bind(dispatch_return);
abs_jmp(ga->get_dispatch_return());
}
void BeamModuleAssembler::emit_i_call(const ArgVal &CallDest) {
Label dest = labels[CallDest.getValue()];
erlang_call(dest, RET);
}
void BeamModuleAssembler::emit_i_call_last(const ArgVal &CallDest,
const ArgVal &Deallocate) {
emit_deallocate(Deallocate);
a.jmp(labels[CallDest.getValue()]);
}
void BeamModuleAssembler::emit_i_call_only(const ArgVal &CallDest) {
a.jmp(labels[CallDest.getValue()]);
}
/* Handles save_calls. Export entry is in ARG2.
*
* When the active code index is ERTS_SAVE_CALLS_CODE_IX, all remote calls will
* land here. */
void BeamGlobalAssembler::emit_dispatch_save_calls() {
a.mov(TMP_MEM1q, ARG2);
emit_enter_runtime();
a.mov(ARG1, c_p);
runtime_call<2>(save_calls);
emit_leave_runtime();
a.mov(ARG2, TMP_MEM1q);
/* Keep going with the actual code index. */
a.mov(ARG1, imm(&the_active_code_index));
a.mov(ARG1d, x86::dword_ptr(ARG1));
a.jmp(x86::qword_ptr(ARG2, ARG1, 3, offsetof(Export, addressv)));
}
x86::Mem BeamModuleAssembler::emit_setup_export(const ArgVal &Exp) {
/* Load export pointer / addressv */
make_move_patch(ARG2, imports[Exp.getValue()].patches);
return x86::qword_ptr(ARG2, active_code_ix, 3, offsetof(Export, addressv));
}
void BeamModuleAssembler::emit_i_call_ext(const ArgVal &Exp) {
x86::Mem destination = emit_setup_export(Exp);
erlang_call(destination, RET);
}
void BeamModuleAssembler::emit_i_call_ext_only(const ArgVal &Exp) {
auto destination = emit_setup_export(Exp);
a.jmp(destination);
}
void BeamModuleAssembler::emit_i_call_ext_last(const ArgVal &Exp,
const ArgVal &Deallocate) {
emit_deallocate(Deallocate);
auto destination = emit_setup_export(Exp);
a.jmp(destination);
}
void BeamModuleAssembler::emit_normal_exit() {
/* This is implicitly global; it does not normally appear in modules and
* doesn't require size optimization. */
emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
emit_proc_lc_unrequire();
a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(EXC_NORMAL));
a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), imm(0));
a.mov(ARG1, c_p);
mov_imm(ARG2, am_normal);
runtime_call<2>(erts_do_exit_process);
emit_proc_lc_require();
emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
abs_jmp(ga->get_do_schedule());
}
void BeamModuleAssembler::emit_continue_exit() {
/* This is implicitly global; it does not normally appear in modules and
* doesn't require size optimization. */
emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
emit_proc_lc_unrequire();
a.mov(ARG1, c_p);
runtime_call<1>(erts_continue_exit_process);
emit_proc_lc_require();
emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
abs_jmp(ga->get_do_schedule());
}
/* This is an alias for handle_error */
void BeamModuleAssembler::emit_error_action_code() {
abs_jmp(ga->get_error_action_code());
}
static ErtsCodeMFA apply3_mfa = {am_erlang, am_apply, 3};
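/* Calls the `apply()` helper to resolve an erlang:apply/3 call, raising an
* exception with erlang:apply/3 as the MFA if it fails. Returns the register
* holding the resolved code address. */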
x86::Gp BeamModuleAssembler::emit_variable_apply(bool includeI) {
Label dispatch = a.newLabel(), entry = a.newLabel();
a.align(kAlignCode, 8);
a.bind(entry);
emit_enter_runtime<Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
load_x_reg_array(ARG2);
if (includeI) {
a.lea(ARG3, x86::qword_ptr(entry));
} else {
mov_imm(ARG3, 0);
}
mov_imm(ARG4, 0);
runtime_call<4>(apply);
emit_leave_runtime<Update::eStack | Update::eHeap>();
a.test(RET, RET);
a.short_().jne(dispatch);
emit_handle_error(entry, &apply3_mfa);
a.bind(dispatch);
return RET;
}
void BeamModuleAssembler::emit_i_apply() {
x86::Gp dest = emit_variable_apply(false);
ASSERT(dest != ARG1);
erlang_call(dest, ARG1);
}
void BeamModuleAssembler::emit_i_apply_last(const ArgVal &Deallocate) {
emit_deallocate(Deallocate);
emit_i_apply_only();
}
void BeamModuleAssembler::emit_i_apply_only() {
x86::Gp dest = emit_variable_apply(true);
a.jmp(dest);
}
x86::Gp BeamModuleAssembler::emit_fixed_apply(const ArgVal &Arity,
bool includeI) {
Label dispatch = a.newLabel(), entry = a.newLabel();
a.align(kAlignCode, 8);
a.bind(entry);
mov_arg(ARG3, Arity);
emit_enter_runtime<Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
load_x_reg_array(ARG2);
if (includeI) {
a.lea(ARG4, x86::qword_ptr(entry));
} else {
mov_imm(ARG4, 0);
}
mov_imm(ARG5, 0);
runtime_call<5>(fixed_apply);
emit_leave_runtime<Update::eStack | Update::eHeap>();
a.test(RET, RET);
a.short_().jne(dispatch);
emit_handle_error(entry, &apply3_mfa);
a.bind(dispatch);
return RET;
}
void BeamModuleAssembler::emit_apply(const ArgVal &Arity) {
x86::Gp dest = emit_fixed_apply(Arity, false);
ASSERT(dest != ARG1);
erlang_call(dest, ARG1);
}
void BeamModuleAssembler::emit_apply_last(const ArgVal &Arity,
const ArgVal &Deallocate) {
emit_deallocate(Deallocate);
x86::Gp dest = emit_fixed_apply(Arity, true);
a.jmp(dest);
}
x86::Gp BeamModuleAssembler::emit_call_fun(const ArgVal &Fun) {
Label dispatch = a.newLabel();
mov_arg(ARG2, Fun);
emit_enter_runtime<Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
load_x_reg_array(ARG3);
mov_imm(ARG4, THE_NON_VALUE);
runtime_call<4>(call_fun);
emit_leave_runtime<Update::eStack | Update::eHeap>();
a.test(RET, RET);
a.short_().jne(dispatch);
emit_handle_error();
a.bind(dispatch);
return RET;
}
void BeamModuleAssembler::emit_i_call_fun(const ArgVal &Fun) {
x86::Gp dest = emit_call_fun(Fun);
ASSERT(dest != ARG1);
erlang_call(dest, ARG1);
}
void BeamModuleAssembler::emit_i_call_fun_last(const ArgVal &Fun,
const ArgVal &Deallocate) {
emit_deallocate(Deallocate);
x86::Gp dest = emit_call_fun(Fun);
a.jmp(dest);
}
x86::Gp BeamModuleAssembler::emit_apply_fun() {
Label dispatch = a.newLabel();
emit_enter_runtime<Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
a.mov(ARG2, getXRef(0));
a.mov(ARG3, getXRef(1));
load_x_reg_array(ARG4);
runtime_call<4>(apply_fun);
emit_leave_runtime<Update::eStack | Update::eHeap>();
a.test(RET, RET);
a.short_().jne(dispatch);
emit_handle_error();
a.bind(dispatch);
return RET;
}
void BeamModuleAssembler::emit_i_apply_fun() {
x86::Gp dest = emit_apply_fun();
ASSERT(dest != ARG1);
erlang_call(dest, ARG1);
}
void BeamModuleAssembler::emit_i_apply_fun_last(const ArgVal &Deallocate) {
emit_deallocate(Deallocate);
emit_i_apply_fun_only();
}
void BeamModuleAssembler::emit_i_apply_fun_only() {
x86::Gp dest = emit_apply_fun();
a.jmp(dest);
}
/* Pseudo-instruction for signalling lambda load errors. Never actually runs. */
void BeamModuleAssembler::emit_i_lambda_error(const ArgVal &Dummy) {
a.hlt();
}
void BeamModuleAssembler::emit_i_make_fun(const ArgVal &Fun,
const ArgVal &NumFree) {
mov_arg(ARG4, NumFree);
emit_enter_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
load_x_reg_array(ARG2);
make_move_patch(ARG3, lambdas[Fun.getValue()].patches);
runtime_call<4>(new_fun);
emit_leave_runtime<Update::eReductions | Update::eStack | Update::eHeap>();
a.mov(getXRef(0), RET);
}
void BeamModuleAssembler::emit_i_make_fun3(const ArgVal &Fun,
const ArgVal &Dst,
const ArgVal &NumFree,
const std::vector<ArgVal> &env) {
size_t num_free = env.size();
ASSERT(NumFree.getValue() == num_free);
mov_arg(ARG3, NumFree);
emit_enter_runtime<Update::eHeap>();
a.mov(ARG1, c_p);
make_move_patch(ARG2, lambdas[Fun.getValue()].patches);
runtime_call<3>(new_fun_thing);
emit_leave_runtime<Update::eHeap>();
comment("Move fun environment");
for (unsigned i = 0; i < num_free; i++) {
mov_arg(x86::qword_ptr(RET,
offsetof(ErlFunThing, env) + i * sizeof(Eterm)),
env[i]);
}
comment("Create boxed ptr");
a.or_(RETb, TAG_PRIMARY_BOXED);
mov_arg(Dst, RET);
}
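/* Extract the head and tail of the list pointed to by `src`. When the
* destinations are (reverse) consecutive registers, both words are moved
* with a single 128-bit load/store (using AVX to swap when needed);
* otherwise two plain moves are used. */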
void BeamModuleAssembler::emit_get_list(const x86::Gp src,
const ArgVal &Hd,
const ArgVal &Tl) {
x86::Gp boxed_ptr = emit_ptr_val(src, src);
switch (ArgVal::register_relation(Hd, Tl)) {
case ArgVal::Relation::consecutive: {
comment("(moving head and tail together)");
x86::Mem dst_ptr = getArgRef(Hd, 16);
x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
a.movups(x86::xmm0, src_ptr);
a.movups(dst_ptr, x86::xmm0);
break;
}
case ArgVal::Relation::reverse_consecutive: {
if (!hasCpuFeature(x86::Features::kAVX)) {
goto fallback;
}
comment("(moving and swapping head and tail together)");
x86::Mem dst_ptr = getArgRef(Tl, 16);
x86::Mem src_ptr = getCARRef(boxed_ptr, 16);
a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
a.vmovups(dst_ptr, x86::xmm0);
break;
}
case ArgVal::Relation::none:
fallback:
a.mov(ARG2, getCARRef(boxed_ptr));
a.mov(ARG3, getCDRRef(boxed_ptr));
mov_arg(Hd, ARG2);
mov_arg(Tl, ARG3);
break;
}
}
void BeamModuleAssembler::emit_get_list(const ArgVal &Src,
const ArgVal &Hd,
const ArgVal &Tl) {
mov_arg(ARG1, Src);
emit_get_list(ARG1, Hd, Tl);
}
void BeamModuleAssembler::emit_get_hd(const ArgVal &Src, const ArgVal &Hd) {
mov_arg(ARG1, Src);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(ARG2, getCARRef(boxed_ptr));
mov_arg(Hd, ARG2);
}
void BeamModuleAssembler::emit_get_tl(const ArgVal &Src, const ArgVal &Tl) {
mov_arg(ARG1, Src);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(ARG2, getCDRRef(boxed_ptr));
mov_arg(Tl, ARG2);
}
void BeamModuleAssembler::emit_is_nonempty_list_get_list(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Hd,
const ArgVal &Tl) {
mov_arg(RET, Src);
a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
a.jne(labels[Fail.getValue()]);
emit_get_list(RET, Hd, Tl);
}
void BeamModuleAssembler::emit_is_nonempty_list_get_hd(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Hd) {
mov_arg(RET, Src);
a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
a.jne(labels[Fail.getValue()]);
x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
a.mov(ARG2, getCARRef(boxed_ptr));
mov_arg(Hd, ARG2);
}
void BeamModuleAssembler::emit_is_nonempty_list_get_tl(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Tl) {
mov_arg(RET, Src);
a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
a.jne(labels[Fail.getValue()]);
x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
a.mov(ARG2, getCDRRef(boxed_ptr));
mov_arg(Tl, ARG2);
}
void BeamModuleAssembler::emit_i_get(const ArgVal &Src, const ArgVal &Dst) {
mov_arg(ARG2, Src);
emit_enter_runtime();
a.mov(ARG1, c_p);
runtime_call<2>(erts_pd_hash_get);
emit_leave_runtime();
mov_arg(Dst, RET);
}
void BeamModuleAssembler::emit_i_get_hash(const ArgVal &Src,
const ArgVal &Hash,
const ArgVal &Dst) {
mov_arg(ARG2, Hash);
mov_arg(ARG3, Src);
emit_enter_runtime();
a.mov(ARG1, c_p);
runtime_call<3>(erts_pd_hash_get_with_hx);
emit_leave_runtime();
mov_arg(Dst, RET);
}
/* Store the pointer to a tuple in ARG2. Remove any LITERAL_PTR tag. */
void BeamModuleAssembler::emit_load_tuple_ptr(const ArgVal &Term) {
mov_arg(ARG2, Term);
(void)emit_ptr_val(ARG2, ARG2);
}
#ifdef DEBUG
/* Emit an assertion to ensure that tuple_reg points into the same
* tuple as Src. */
void BeamModuleAssembler::emit_tuple_assertion(const ArgVal &Src,
x86::Gp tuple_reg) {
Label ok = a.newLabel(), fatal = a.newLabel();
ASSERT(tuple_reg != RET);
mov_arg(RET, Src);
emit_is_boxed(fatal, RET, dShort);
(void)emit_ptr_val(RET, RET);
a.cmp(RET, tuple_reg);
a.short_().je(ok);
a.bind(fatal);
{ a.ud2(); }
a.bind(ok);
}
#endif
/* Fetch an element from the tuple pointed to by the boxed pointer
* in ARG2. */
void BeamModuleAssembler::emit_i_get_tuple_element(const ArgVal &Src,
const ArgVal &Element,
const ArgVal &Dst) {
#ifdef DEBUG
emit_tuple_assertion(Src, ARG2);
#endif
a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
mov_arg(Dst, ARG1);
}
/* Fetch two consecutive tuple elements from the tuple pointed to by
* the boxed pointer in ARG2. */
void BeamModuleAssembler::emit_get_two_tuple_elements(const ArgVal &Src,
const ArgVal &Element,
const ArgVal &Dst1,
const ArgVal &Dst2) {
#ifdef DEBUG
emit_tuple_assertion(Src, ARG2);
#endif
x86::Mem element_ptr =
emit_boxed_val(ARG2, Element.getValue(), 2 * sizeof(Eterm));
switch (ArgVal::register_relation(Dst1, Dst2)) {
case ArgVal::Relation::consecutive: {
x86::Mem dst_ptr = getArgRef(Dst1, 16);
a.movups(x86::xmm0, element_ptr);
a.movups(dst_ptr, x86::xmm0);
break;
}
case ArgVal::Relation::reverse_consecutive: {
if (!hasCpuFeature(x86::Features::kAVX)) {
goto fallback;
} else {
x86::Mem dst_ptr = getArgRef(Dst2, 16);
a.vpermilpd(x86::xmm0, element_ptr, 1); /* Load and swap */
a.vmovups(dst_ptr, x86::xmm0);
break;
}
}
case ArgVal::Relation::none:
fallback:
a.mov(ARG1, emit_boxed_val(ARG2, Element.getValue()));
a.mov(ARG3, emit_boxed_val(ARG2, (Element + sizeof(Eterm)).getValue()));
mov_arg(Dst1, ARG1);
mov_arg(Dst2, ARG3);
break;
}
}
void BeamModuleAssembler::emit_init(const ArgVal &Y) {
mov_arg(Y, NIL);
}
void BeamModuleAssembler::emit_i_trim(const ArgVal &Words) {
ASSERT(Words.getType() == ArgVal::TYPE::u);
ASSERT(Words.getValue() <= 1023);
if (Words.getValue() > 0) {
a.add(E, imm(Words.getValue() * sizeof(Eterm)));
}
}
void BeamModuleAssembler::emit_i_move(const ArgVal &Src, const ArgVal &Dst) {
mov_arg(Dst, Src);
}
/* Move two words at consecutive addresses to consecutive or reverse
* consecutive destinations. */
void BeamModuleAssembler::emit_move_two_words(const ArgVal &Src1,
const ArgVal &Dst1,
const ArgVal &Src2,
const ArgVal &Dst2) {
x86::Mem src_ptr = getArgRef(Src1, 16);
ASSERT(ArgVal::register_relation(Src1, Src2) ==
ArgVal::Relation::consecutive);
switch (ArgVal::register_relation(Dst1, Dst2)) {
case ArgVal::Relation::consecutive: {
x86::Mem dst_ptr = getArgRef(Dst1, 16);
a.movups(x86::xmm0, src_ptr);
a.movups(dst_ptr, x86::xmm0);
break;
}
case ArgVal::Relation::reverse_consecutive: {
x86::Mem dst_ptr = getArgRef(Dst2, 16);
comment("(moving and swapping)");
if (hasCpuFeature(x86::Features::kAVX)) {
a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
a.vmovups(dst_ptr, x86::xmm0);
} else {
mov_arg(ARG1, Src1);
mov_arg(ARG2, Src2);
mov_arg(Dst1, ARG1);
mov_arg(Dst2, ARG2);
}
break;
}
case ArgVal::Relation::none:
ASSERT(0);
break;
}
}
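/* Swap the contents of two registers. When they are adjacent in memory and
* AVX is available, a single vpermilpd load plus one store does the job. */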
void BeamModuleAssembler::emit_swap(const ArgVal &R1, const ArgVal &R2) {
if (!hasCpuFeature(x86::Features::kAVX)) {
goto fallback;
}
switch (ArgVal::register_relation(R1, R2)) {
case ArgVal::Relation::consecutive: {
x86::Mem ptr = getArgRef(R1, 16);
comment("(swapping using AVX)");
a.vpermilpd(x86::xmm0, ptr, 1); /* Load and swap */
a.vmovups(ptr, x86::xmm0);
break;
}
case ArgVal::Relation::reverse_consecutive: {
x86::Mem ptr = getArgRef(R2, 16);
comment("(swapping using AVX)");
a.vpermilpd(x86::xmm0, ptr, 1); /* Load and swap */
a.vmovups(ptr, x86::xmm0);
break;
}
case ArgVal::Relation::none:
fallback:
mov_arg(ARG1, R1);
mov_arg(ARG2, R2);
mov_arg(R2, ARG1);
mov_arg(R1, ARG2);
break;
}
}
void BeamModuleAssembler::emit_node(const ArgVal &Dst) {
a.mov(ARG1, imm(&erts_this_node));
a.mov(ARG1, x86::qword_ptr(ARG1));
a.mov(ARG1, x86::qword_ptr(ARG1, offsetof(ErlNode, sysname)));
mov_arg(Dst, ARG1);
}
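/* put_cons/append_cons/store_cons cooperate to build cons cells above HTOP:
* put_cons writes the first cell and leaves a tagged list pointer in ARG2,
* append_cons adds a cell whose tail is the list built so far, and
* store_cons finally bumps HTOP and stores the pointer in the
* destination. */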
void BeamModuleAssembler::emit_put_cons(const ArgVal &Hd, const ArgVal &Tl) {
switch (ArgVal::register_relation(Hd, Tl)) {
case ArgVal::Relation::consecutive: {
x86::Mem src_ptr = getArgRef(Hd, 16);
x86::Mem dst_ptr = x86::xmmword_ptr(HTOP, 0);
comment("(put head and tail together)");
a.movups(x86::xmm0, src_ptr);
a.movups(dst_ptr, x86::xmm0);
break;
}
case ArgVal::Relation::reverse_consecutive: {
if (!hasCpuFeature(x86::Features::kAVX)) {
goto fallback;
}
x86::Mem src_ptr = getArgRef(Tl, 16);
x86::Mem dst_ptr = x86::xmmword_ptr(HTOP, 0);
comment("(putting and swapping head and tail together)");
a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
a.vmovups(dst_ptr, x86::xmm0);
break;
}
case ArgVal::Relation::none:
fallback:
mov_arg(x86::qword_ptr(HTOP, 0), Hd);
mov_arg(x86::qword_ptr(HTOP, 1 * sizeof(Eterm)), Tl);
break;
}
a.lea(ARG2, x86::qword_ptr(HTOP, TAG_PRIMARY_LIST));
}
void BeamModuleAssembler::emit_append_cons(const ArgVal &index,
const ArgVal &Hd) {
size_t offset = 2 * index.getValue() * sizeof(Eterm);
mov_arg(x86::qword_ptr(HTOP, offset), Hd);
a.mov(x86::qword_ptr(HTOP, offset + sizeof(Eterm)), ARG2);
a.lea(ARG2, x86::qword_ptr(HTOP, offset + TAG_PRIMARY_LIST));
}
void BeamModuleAssembler::emit_store_cons(const ArgVal &len,
const ArgVal &Dst) {
a.add(HTOP, imm(len.getValue() * 2 * sizeof(Eterm)));
mov_arg(Dst, ARG2);
}
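/* Build a tuple with the given arity word on the heap. Consecutive source
* registers are paired up and moved with single 128-bit loads/stores where
* possible. */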
void BeamModuleAssembler::emit_put_tuple2(const ArgVal &Dst,
const ArgVal &Arity,
const std::vector<ArgVal> &args) {
size_t size = args.size();
ASSERT(arityval(Arity.getValue()) == size);
comment("Move arity word");
mov_arg(x86::qword_ptr(HTOP, 0), Arity);
comment("Move tuple data");
for (unsigned i = 0; i < size; i++) {
x86::Mem dst_ptr = x86::qword_ptr(HTOP, (i + 1) * sizeof(Eterm));
if (i + 1 == size) {
mov_arg(dst_ptr, args[i]);
} else {
switch (ArgVal::register_relation(args[i], args[i + 1])) {
case ArgVal::consecutive: {
x86::Mem src_ptr = getArgRef(args[i], 16);
comment("(moving two elements at once)");
dst_ptr.setSize(16);
a.movups(x86::xmm0, src_ptr);
a.movups(dst_ptr, x86::xmm0);
i++;
break;
}
case ArgVal::reverse_consecutive: {
if (!hasCpuFeature(x86::Features::kAVX)) {
mov_arg(dst_ptr, args[i]);
} else {
x86::Mem src_ptr = getArgRef(args[i + 1], 16);
comment("(moving and swapping two elements at once)");
dst_ptr.setSize(16);
a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
a.vmovups(dst_ptr, x86::xmm0);
i++;
}
break;
}
case ArgVal::none:
mov_arg(dst_ptr, args[i]);
break;
}
}
}
comment("Create boxed ptr");
a.lea(ARG1, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
a.add(HTOP, imm((size + 1) * sizeof(Eterm)));
mov_arg(Dst, ARG1);
}
void BeamModuleAssembler::emit_self(const ArgVal &Dst) {
a.mov(ARG1, x86::qword_ptr(c_p, offsetof(Process, common.id)));
mov_arg(Dst, ARG1);
}
void BeamModuleAssembler::emit_set_tuple_element(const ArgVal &Element,
const ArgVal &Tuple,
const ArgVal &Offset) {
mov_arg(ARG1, Tuple);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
mov_arg(emit_boxed_val(boxed_ptr, Offset.getValue()), Element, ARG2);
}
void BeamModuleAssembler::emit_is_nonempty_list(const ArgVal &Fail,
const ArgVal &Src) {
x86::Mem list_ptr = getArgRef(Src, 1);
a.test(list_ptr, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_jump(const ArgVal &Fail) {
a.jmp(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_atom(const ArgVal &Fail, const ArgVal &Src) {
mov_arg(RET, Src);
ERTS_CT_ASSERT(_TAG_IMMED2_MASK < 256);
a.and_(RETb, imm(_TAG_IMMED2_MASK));
a.cmp(RETb, imm(_TAG_IMMED2_ATOM));
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_boolean(const ArgVal &Fail,
const ArgVal &Src) {
/* Since am_true and am_false differ by a single bit, we can simplify the
* check by clearing said bit and comparing against the lesser one. */
ERTS_CT_ASSERT(am_false == make_atom(0));
ERTS_CT_ASSERT(am_true == make_atom(1));
mov_arg(ARG1, Src);
a.and_(ARG1, imm(~(am_true & ~_TAG_IMMED1_MASK)));
a.cmp(ARG1, imm(am_false));
a.jne(labels[Fail.getValue()]);
}
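/* Shared helper for the binary/bitstring tests. Jumps to `next` for heap and
* refc binaries, to `subbin` for sub binaries (whose bit size the caller
* still has to check), and to `fail` for anything else. */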
void BeamModuleAssembler::emit_is_binary(Label fail,
x86::Gp src,
Label next,
Label subbin) {
ASSERT(src != RET && src != ARG2);
emit_is_boxed(fail, src);
x86::Gp boxed_ptr = emit_ptr_val(src, src);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.and_(RETb, imm(_TAG_HEADER_MASK));
a.cmp(RETb, imm(_TAG_HEADER_SUB_BIN));
a.short_().je(subbin);
ERTS_CT_ASSERT(_TAG_HEADER_REFC_BIN + 4 == _TAG_HEADER_HEAP_BIN);
a.and_(RETb, imm(~4));
a.cmp(RETb, imm(_TAG_HEADER_REFC_BIN));
a.short_().je(next);
a.jmp(fail);
}
void BeamModuleAssembler::emit_is_binary(const ArgVal &Fail,
const ArgVal &Src) {
Label next = a.newLabel(), subbin = a.newLabel();
mov_arg(ARG1, Src);
emit_is_binary(labels[Fail.getValue()], ARG1, next, subbin);
a.bind(subbin);
{
/* emit_is_binary has already removed the literal tag from Src, if
* applicable. */
a.cmp(emit_boxed_val(ARG1, offsetof(ErlSubBin, bitsize), sizeof(byte)),
imm(0));
a.jne(labels[Fail.getValue()]);
}
a.bind(next);
}
void BeamModuleAssembler::emit_is_bitstring(const ArgVal &Fail,
const ArgVal &Src) {
Label next = a.newLabel();
mov_arg(ARG1, Src);
emit_is_binary(labels[Fail.getValue()], ARG1, next, next);
a.bind(next);
}
void BeamModuleAssembler::emit_is_float(const ArgVal &Fail, const ArgVal &Src) {
mov_arg(ARG1, Src);
emit_is_boxed(labels[Fail.getValue()], ARG1);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM));
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_function(const ArgVal &Fail,
const ArgVal &Src) {
Label next = a.newLabel();
mov_arg(RET, Src);
emit_is_boxed(labels[Fail.getValue()], RET);
x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.cmp(RETd, imm(HEADER_FUN));
a.short_().je(next);
ERTS_CT_ASSERT(HEADER_EXPORT < 256);
a.cmp(RETb, imm(HEADER_EXPORT));
a.jne(labels[Fail.getValue()]);
a.bind(next);
}
void BeamModuleAssembler::emit_is_function2(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Arity) {
if (Arity.getType() != ArgVal::i) {
/*
* Non-literal arity - extremely uncommon. Generate simple code.
*/
mov_arg(ARG2, Src);
mov_arg(ARG3, Arity);
emit_enter_runtime();
a.mov(ARG1, c_p);
runtime_call<3>(erl_is_function);
emit_leave_runtime();
a.cmp(RET, imm(am_true));
a.jne(labels[Fail.getValue()]);
return;
}
unsigned arity = unsigned_val(Arity.getValue());
if (arity > MAX_ARG) {
/* Arity is negative or too large. */
a.jmp(labels[Fail.getValue()]);
return;
}
Label next = a.newLabel(), fun = a.newLabel();
mov_arg(ARG1, Src);
emit_is_boxed(labels[Fail.getValue()], ARG1);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.cmp(RETd, imm(HEADER_FUN));
a.short_().je(fun);
ERTS_CT_ASSERT(HEADER_EXPORT < 256);
a.cmp(RETb, imm(HEADER_EXPORT));
a.jne(labels[Fail.getValue()]);
comment("Check arity of export fun");
a.mov(ARG2, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
a.cmp(x86::qword_ptr(ARG2, offsetof(Export, info.mfa.arity)), imm(arity));
a.jne(labels[Fail.getValue()]);
a.short_().jmp(next);
comment("Check arity of fun");
a.bind(fun);
{
a.cmp(emit_boxed_val(boxed_ptr, offsetof(ErlFunThing, arity)),
imm(arity));
a.jne(labels[Fail.getValue()]);
}
a.bind(next);
}
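/* Test whether Src is an integer: either a small or a bignum. The sign bit
* of the bignum header is masked away so both positive and negative bignums
* are accepted. */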
void BeamModuleAssembler::emit_is_integer(const ArgVal &Fail,
const ArgVal &Src) {
Label next = a.newLabel();
Label fail = labels[Fail.getValue()];
mov_arg(ARG1, Src);
a.mov(RETd, ARG1d);
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
a.short_().je(next);
emit_is_boxed(fail, RET);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
a.jne(fail);
a.bind(next);
}
void BeamModuleAssembler::emit_is_list(const ArgVal &Fail, const ArgVal &Src) {
Label next = a.newLabel();
mov_arg(RET, Src);
a.cmp(RET, imm(NIL));
a.short_().je(next);
a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));
a.jne(labels[Fail.getValue()]);
a.bind(next);
}
void BeamModuleAssembler::emit_is_map(const ArgVal &Fail, const ArgVal &Src) {
mov_arg(RET, Src);
emit_is_boxed(labels[Fail.getValue()], RET);
x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.and_(RETb, imm(_TAG_HEADER_MASK));
a.cmp(RETb, imm(_TAG_HEADER_MAP));
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_nil(const ArgVal &Fail, const ArgVal &Src) {
a.cmp(getArgRef(Src), imm(NIL));
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_number(const ArgVal &Fail,
const ArgVal &Src) {
Label next = a.newLabel();
Label fail = labels[Fail.getValue()];
mov_arg(ARG1, Src);
a.mov(RETd, ARG1d);
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
a.short_().je(next);
emit_is_boxed(fail, RET);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(ARG1, emit_boxed_val(boxed_ptr));
a.mov(RETd, ARG1d);
a.and_(RETb, imm(_TAG_HEADER_MASK - _BIG_SIGN_BIT));
a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
a.short_().je(next);
a.cmp(ARG1d, imm(HEADER_FLONUM));
a.jne(fail);
a.bind(next);
}
void BeamModuleAssembler::emit_is_pid(const ArgVal &Fail, const ArgVal &Src) {
Label next = a.newLabel();
mov_arg(ARG1, Src);
a.mov(RETd, ARG1d);
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(_TAG_IMMED1_PID));
a.short_().je(next);
/* Reuse RET as the important bits are still available. */
emit_is_boxed(labels[Fail.getValue()], RET);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.and_(RETb, imm(_TAG_HEADER_MASK));
a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PID));
a.jne(labels[Fail.getValue()]);
a.bind(next);
}
void BeamModuleAssembler::emit_is_port(const ArgVal &Fail, const ArgVal &Src) {
Label next = a.newLabel();
mov_arg(ARG1, Src);
a.mov(RETd, ARG1d);
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(_TAG_IMMED1_PORT));
a.short_().je(next);
/* Reuse RET as the important bits are still available. */
emit_is_boxed(labels[Fail.getValue()], RET);
x86::Gp boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.and_(RETb, imm(_TAG_HEADER_MASK));
a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_PORT));
a.jne(labels[Fail.getValue()]);
a.bind(next);
}
void BeamModuleAssembler::emit_is_reference(const ArgVal &Fail,
const ArgVal &Src) {
Label next = a.newLabel();
mov_arg(RET, Src);
emit_is_boxed(labels[Fail.getValue()], RET);
x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
a.and_(RETb, imm(_TAG_HEADER_MASK));
a.cmp(RETb, imm(_TAG_HEADER_REF));
a.short_().je(next);
a.cmp(RETb, imm(_TAG_HEADER_EXTERNAL_REF));
a.jne(labels[Fail.getValue()]);
a.bind(next);
}
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tagged_tuple(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Arity,
const ArgVal &Tag) {
mov_arg(ARG2, Src);
emit_is_boxed(labels[Fail.getValue()], ARG2);
x86::Gp boxed_ptr = emit_ptr_val(ARG2, ARG2);
ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
a.cmp(emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)), imm(Arity.getValue()));
a.jne(labels[Fail.getValue()]);
a.cmp(emit_boxed_val(boxed_ptr, sizeof(Eterm)), imm(Tag.getValue()));
a.jne(labels[Fail.getValue()]);
}
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tagged_tuple_ff(const ArgVal &NotTuple,
const ArgVal &NotRecord,
const ArgVal &Src,
const ArgVal &Arity,
const ArgVal &Tag) {
mov_arg(ARG2, Src);
emit_is_boxed(labels[NotTuple.getValue()], ARG2);
(void)emit_ptr_val(ARG2, ARG2);
a.mov(ARG1, emit_boxed_val(ARG2));
ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
a.test(ARG1.r8(), imm(_TAG_HEADER_MASK));
a.jne(labels[NotTuple.getValue()]);
ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
a.cmp(ARG1d, imm(Arity.getValue()));
a.jne(labels[NotRecord.getValue()]);
a.cmp(emit_boxed_val(ARG2, sizeof(Eterm)), imm(Tag.getValue()));
a.jne(labels[NotRecord.getValue()]);
}
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tuple(const ArgVal &Fail,
const ArgVal &Src) {
mov_arg(ARG2, Src);
emit_is_boxed(labels[Fail.getValue()], ARG2);
(void)emit_ptr_val(ARG2, ARG2);
ERTS_CT_ASSERT(_TAG_HEADER_ARITYVAL == 0);
a.test(emit_boxed_val(ARG2, 0, sizeof(byte)), imm(_TAG_HEADER_MASK));
a.jne(labels[Fail.getValue()]);
}
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_is_tuple_of_arity(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Arity) {
mov_arg(ARG2, Src);
emit_is_boxed(labels[Fail.getValue()], ARG2);
(void)emit_ptr_val(ARG2, ARG2);
ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
a.jne(labels[Fail.getValue()]);
}
/* Note: This instruction leaves the pointer to the tuple in ARG2. */
void BeamModuleAssembler::emit_i_test_arity(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Arity) {
mov_arg(ARG2, Src);
(void)emit_ptr_val(ARG2, ARG2);
ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
a.cmp(emit_boxed_val(ARG2, 0, sizeof(Uint32)), imm(Arity.getValue()));
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_i_is_eq_exact_immed(const ArgVal &Fail,
const ArgVal &X,
const ArgVal &Y) {
cmp_arg(getArgRef(X), Y);
a.jne(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_i_is_ne_exact_immed(const ArgVal &Fail,
const ArgVal &X,
const ArgVal &Y) {
cmp_arg(getArgRef(X), Y);
a.je(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_eq_exact(const ArgVal &Fail,
const ArgVal &X,
const ArgVal &Y) {
Label next = a.newLabel();
mov_arg(ARG2, Y); /* May clobber ARG1 */
mov_arg(ARG1, X);
a.cmp(ARG1, ARG2);
a.short_().je(next);
/* Fancy way of checking if both are immediates. */
a.mov(RETd, ARG1d);
a.and_(RETd, ARG2d);
a.and_(RETb, imm(_TAG_PRIMARY_MASK));
a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
a.je(labels[Fail.getValue()]);
emit_enter_runtime();
runtime_call<2>(eq);
emit_leave_runtime();
a.test(RET, RET);
a.je(labels[Fail.getValue()]);
a.bind(next);
}
void BeamModuleAssembler::emit_i_is_eq_exact_literal(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Literal,
const ArgVal &tag_test) {
mov_arg(ARG2, Literal); /* May clobber ARG1 */
mov_arg(ARG1, Src);
/* Fail immediately unless Src is the same type of pointer as the literal.
*/
a.test(ARG1.r8(), imm(tag_test.getValue()));
a.jne(labels[Fail.getValue()]);
emit_enter_runtime();
runtime_call<2>(eq);
emit_leave_runtime();
a.test(RET, RET);
a.jz(labels[Fail.getValue()]);
}
void BeamModuleAssembler::emit_is_ne_exact(const ArgVal &Fail,
const ArgVal &X,
const ArgVal &Y) {
Label next = a.newLabel();
mov_arg(ARG2, Y); /* May clobber ARG1 */
mov_arg(ARG1, X);
a.cmp(ARG1, ARG2);
a.je(labels[Fail.getValue()]);
/* Fancy way of checking if both are immediates. */
a.mov(RETd, ARG1d);
a.and_(RETd, ARG2d);
a.and_(RETb, imm(_TAG_PRIMARY_MASK));
a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
a.short_().je(next);
emit_enter_runtime();
runtime_call<2>(eq);
emit_leave_runtime();
a.test(RET, RET);
a.jnz(labels[Fail.getValue()]);
a.bind(next);
}
void BeamModuleAssembler::emit_i_is_ne_exact_literal(const ArgVal &Fail,
const ArgVal &Src,
const ArgVal &Literal) {
Label next = a.newLabel();
mov_arg(ARG2, Literal); /* May clobber ARG1 */
mov_arg(ARG1, Src);
a.mov(RETd, ARG1d);
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
a.short_().je(next);
emit_enter_runtime();
runtime_call<2>(eq);
emit_leave_runtime();
a.test(RET, RET);
a.jnz(labels[Fail.getValue()]);
a.bind(next);
}
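/* Shared fragment for arithmetic ('==') equality of ARG1 and ARG2. Two
* flonums are compared directly with `comisd`; everything else goes through
* erts_cmp_compound(). The caller only needs to test ZF on return. */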
void BeamGlobalAssembler::emit_arith_eq_shared() {
Label generic_compare = a.newLabel();
/* Are both floats? */
a.mov(ARG3d, ARG1d);
a.or_(ARG3d, ARG2d);
a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
a.short_().jne(generic_compare);
x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
a.mov(ARG3, emit_boxed_val(boxed_ptr));
boxed_ptr = emit_ptr_val(ARG5, ARG2);
a.mov(ARG5, emit_boxed_val(boxed_ptr));
a.and_(ARG3d, imm(_TAG_HEADER_MASK));
a.and_(ARG5d, imm(_TAG_HEADER_MASK));
a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
a.or_(ARG3d, ARG5d);
a.short_().jne(generic_compare);
boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
boxed_ptr = emit_ptr_val(ARG2, ARG2);
a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
/* All float terms are finite so our caller only needs to check ZF. We don't
* need to check for errors (PF). */
a.comisd(x86::xmm0, x86::xmm1);
a.ret();
a.bind(generic_compare);
{
emit_enter_runtime();
/* Generic eq-only arithmetic comparison. */
comment("erts_cmp_compound(X, Y, 0, 1);");
mov_imm(ARG3, 0);
mov_imm(ARG4, 1);
runtime_call<4>(erts_cmp_compound);
emit_leave_runtime();
a.test(RET, RET);
a.ret();
}
}
void BeamModuleAssembler::emit_is_eq(const ArgVal &Fail,
const ArgVal &A,
const ArgVal &B) {
Label fail = labels[Fail.getValue()], next = a.newLabel();
mov_arg(ARG2, B); /* May clobber ARG1 */
mov_arg(ARG1, A);
a.cmp(ARG1, ARG2);
a.short_().je(next);
/* We can skip deep comparisons when both args are immediates. */
a.mov(RETd, ARG1d);
a.and_(RETd, ARG2d);
a.and_(RETb, imm(_TAG_PRIMARY_MASK));
a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
a.je(fail);
safe_fragment_call(ga->get_arith_eq_shared());
a.jne(fail);
a.bind(next);
}
void BeamModuleAssembler::emit_is_ne(const ArgVal &Fail,
const ArgVal &A,
const ArgVal &B) {
Label fail = labels[Fail.getValue()], next = a.newLabel();
mov_arg(ARG2, B); /* May clobber ARG1 */
mov_arg(ARG1, A);
a.cmp(ARG1, ARG2);
a.je(fail);
/* We can skip deep comparisons when both args are immediates. */
a.mov(RETd, ARG1d);
a.and_(RETd, ARG2d);
a.and_(RETb, imm(_TAG_PRIMARY_MASK));
a.cmp(RETb, imm(TAG_PRIMARY_IMMED1));
a.short_().je(next);
safe_fragment_call(ga->get_arith_eq_shared());
a.je(fail);
a.bind(next);
}
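/* Shared fragment for relational comparison of ARG1 and ARG2, with fast
* paths for two flonums and two atoms. The flags are set up so that the
* caller can use jl/jge directly on return. */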
void BeamGlobalAssembler::emit_arith_compare_shared() {
Label atom_compare, generic_compare;
atom_compare = a.newLabel();
generic_compare = a.newLabel();
/* Are both floats?
*
* This is done first as relative comparisons on atoms don't make much
* sense. */
a.mov(ARG3d, ARG1d);
a.or_(ARG3d, ARG2d);
a.and_(ARG3d, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
a.short_().jne(atom_compare);
x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG1);
a.mov(ARG3, emit_boxed_val(boxed_ptr));
boxed_ptr = emit_ptr_val(ARG5, ARG2);
a.mov(ARG5, emit_boxed_val(boxed_ptr));
a.and_(ARG3d, imm(_TAG_HEADER_MASK));
a.and_(ARG5d, imm(_TAG_HEADER_MASK));
a.sub(ARG3d, imm(_TAG_HEADER_FLOAT));
a.sub(ARG5d, imm(_TAG_HEADER_FLOAT));
a.or_(ARG3d, ARG5d);
a.short_().jne(generic_compare);
boxed_ptr = emit_ptr_val(ARG1, ARG1);
a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
boxed_ptr = emit_ptr_val(ARG2, ARG2);
a.movsd(x86::xmm1, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
a.comisd(x86::xmm0, x86::xmm1);
/* `comisd` doesn't set the flags the same way `test` and friends do, so
* they need to be converted for jl/jge to work. */
a.setae(x86::al);
a.dec(x86::al);
a.ret();
a.bind(atom_compare);
{
/* Are both atoms? */
a.mov(ARG3d, ARG1d);
a.mov(ARG5d, ARG2d);
a.and_(ARG3d, imm(_TAG_IMMED2_MASK));
a.and_(ARG5d, imm(_TAG_IMMED2_MASK));
a.sub(ARG3d, imm(_TAG_IMMED2_ATOM));
a.sub(ARG5d, imm(_TAG_IMMED2_ATOM));
a.or_(ARG3d, ARG5d);
a.jne(generic_compare);
emit_enter_runtime();
runtime_call<2>(erts_cmp_atoms);
emit_leave_runtime();
/* !! erts_cmp_atoms returns int, not Sint !! */
a.test(RETd, RETd);
a.ret();
}
a.bind(generic_compare);
{
emit_enter_runtime();
comment("erts_cmp_compound(X, Y, 0, 0);");
mov_imm(ARG3, 0);
mov_imm(ARG4, 0);
runtime_call<4>(erts_cmp_compound);
emit_leave_runtime();
a.test(RET, RET);
a.ret();
}
}
void BeamModuleAssembler::emit_is_lt(const ArgVal &Fail,
const ArgVal &LHS,
const ArgVal &RHS) {
Label fail = labels[Fail.getValue()];
Label generic = a.newLabel(), next = a.newLabel();
mov_arg(ARG2, RHS); /* May clobber ARG1 */
mov_arg(ARG1, LHS);
a.cmp(ARG1, ARG2);
a.je(fail);
/* Relative comparisons are overwhelmingly likely to be used on smalls, so
* we'll specialize those and keep the rest in a shared fragment. */
if (RHS.isImmed() && is_small(RHS.getValue())) {
a.mov(RETd, ARG1d);
} else if (LHS.isImmed() && is_small(LHS.getValue())) {
a.mov(RETd, ARG2d);
} else {
a.mov(RETd, ARG1d);
a.and_(RETd, ARG2d);
}
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
a.short_().jne(generic);
a.cmp(ARG1, ARG2);
a.short_().jl(next);
a.jmp(fail);
a.bind(generic);
{
safe_fragment_call(ga->get_arith_compare_shared());
a.jge(fail);
}
a.bind(next);
}
void BeamModuleAssembler::emit_is_ge(const ArgVal &Fail,
const ArgVal &LHS,
const ArgVal &RHS) {
Label fail = labels[Fail.getValue()];
Label generic = a.newLabel(), next = a.newLabel();
mov_arg(ARG2, RHS); /* May clobber ARG1 */
mov_arg(ARG1, LHS);
a.cmp(ARG1, ARG2);
a.short_().je(next);
/* Relative comparisons are overwhelmingly likely to be used on smalls, so
* we'll specialize those and keep the rest in a shared fragment. */
if (RHS.isImmed() && is_small(RHS.getValue())) {
a.mov(RETd, ARG1d);
} else if (LHS.isImmed() && is_small(LHS.getValue())) {
a.mov(RETd, ARG2d);
} else {
a.mov(RETd, ARG1d);
a.and_(RETd, ARG2d);
}
a.and_(RETb, imm(_TAG_IMMED1_MASK));
a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
a.short_().jne(generic);
a.cmp(ARG1, ARG2);
a.short_().jge(next);
a.jmp(fail);
a.bind(generic);
{
safe_fragment_call(ga->get_arith_compare_shared());
a.jl(fail);
}
a.bind(next);
}
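/* Value-returning ('bif') variants of the eq/ne immediate tests: a cmove
* selects between `succ_value` and `fail_value`, and the chosen atom is
* written to Dst. */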
void BeamModuleAssembler::emit_bif_is_eq_ne_exact_immed(const ArgVal &Src,
const ArgVal &Immed,
const ArgVal &Dst,
Eterm fail_value,
Eterm succ_value) {
cmp_arg(getArgRef(Src), Immed);
mov_imm(RET, fail_value);
mov_imm(ARG1, succ_value);
a.cmove(RET, ARG1);
mov_arg(Dst, RET);
}
void BeamModuleAssembler::emit_bif_is_eq_exact_immed(const ArgVal &Src,
const ArgVal &Immed,
const ArgVal &Dst) {
emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_false, am_true);
}
void BeamModuleAssembler::emit_bif_is_ne_exact_immed(const ArgVal &Src,
const ArgVal &Immed,
const ArgVal &Dst) {
emit_bif_is_eq_ne_exact_immed(Src, Immed, Dst, am_true, am_false);
}
void BeamModuleAssembler::emit_badmatch(const ArgVal &Src) {
mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
emit_error(BADMATCH);
}
void BeamModuleAssembler::emit_case_end(const ArgVal &Src) {
mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
emit_error(EXC_CASE_CLAUSE);
}
void BeamModuleAssembler::emit_system_limit_body() {
emit_error(SYSTEM_LIMIT);
}
void BeamModuleAssembler::emit_if_end() {
emit_error(EXC_IF_CLAUSE);
}
void BeamModuleAssembler::emit_catch(const ArgVal &Y, const ArgVal &Fail) {
a.inc(x86::qword_ptr(c_p, offsetof(Process, catches)));
Label patch_addr = a.newLabel();
/*
* Emit the following instruction:
*
* b8 ff ff ff 7f        mov eax,0x7fffffff
*    ^
*    |
*    offset to be patched
*    with the tagged catch
*/
a.bind(patch_addr);
a.mov(RETd, imm(0x7fffffff));
mov_arg(Y, RET);
/* Offset = 1 for `mov` payload */
catches.push_back({{patch_addr, 0x1, 0}, labels[Fail.getValue()]});
}
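/* Shared part of catch_end, entered when {x,0} is THE_NON_VALUE. Depending
* on the exception class in {x,1} it either returns the thrown value or
* builds an {'EXIT', Reason} tuple in {x,0}, attaching a stacktrace to the
* reason for errors. */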
void BeamGlobalAssembler::emit_catch_end_shared() {
Label not_throw = a.newLabel(), not_error = a.newLabel(),
after_gc = a.newLabel();
/* Load thrown value / reason into ARG2 for add_stacktrace */
a.mov(ARG2, getXRef(2));
a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), imm(NIL));
a.cmp(getXRef(1), imm(am_throw));
a.short_().jne(not_throw);
/* Thrown value, return it in x0 */
a.mov(getXRef(0), ARG2);
a.ret();
a.bind(not_throw);
{
a.cmp(getXRef(1), imm(am_error));
a.short_().jne(not_error);
/* This is an error, attach a stacktrace to the reason. */
emit_enter_runtime<Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
/* ARG2 set above. */
a.mov(ARG3, getXRef(3));
runtime_call<3>(add_stacktrace);
emit_leave_runtime<Update::eStack | Update::eHeap>();
/* not_error assumes stacktrace/reason is in ARG2 */
a.mov(ARG2, RET);
}
a.bind(not_error);
{
const int32_t bytes_needed = (3 + S_RESERVED) * sizeof(Eterm);
a.lea(ARG3, x86::qword_ptr(HTOP, bytes_needed));
a.cmp(ARG3, E);
a.short_().jbe(after_gc);
/* Preserve stacktrace / reason */
a.mov(getXRef(0), ARG2);
mov_imm(ARG4, 1);
aligned_call(labels[garbage_collect]);
a.mov(ARG2, getXRef(0));
a.bind(after_gc);
a.mov(x86::qword_ptr(HTOP), imm(make_arityval(2)));
a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), imm(am_EXIT));
a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 2), ARG2);
a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
a.add(HTOP, imm(3 * sizeof(Eterm)));
a.mov(getXRef(0), RET);
}
a.ret();
}
void BeamModuleAssembler::emit_catch_end(const ArgVal &Y) {
Label next = a.newLabel();
emit_try_end(Y);
a.cmp(getXRef(0), imm(THE_NON_VALUE));
a.short_().jne(next);
fragment_call(ga->get_catch_end_shared());
a.bind(next);
}
void BeamModuleAssembler::emit_try_end(const ArgVal &Y) {
a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
emit_init(Y);
}
void BeamModuleAssembler::emit_try_case(const ArgVal &Y) {
a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
mov_imm(RET, NIL);
mov_arg(Y, RET);
a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), RET);
a.movups(x86::xmm0, x86::xmmword_ptr(registers, 1 * sizeof(Eterm)));
a.mov(RET, getXRef(3));
a.movups(x86::xmmword_ptr(registers, 0 * sizeof(Eterm)), x86::xmm0);
a.mov(getXRef(2), RET);
}
void BeamModuleAssembler::emit_try_case_end(const ArgVal &Src) {
mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
emit_error(EXC_TRY_CLAUSE);
}
void BeamModuleAssembler::emit_raise(const ArgVal &Trace, const ArgVal &Value) {
mov_arg(ARG3, Value);
mov_arg(ARG2, Trace);
/* This is an error, attach a stacktrace to the reason. */
a.mov(x86::qword_ptr(c_p, offsetof(Process, fvalue)), ARG3);
a.mov(x86::qword_ptr(c_p, offsetof(Process, ftrace)), ARG2);
emit_enter_runtime();
a.mov(ARG1, c_p);
runtime_call<2>(erts_sanitize_freason);
emit_leave_runtime();
emit_handle_error();
}
void BeamModuleAssembler::emit_build_stacktrace() {
emit_enter_runtime<Update::eStack | Update::eHeap>();
a.mov(ARG1, c_p);
a.mov(ARG2, getXRef(0));
runtime_call<2>(build_stacktrace);
emit_leave_runtime<Update::eStack | Update::eHeap>();
a.mov(getXRef(0), RET);
}
void BeamModuleAssembler::emit_raw_raise() {
Label next = a.newLabel();
emit_enter_runtime();
a.mov(ARG1, getXRef(2));
a.mov(ARG2, getXRef(0));
a.mov(ARG3, getXRef(1));
a.mov(ARG4, c_p);
runtime_call<4>(raw_raise);
emit_leave_runtime();
a.test(RET, RET);
a.short_().jne(next);
emit_handle_error();
a.bind(next);
a.mov(getXRef(0), imm(am_badarg));
}
void BeamGlobalAssembler::emit_i_test_yield_shared() {
int mfa_offset = -(int)sizeof(ErtsCodeMFA) - BEAM_ASM_FUNC_PROLOGUE_SIZE;
/* Yield address is in ARG3. */
a.lea(ARG2, x86::qword_ptr(ARG3, mfa_offset));
a.mov(x86::qword_ptr(c_p, offsetof(Process, current)), ARG2);
a.mov(ARG2, x86::qword_ptr(ARG2, offsetof(ErtsCodeMFA, arity)));
a.mov(x86::qword_ptr(c_p, offsetof(Process, arity)), ARG2);
emit_discard_cp();
a.jmp(labels[context_switch_simplified]);
}
void BeamModuleAssembler::emit_i_test_yield() {
Label next = a.newLabel(), entry = a.newLabel();
/* When present, this is guaranteed to be the first instruction after the
* function entry label, so the `entry` label below can serve as the yield
* address (see emit_i_test_yield_shared, which finds the MFA at a fixed
* offset before that address). */
a.align(kAlignCode, 8);
a.bind(entry);
a.dec(FCALLS);
a.short_().jg(next);
a.lea(ARG3, x86::qword_ptr(entry));
a.call(funcYield);
a.bind(next);
}
void BeamModuleAssembler::emit_i_yield() {
a.mov(getXRef(0), imm(am_true));
#ifdef NATIVE_ERLANG_STACK
fragment_call(ga->get_dispatch_return());
#else
Label next = a.newLabel();
a.lea(ARG3, x86::qword_ptr(next));
abs_jmp(ga->get_dispatch_return());
a.align(kAlignCode, 8);
a.bind(next);
#endif
}
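/* Read the OS performance counter and place the result in {x,0}, building a
* one-limb positive bignum on the heap when it does not fit in a small. */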
void BeamModuleAssembler::emit_i_perf_counter() {
Label next = a.newLabel(), small = a.newLabel();
emit_enter_runtime();
#ifdef WIN32
/* Call the function pointer used by erts_sys_perf_counter */
runtime_call<0>(erts_sys_time_data__.r.o.sys_hrtime);
#else
runtime_call<0>(erts_sys_time_data__.r.o.perf_counter);
#endif
emit_leave_runtime();
a.mov(ARG1, RET);
a.sar(ARG1, imm(SMALL_BITS - 1));
a.add(ARG1, 1);
a.cmp(ARG1, 1);
a.jbe(small);
{
a.mov(TMP_MEM1q, RET);
emit_gc_test(ArgVal(ArgVal::i, 0),
ArgVal(ArgVal::i, ERTS_MAX_UINT64_HEAP_SIZE),
ArgVal(ArgVal::i, 0));
a.mov(ARG1, TMP_MEM1q);
a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 0),
imm(make_pos_bignum_header(1)));
a.mov(x86::qword_ptr(HTOP, sizeof(Eterm) * 1), ARG1);
a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED));
a.add(HTOP, imm(sizeof(Eterm) * 2));
a.short_().jmp(next);
}
a.bind(small);
{
a.shl(RET, imm(_TAG_IMMED1_SIZE));
a.or_(RET, imm(_TAG_IMMED1_SMALL));
}
a.bind(next);
a.mov(getXRef(0), RET);
}