in vm/jitrino/src/jet/cg_regs.cpp [33:188]
/**
 * Allocates a temporary register suitable for holding a value of type
 * \c jt, spilling a currently used register if none is free.
 *
 * The register kind is selected by is_f(jt): the range starting at fr0
 * (fr_num registers) when it is true, otherwise the range starting at gr0
 * (gr_num registers).
 *
 * Search order:
 *  -# a free scratch register located after the last one handed out for
 *     this kind (rlast(jt));
 *  -# any free scratch register of the kind, scanning from the beginning;
 *  -# otherwise the unlocked scratch register with the fewest references
 *     is chosen as a victim, every frame item that lives in it or is
 *     addressed through it is moved to its home slot relative to m_base,
 *     and the victim is returned.
 *
 * In all scans a register is skipped when it is callee-save, is sp, is
 * m_base, or is marked in m_global_rusage. "Free" means rrefs(ar) == 0
 * and rlocks(ar) == 0.
 *
 * @param jt type of the value the register is needed for; only used to
 *        pick the register kind and to query rlast().
 * @return the allocated register. On the two "free register" paths
 *         rlast(ar) is called with the result before returning; the spill
 *         path returns without calling rlast().
 *
 * @note The spill path emits code (st / push / pop) as a side effect.
 * @note Asserts that at least one eligible scratch register is unlocked.
 */
AR CodeGen::valloc(jtype jt)
{
// First register index and number of registers of the requested kind.
unsigned start = is_f(jt) ? ar_idx(fr0) : ar_idx(gr0);
unsigned count = is_f(jt) ? fr_num : gr_num;
// NOTE(review): every loop below uses 'i<highest_index', so the register
// at index start+count-1 is never examined. Confirm whether excluding the
// last register of each kind is intentional or an off-by-one.
unsigned highest_index = start+count-1;
//
// We are going to allocate a temporary register - we make several
// attempts in order to reach the following goals:
// - minimize memory accesses
// - minimize usage of callee-saved registers
//
// Attempt 1: find the first free SCRATCH (non callee-save) register
// located after the one that was handed out last time.
AR last_used = rlast(jt);
for (unsigned i=ar_idx(last_used)+1; i<highest_index; i++) {
AR ar = _ar(i);
// Never hand out callee-save registers, the stack pointer or the frame
// base register as temporaries.
if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
// Free = no references, no locks, and not marked in m_global_rusage.
if (rrefs(ar) == 0 && rlocks(ar) == 0 && !m_global_rusage.test(i)) {
// good, found an available scratch register; record it via rlast()
rlast(ar);
return ar;
}
}
// Attempt 2: no free scratch registers above the 'last used' one.
// Try any scratch register, scanning from the start of the range.
for (unsigned i=start; i<highest_index; i++) {
AR ar = _ar(i);
if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
if (rrefs(ar) == 0 && rlocks(ar) == 0 && !m_global_rusage.test(i)) {
// good, found an available scratch register; record it via rlast()
rlast(ar);
return ar;
}
}
// The block below is compiled out (#if 0); it is kept for reference only.
#if 0
//
// No free scratch registers. How about free callee-save ?
//
// not now. currently, only scratch regs are used as temporaries,
// and callee-save are used as global regs. may revisit it.
for (unsigned i=start; i<highest_index; i++) {
AR ar = _ar(i);
if (ar == sp || ar == m_base) continue;
if (m_bbstate->regs[i].refs == 0 && m_bbstate->regs[i].locks==0 &&
!m_static_rs.test(i)) {
last_used = ar;
rrefs(ar);
return ar;
}
}
#endif
//
// No free register of the needed kind is available, so one has to be
// spilled. Pick the unlocked scratch register with the minimal number of
// references. The comparison is strict, so among registers with an equal
// reference count the one with the lowest index wins.
//
unsigned min_ref = NOTHING, min_idx = NOTHING;
for (unsigned i=start; i<highest_index; i++) {
AR ar = _ar(i);
if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
if (min_ref > rrefs(ar) && rlocks(ar)==0 && !m_global_rusage.test(i)) {
min_ref = rrefs(ar);
min_idx = i;
}
}
// min_idx == NOTHING would mean that all eligible scratch registers are
// locked. This can't happen.
assert(min_idx != NOTHING);
// The victim register: its current users are moved out below.
AR ar = _ar(min_idx);
// The block below is compiled out (#if 0); it is an untested prototype.
#if 0 // TODO: checkit
// How about scratch registers of other kind ?
// Storing to a register allows us avoid memory access. This might seem
// questionable, as on IA32 'MOVD xmm, r32' has the latency of incredible
// 10 cycles.
// However, the throughput is just 1 cycle, so the port is free again
// very soon. Also, as we're allocating, say, GP register, then we'll
// operate on it in next few instructions, and will not use FPU during
// this, so the latency will [hopefully] be masked.
//
//Note: kinda prototype, not even tested.
start = !is_f(jt) ? ar_idx(fr0) : ar_idx(gr0);
count = !is_f(jt) ? fr_num : gr_num;
AR otherKind = ar_x;
for (unsigned i=start; i<highest_index; i++) {
AR ar = _ar(i);
if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
if (m_bbstate->regs[i].refs == 0 && !m_bbstate->regs[i].locked) {
otherKind = ar;
break;
}
}
if (otherKind != ar_x) {
// Cool - do have a scratch register of other kind, let's unload there
mov(otherKind, ar);
m_bbstate->regs[min_idx].temp = otherKind;
}
#endif // if 0
// No way out, have to spill to memory.
if (is_set(DBG_TRACE_CG)) { dbg(";;>spill %s\n", to_str(ar).c_str()); }
// First, move out the stack items that currently live in the register:
// store the register into the item's slot (m_base + vstack_off(i)) and
// re-describe the item as that memory location.
for (unsigned i=0; i<m_jframe->size(); i++) {
Val& s = m_jframe->dip(i);
if (!s.is_reg() || s.reg() != ar) { continue; };
jtype mov_jt = jtmov(s.jt());
st(mov_jt, ar, m_base, vstack_off(i));
// rfree() is called while 's' still describes the register and rref()
// after it describes the memory slot - presumably this moves the
// reference from 'ar' to the new operand; verify against rfree/rref.
rfree(s);
s.to_mem(m_base, vstack_off(i));
rref(s);
}
// Next, do the same for the local variables that live in the register,
// using their slots at m_base + vlocal_off(i).
for (unsigned i=0; i<m_jframe->num_vars(); i++) {
Val& s = m_jframe->var(i);
if (!s.is_reg() || s.reg() != ar) { continue; };
jtype mov_jt = jtmov(s.jt());
st(mov_jt, ar, m_base, vlocal_off(i));
rfree(s);
s.to_mem(m_base, vlocal_off(i));
rref(s);
}
//
// Now, handle the stack items that are memory operands addressed via the
// register (e.g. an instance field value). This is done only when 'ar'
// is a general-purpose register. The value is copied memory-to-memory
// into the item's own stack slot with a push/pop pair, so no additional
// register is required.
//
for (unsigned i=0; is_gr(ar) && i<m_jframe->size(); i++) {
Val& s = m_jframe->dip(i);
if (!s.is_mem() || !s.uses(ar)) { continue; };
//WARN: for i64 both slots have the register assigned, but for dbl64
// only the first slot is marked, so on IA-32 the second slot of a dbl64
// is copied explicitly here.
bool need_double_slot = is_ia32() && s.jt()==dbl64;
// Push the first slot, then (if needed) the one STACK_SLOT_SIZE above it.
push(s.as_opnd(iplatf));
if (need_double_slot) {
Opnd hi_mem(iplatf, s.base(), s.disp() + STACK_SLOT_SIZE, s.index(), s.scale());
push(hi_mem);
}
rfree(s);
int stack_off = vstack_off(i);
s.to_mem(m_base, stack_off);
rref(s);
// Pop in the reverse order of the pushes: the upper slot first, then
// the first slot.
if (need_double_slot) {
Opnd hi_stk(iplatf, m_base, stack_off + STACK_SLOT_SIZE);
pop(hi_stk);
}
Opnd stk(iplatf, m_base, stack_off);
pop(stk);
}
if (is_set(DBG_TRACE_CG)) { dbg(";;>~spill\n"); }
// Note: unlike the free-register paths above, rlast() is not updated here.
return ar;
}