AR CodeGen::valloc()

in vm/jitrino/src/jet/cg_regs.cpp [33:188]


/**
 * Allocates a temporary register suitable for holding a value of type
 * \c jt, spilling a currently used register to memory if none is free.
 *
 * The register range searched starts at \c fr0 (\c fr_num entries) when
 * \c is_f(jt) is true, and at \c gr0 (\c gr_num entries) otherwise.
 *
 * The search is done in the following order:
 *  -# a free scratch register with an index above the one reported by
 *     \c rlast(jt);
 *  -# a free scratch register anywhere in the range;
 *  -# otherwise, the non-locked scratch register with the smallest
 *     \c rrefs() count is chosen and everything in the current frame
 *     (\c m_jframe) that resides in it, or is addressed through it, is
 *     moved to its \c m_base -relative home slot.
 *
 * 'Free' here means <code>rrefs(ar)==0 && rlocks(ar)==0</code> and the
 * register's bit is not set in \c m_global_rusage. Callee-save registers,
 * \c sp and \c m_base are never returned.
 *
 * In cases 1 and 2 the chosen register is recorded through \c rlast(ar);
 * in the spill case \c rlast() is not updated.
 *
 * NOTE(review): every scan below runs while <code>i<highest_index</code>
 * with <code>highest_index = start+count-1</code>, so the register with
 * index \c highest_index itself is never examined. It is not clear from
 * this function whether this is intentional (e.g. a reserved register) or
 * an off-by-one - confirm before changing.
 *
 * @param jt type of the value the register is requested for
 * @return the allocated register; all items that used it are already
 *         spilled when the function returns
 */
AR CodeGen::valloc(jtype jt)
{
    // Index range of the register kind (fr* or gr*) that matches jt.
    unsigned start = is_f(jt) ? ar_idx(fr0) : ar_idx(gr0);
    unsigned count = is_f(jt) ? fr_num : gr_num;
    // Index of the last register of this kind; used as an EXCLUSIVE bound
    // by the loops below (see the NOTE(review) in the function header).
    unsigned highest_index = start+count-1;
    //
    // We are going to allocate a temporary register - we do several tries 
    // to reach the following goals:
    // - minimize memory accesses
    // - minimize callee-saved registers usages
    
    // Try to find the first SCRATCH (non callee-save) register
    
    // Start right after the register reported by rlast(jt), so consecutive
    // allocations walk through the range instead of always retrying the
    // same low-index registers first.
    AR last_used = rlast(jt);
    for (unsigned i=ar_idx(last_used)+1; i<highest_index; i++) {
        AR ar = _ar(i);
        // Callee-save registers, the stack pointer and the frame base are
        // never handed out as temporaries.
        if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
        // Free = no references, no locks, and not marked in
        // m_global_rusage.
        if (rrefs(ar) == 0 && rlocks(ar) == 0 && !m_global_rusage.test(i)) {
            // good, found available scratch register
            rlast(ar);
            return ar;
        }
    }
    // No free scratch registers above the 'last used'. Try any scratch reg.
    for (unsigned i=start; i<highest_index; i++) {
        AR ar = _ar(i);
        if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
        if (rrefs(ar) == 0 && rlocks(ar) == 0 && !m_global_rusage.test(i)) {
            // good, found available scratch register
            rlast(ar);
            return ar;
        }
    }
#if 0
    //
    // No free scratch registers. How about free callee-save ?
    //
    // not now. currently, only scratch regs are used as temporaries,
    // and callee-save are used as global regs. may revisit it.
    for (unsigned i=start; i<highest_index; i++) {
        AR ar = _ar(i);
        if (ar == sp || ar == m_base) continue;
        if (m_bbstate->regs[i].refs == 0 && m_bbstate->regs[i].locks==0 && 
            !m_static_rs.test(i)) {
            last_used = ar;
            rrefs(ar);
            return ar;
        }
    }
#endif
    //
    // Ugh, no free registers of the needed kind available, need to spill 
    // someone out
    
    // try to locate first non locked scratch register with min number of 
    // refs
    // Both variables start as NOTHING: min_idx uses it as the 'not found'
    // marker checked by the assert below, min_ref as the initial value any
    // candidate's rrefs() has to be lower than.
    unsigned min_ref = NOTHING, min_idx = NOTHING;
    for (unsigned i=start; i<highest_index; i++) {
        AR ar = _ar(i);
        if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
        // Strict '>' keeps the first register among those with an equal
        // reference count.
        if (min_ref > rrefs(ar) && rlocks(ar)==0 && !m_global_rusage.test(i)) {
            min_ref = rrefs(ar);
            min_idx = i;
        }
    }
    
    // this means that all scratch registers are locked. Can't happen.
    assert(min_idx != NOTHING);
    
    // The victim: the register to be spilled and then returned to the caller.
    AR ar = _ar(min_idx);
     
#if 0 // TODO: checkit
    // How about scratch registers of other kind ?
    // Storing to a register allows us avoid memory access. This might seem 
    // questionable, as on IA32 'MOVD xmm, r32' has the latency of incredible
    // 10 cycles.
    // However, the throughput is just 1 cycle, so the port is free again 
    // very soon. Also, as we're allocating, say, GP register, then we'll
    // operate on it in next few instructions, and will not use FPU during 
    // this, so the latency will [hopefully] be masked. 
    //
    
    //Note: kinda prototype, not even tested.
    start = !is_f(jt) ? ar_idx(fr0) : ar_idx(gr0);
    count = !is_f(jt) ? fr_num : gr_num;
    AR otherKind = ar_x;
    for (unsigned i=start; i<highest_index; i++) {
        AR ar = _ar(i);
        if (is_callee_save(ar) || ar == sp || ar == m_base) continue;
        if (m_bbstate->regs[i].refs == 0 && !m_bbstate->regs[i].locked) {
            otherKind = ar;
            break;
        }
    }
    if (otherKind != ar_x) {
        // Cool - do have a scratch register of other kind, let's unload there
        mov(otherKind, ar);
        m_bbstate->regs[min_idx].temp = otherKind;
    }
#endif // if 0

    // Ugh... No way out, have to spill to the memory...

    if (is_set(DBG_TRACE_CG)) { dbg(";;>spill %s\n", to_str(ar).c_str()); }
     
    // First, free out the stack items, which are the register
    for (unsigned i=0; i<m_jframe->size(); i++) {
        Val& s = m_jframe->dip(i);
        if (!s.is_reg() || s.reg() != ar) { continue; };
        // Store the register into the item's slot at vstack_off(i) relative
        // to m_base, then re-describe the item as that memory location.
        jtype mov_jt = jtmov(s.jt());
        st(mov_jt, ar, m_base, vstack_off(i));
        // rfree() is called while 's' still describes the register and
        // rref() after it describes the memory slot - presumably to move
        // the reference bookkeeping from 'ar' to the new location
        // (TODO confirm against rfree()/rref()).
        rfree(s);
        s.to_mem(m_base, vstack_off(i));
        rref(s);
    }
    // Next, free out the locals, which are the register
    for (unsigned i=0; i<m_jframe->num_vars(); i++) {
        Val& s = m_jframe->var(i);
        if (!s.is_reg() || s.reg() != ar) { continue; };
        // Same sequence as for the stack items above, but the destination
        // is the local variable's slot at vlocal_off(i).
        jtype mov_jt = jtmov(s.jt());
        st(mov_jt, ar, m_base, vlocal_off(i));
        rfree(s);
        s.to_mem(m_base, vlocal_off(i));
        rref(s);
    }
    //
    // Now, free up the stack items that are the memory
    // addressed via register (e.g. an instance field value)
    //
    // Only done when 'ar' is a general-purpose register (is_gr(ar)).
    // The value is copied memory-to-memory through push/pop, so no
    // additional register is needed for the transfer.
    for (unsigned i=0; is_gr(ar) && i<m_jframe->size(); i++) {
        Val& s = m_jframe->dip(i);
        if (!s.is_mem() || !s.uses(ar)) { continue; };
        
        //WARN: both slots for i64 type have register assigned, but for dbl64 - only first slot is marked..
        // So for dbl64 on IA-32 the second half, located STACK_SLOT_SIZE
        // bytes further, is transferred here explicitly as well.
        bool need_double_slot = is_ia32() && s.jt()==dbl64; 
        
        // Push the source operand(s) BEFORE 's' is rewritten by to_mem()
        // below: first the item itself, then (if needed) its upper half.
        push(s.as_opnd(iplatf));
        if (need_double_slot) {
            Opnd hi_mem(iplatf, s.base(), s.disp() + STACK_SLOT_SIZE, s.index(), s.scale());
            push(hi_mem);
        }
        rfree(s);
        int stack_off = vstack_off(i);
        s.to_mem(m_base, stack_off);
        rref(s);
        // Pop in the reverse order of the pushes: the upper half first,
        // then the item itself.
        if (need_double_slot) {
            Opnd hi_stk(iplatf, m_base, stack_off + STACK_SLOT_SIZE);
            pop(hi_stk);
        }
        Opnd stk(iplatf, m_base, stack_off);
        pop(stk);
    }
    if (is_set(DBG_TRACE_CG)) { dbg(";;>~spill\n"); }

    // Note: unlike the two 'free register' paths above, rlast() is not
    // updated here.
    return ar;
}