void Compiler::gen_prolog()

in vm/jitrino/src/jet/cg_meth.cpp [54:570]


void Compiler::gen_prolog(void) {
    if (is_set(DBG_TRACE_CG)) {
        dbg(";; ========================================================\n");
        dbg(";; Prolog: max_stack=%d, num_locals=%d, in_slots=%d\n", 
                m_stack.get_max_stack(), 
                m_stack.get_num_locals(),
                m_stack.get_in_slots());
        dbg(";; info_gc_stack_depth=%d, info_gc_locals=%d, info_gc_stack=%d",
                m_stack.info_gc_stack_depth(), 
                m_stack.info_gc_locals(),
                m_stack.info_gc_stack());
        dbg(";; stack_bot=%d, stack_max=%d\n",
                m_stack.stack_bot(), m_stack.stack_max());
        dbg(";; local(0)=%d\n", m_stack.local(0));
        dbg(";; native_stack_bot=%d\n", m_stack.unused());
        dbg(";; ========================================================\n");
    }

    unsigned prologStart = ipoff();
    //
    // Debugging things
    //

    // Ensure stack is aligned properly.
    unsigned alignment = (m_ci.cc() & CCONV_STACK_ALIGN_HALF16) ? CCONV_STACK_ALIGN16
        : m_ci.cc() & CCONV_STACK_ALIGN_MASK;
    if (is_set(DBG_CHECK_STACK) && alignment != 0) {
        if (m_ci.cc() & CCONV_STACK_ALIGN_HALF16) {
            alu(alu_sub, sp, (unsigned)STACK_SLOT_SIZE);
        }
        alu(alu_test, sp, (alignment - 1));
        unsigned br_off = br(eq, 0, 0);
        gen_dbg_rt(false, "Misaligned stack @ %s", meth_fname());
        gen_brk();
        patch(br_off, ip());
        if (m_ci.cc() & CCONV_STACK_ALIGN_HALF16) {
            alu(alu_add, sp, (unsigned)STACK_SLOT_SIZE);
        }
    }
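    // Illustrative (assuming EM64T, STACK_SLOT_SIZE==8): with
    // CCONV_STACK_ALIGN_HALF16 the caller keeps sp 16-aligned at the call
    // instruction, so the pushed return address leaves sp%16 == 8 at entry;
    // the temporary 'sub sp, 8' restores 16-alignment before the
    // 'test sp, alignment-1' check, and is undone right after it.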
    
    if (is_set(DBG_BRK)) {
        gen_brk();
    }


    if (m_infoBlock.get_bc_size() == 1 && m_bc[0] == OPCODE_RETURN && !g_jvmtiMode) {
        // Empty method, nothing to do; the same check is in gen_return().
        return;
    }
    
    // A special stack preparation is performed in order to deal with 
    // a possible stack overflow error (SOE) at runtime:
    // the callee-save registers are not changed until we are 
    // absolutely sure we have enough stack. That way, if an SOE happens,
    // we'll simply do nothing in unwind_stack().
    
    //
    // Allocate the stack frame at the very beginning, so we are always in 
    // a predictable state in unwind_frame().
    //
    unsigned frameSize = m_stack.size();
    alu(alu_sub, sp, frameSize);
        
    // Lock all the argument registers to prevent them from being 
    // overwritten by the frame setup code below.
    rlock(m_ci);
    
    {
        // This is a pretty rare case, but it still needs to be handled:
        // when we allocate a stack frame bigger than one page, the 
        // memory page(s) may not be accessible or even allocated yet.
        // A direct access to such a [nonexistent] page raises an 'access 
        // violation'. To avoid the problem we simply probe (make a read 
        // access to) the pages sequentially. In response to a read access 
        // to an inaccessible page, the OS grows the stack, so the pages 
        // become accessible.
        const unsigned PAGE_SIZE = 0x1000;
        unsigned pages = 
            (frameSize + m_max_native_stack_depth + PAGE_SIZE - 1) / PAGE_SIZE;
        
        if (method_is_synchronized(m_method) || hasSOEHandlers)  {
            // A contract with the VM: check an extra page for synchronized 
            // methods or methods with SOE handlers.
            pages++;
        }
        //
        for (unsigned i=1; i<pages; i++) {
            AR ar = valloc(i32);
            ld4(ar, sp, frameSize-i*PAGE_SIZE);
        }
    }
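    // Illustrative arithmetic: with frameSize==0x1800 and
    // m_max_native_stack_depth==0x900, pages = (0x1800+0x900+0xFFF)/0x1000
    // == 3, so the loop reads [sp+0x800] (i=1) and [sp-0x800] (i=2) - one
    // touch per page. i==0 is skipped: [sp+frameSize] holds the return
    // address and is known to be accessible already.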
    // When requested, store the whole context (i.e. including scratch 
    // registers) - normally for JVMTI PopFrame support.
    // Scratch registers get stored separately from the callee-save ones:
    // the callee-save registers are stored into the spill area, but we 
    // can't save scratch regs there - this area is already used to 
    // temporarily save scratch regs during method calls, etc. (see 
    // gen_vm_call_restore). Thus, we dedicate a separate place.
    const bool storeWholeContext =
        m_infoBlock.get_compile_params().exe_restore_context_after_unwind;
#ifdef _DEBUG
    // Fill the whole stack frame with a special value.
    // The '-1' keeps the slot holding retAddr intact.
    int num_words = frameSize/STACK_SLOT_SIZE - 1;
    AR ar = valloc(iplatf);
    Opnd fill(iplatf, ar);
    rlock(ar);
    AR ridx = valloc(iplatf);
    runlock(ar);
    Opnd idx(iplatf, ridx);
    //
    // When filling up the frame, the regs context is destroyed - preserve
    // it.
    if (storeWholeContext) {
        push(fill);
        push(idx);
    }
    //
#ifdef _EM64T_
    mov(fill, (uint_ptr)0xDEADBEEFDEADBEEF);
#else
    mov(fill, 0xDEADBEEF);
#endif
    mov(idx, num_words);
    unsigned _loop = ipoff();
    mov(Opnd(iplatf, sp, 0, ridx, STACK_SLOT_SIZE), fill);
    alu(alu_sub, idx, 1);
    unsigned br_off = br(nz, 0, 0);
    patch(br_off, ip(_loop));
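    // Illustrative (assuming IA-32, STACK_SLOT_SIZE==4): for frameSize==0x40
    // num_words==15, so the loop above stores the pattern at [sp+60] down
    // to [sp+4]; [sp+0x40] (retAddr) and [sp+0] stay untouched, since the
    // loop exits when idx reaches zero.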
    if (storeWholeContext) {
        pop(idx);
        pop(fill);
    }
#endif

    // Save the callee-save registers. If the frame size is less than one 
    // page, the page has not been touched yet, so an SOE may happen here.
    for (unsigned i=0; i<ar_num; i++) {
        AR ar = _ar(i);
        if (ar==sp || !is_callee_save(ar) || !m_global_rusage.test(i)) {
            continue;
        }
        // use maximum possible size to store the register
        jtype jt = is_f(ar) ? dbl64 : jobj;
        // Here, always use sp-based addressing - bp frame is not ready
        // yet.
        st(jt, ar, sp, frameSize+m_stack.spill(ar));
        m_infoBlock.saved(ar);
    }
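    // Note: frameSize + m_stack.spill(ar) is the sp-based form of the 
    // bp-relative spill slot (m_base becomes sp+frameSize below), which is 
    // presumably how unwind_frame() locates the saved registers at fixed 
    // offsets.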
    
    if (storeWholeContext) {
        // For JVMTI's PopFrame we store all scratch registers to a special
        // place.
        if (is_set(DBG_TRACE_CG)) { dbg(";;>jvmti.save.all.regs\n"); }
        for (unsigned i=0; i<ar_num; i++) {
            AR ar = _ar(i);
            if (is_callee_save(ar) || ar==sp) {
                continue;
            }
            // use maximum possible size to store the register
            jtype jt = is_f(ar) ? dbl64 : jobj;
            // Here, always use sp-based addressing - bp frame is not ready
            // yet.
            st(jt, ar, sp, frameSize+m_stack.jvmti_register_spill_offset(ar));
        }
        if (is_set(DBG_TRACE_CG)) { dbg(";;>~jvmti.save.all.regs\n"); }
    }


    // Ok, if we reach this point at runtime, then we have enough stack 
    // and have stored all the needed registers, so in case of 
    // unwind_stack() we'll simply restore the registers from the stack.
    unsigned thisPoint = ipoff() - prologStart;
    m_infoBlock.set_warmup_len(thisPoint);
    if (m_base != sp) {
        // create bp-frame
        lea(m_base, Opnd(jobj, sp, frameSize));
    }
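    // When a bp-frame is used, m_base now equals the entry value of sp 
    // (the slot holding the return address), so the voff()-based accesses 
    // below are frame-pointer relative and stay valid across later sp 
    // changes.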
    
    // Must be here, after the stack gets aligned
    if (is_set(DBG_TRACE_EE)) {
        gen_dbg_rt(true, "entering: %s", meth_fname());
    }

    //
    // reload input args into local vars
    //
        
    ::std::vector<unsigned> locals_map;
    locals_map.resize(words(m_ci.count()));
    // an initial GC map for input args
    ::std::vector<unsigned> args_map;
    args_map.resize(words(m_ci.count()));
    // an initial GC map for callee-save registers
    unsigned regs_map = 0;
    
    // The extra STACK_SLOT_SIZE accounts for the return address (retAddr)
    unsigned const sp_offset = frameSize + STACK_SLOT_SIZE;
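    // Illustrative: until the bp frame is used, the i-th stack-passed arg
    // presumably lives at [sp + sp_offset + m_ci.off(i)]: 'frameSize'
    // skips our own frame, 'STACK_SLOT_SIZE' skips the return address.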
    
    // Spill out registers that are both input args and globally allocated
    for (unsigned i=0, local=0; i<m_ci.count(); i++, local++) {
        jtype jt = m_ci.jt(i);
        AR ar = m_ci.reg(i);
        if (ar != ar_x && m_global_rusage.test(ar_idx(ar))) {
            Opnd arg = m_ci.get(i);
            Val& var = vlocal(jt, local, true);
            mov(var.as_opnd(), arg);
            // A presumption to simplify the code: if the managed calling
            // convention passes args on registers, then the platform has 
            // no 'big' type problem.
            assert(!is_big(jt));
        }
        if (is_wide(jt)) {
            ++local;
        }
    }
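    // Example of the 'local' bookkeeping above: for args (int, long, int)
    // the locals are numbered 0, 1-2 and 3 - a wide (long/double) arg
    // occupies two local slots, hence the extra ++local.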

    // Now, process input args: 
    //  - set GC maps for objects that came as input args, 
    //  - move input args into the slots in the local stack frame (for 
    //    some args)
    for (unsigned i = 0, local=0; i<m_ci.count(); i++, local++) {
        jtype jt = m_ci.jt(i);
        // All values less than 32 bits get moved between methods as I_32
        if (jt<i32) {
            jt = i32;
        }
        // If this is an object, then set a bit in the appropriate map ...
        if (jt == jobj) {
            AR ar = vreg(jobj, local);
            if (ar != ar_x && is_callee_save(ar)) {
                // .. callee-saved GP regs or ..
                regs_map |= 1<<ar_idx(m_ra[local]);
            }
            else if (vis_arg(local)) {
                // .. local vars that are kept on the input slots or
                // when we need to keep input args valid during enumeration
                // (for example for JVMTI PopFrame needs) ...
                assert(m_ci.reg(i) == ar_x);
                assert(0 == (m_ci.off(i) % STACK_SLOT_SIZE));
                int inVal = m_ci.off(i) / STACK_SLOT_SIZE;
                args_map[word_no(inVal)] =
                            args_map[word_no(inVal)] | (1 <<bit_no(inVal));
                if (g_jvmtiMode) {
                    // .. a 'regular' GC map for locals - must report
                    // together with input args in case of JVMTI
                    locals_map[word_no(local)] =
                                locals_map[word_no(local)] | (1 <<bit_no(local));
                }
            }
            else {
                assert(m_ci.reg(i) != ar_x);
                // .. a 'regular' GC map for locals.
                locals_map[word_no(local)] = 
                            locals_map[word_no(local)] | (1 <<bit_no(local));
            }
        }
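        // Illustrative: assuming 32-bit map words, an object at local #35
        // sets word_no(35)==1, bit_no(35)==3, i.e. locals_map[1] |= 1<<3.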
        jtype jtm = jtmov(jt);
        // as_type() => Convert narrow types (<i32) to i32.
        Opnd arg = m_ci.get(i, sp_offset).as_type(jt);

        // If we need to store 'this' for special reporting (e.g. for 
        // monitor_exit or for a stack trace) - store it.
        if (i==0 && is_set(JMF_REPORT_THIS)) {
            if (is_set(DBG_TRACE_CG)) {dbg(";;>copy thiz\n");}
            assert(jt == jobj);
            Opnd thiz(jobj, m_base, voff(m_stack.thiz()));
            do_mov(thiz, arg);
            if (is_set(DBG_TRACE_CG)) {dbg(";;>~copy thizh\n");}
        }
        // If the local resides on the input arg slot, then there is no 
        // need to copy it from the input arg into the frame, except in 
        // JVMTI mode.
        if (vis_arg(local) && !g_jvmtiMode) {
            if (is_wide(jt)) {
                ++local;
            }
            continue;
        }
        //
        // Ok, copy from the input args area into the local variables area
        //
        
        // Define the slot, so it has proper type
        vvar_def(jt, local);
        if (arg.is_reg() && m_global_rusage.test(ar_idx(arg.reg()))) {
            // See the loop above - the argument was already spilled into 
            // memory; nothing to do
        }
        else {
            // forDef = true to avoid uploading, so it only returns a 
            // memory operand
            Val& var = vlocal(jt, local, true);
            do_mov(var, arg);
            if (is_big(jt)) {
                // Presumption: on IA-32 (the only platform where 
                //              is_big()==true) no i64 inputs are left on 
                //              the input args area
                assert(!vis_arg(local+1));
                // Presumption: on IA-32 no i64 inputs come on registers
                assert(arg.is_mem());
                assert(arg.index() == ar_x);
                Val arg_hi(jtm, arg.base(), arg.disp()+4);
                Val var_hi = vlocal(jt, local+1, true);
                do_mov(var_hi, arg_hi);
            }
        }
        if (is_wide(jt)) {
            ++local;
        }
    }
    
    runlock(m_ci);

    //
    // Store the GC map for the local variables that are initialized as 
    // they come from input args
    //
    if (is_set(DBG_TRACE_CG) && locals_map.size() != 0) {dbg(";;>locals.gc_map\n");}
    for (unsigned i = 0; i<locals_map.size(); i++) {
        Opnd map(i32, m_base, voff(m_stack.info_gc_locals()+i*sizeof(int)));
        Opnd val(locals_map[i]);
        mov(map, val);
    }
    //
    // For other local variables, zero the GC map
    //

    unsigned locals_gc_size = words(m_infoBlock.get_num_locals());
    if (locals_gc_size != locals_map.size()) {
        if (is_set(DBG_TRACE_CG)) {dbg(";;>locals.gc_map\n");}
        Opnd reg(i32, valloc(i32));
        alu(alu_xor, reg, reg);
        for (unsigned i=(U_32)locals_map.size(); i<locals_gc_size; i++) {
            st4(reg.reg(), m_base, voff(m_stack.info_gc_locals()+i*sizeof(int)));
        }
    }
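    // Illustrative: a method with 3 input args but 40 locals (32-bit map
    // words assumed) has locals_map.size()==1 and locals_gc_size==2, so
    // only map word #1 is zeroed here; word #0 was written above.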
    
    //
    // Store the GC map for input args
    //
    if (is_set(DBG_TRACE_CG) && args_map.size() != 0) {dbg(";;>args.gc_map\n");}
    for (unsigned i = 0; i<args_map.size(); i++) {
        Opnd map(i32, m_base, voff(m_stack.info_gc_args()+i*sizeof(int)));
        Opnd val(args_map[i]);
        mov(map, val);
    }
    //
    // Store info about objects on registers
    //
    if (is_set(DBG_TRACE_CG)) {dbg(";;>regs.gc_map\n");}
    Opnd map(i32,  m_base, voff(m_stack.info_gc_regs()));
    Opnd val(regs_map);
    mov(map, val);
    //
    // Initial stack size is zero
    //
    if (is_set(DBG_TRACE_CG)) {dbg(";;>gc.stack_depth\n");}
    Opnd dpth(i32,  m_base, voff(m_stack.info_gc_stack_depth()));
    mov(dpth, Opnd(0));
    m_bbstate->stack_depth = 0;

    // Put the variables in their places - in case a call to a JVMTI/
    // monitor_enter/recompilation helper leads to a GC.
    
    // TODO: may optimize a bit by specifying (0) if the 0th BB has 
    // ref_count==1. In that case there is no real need to upload all the 
    // items onto their registers. This would require special processing 
    // in both bb_enter() and bb_leave().
    gen_bb_leave(NOTHING);
    
    //
    // Now everything is ready; we may call into the VM/whatever.
    //
    
    // Debugging - print out 'Entering ...'
    if (is_set(DBG_TRACE_EE)) {
        if (is_set(DBG_TRACE_CG)) {dbg(";;>print.ee\n");}
        rlock(cs_trace_arg);
        // Print out input args
        for (unsigned i=0, local=0; i<m_ci.count(); i++, local++) {
            // prepare stack
            if(cs_trace_arg.size() != 0) {
                alu(alu_sub, sp, cs_trace_arg.size());
            }
            // 'local'-th argument as the first arg for dbg_trace_arg() ...
            jtype jt = m_ci.jt(i);
            if (jt<i32) jt = i32;
            
            Opnd arg = cs_trace_arg.get(0);
            Val var;
            if (vreg(jt, local) != ar_x) {
                AR ar = vreg(jt, local);
                if (is_f(ar) && arg.is_reg()) {
                    // If the local var resides on a floating-point 
                    // register and the calling convention uses registers 
                    // to pass args, we cannot simply do 'mov gr, fr'. 
                    // Store fr to memory first, then reload it into gr.
                    assert(is_gr(arg.reg()));
                    Opnd scratch(jt, m_base, voff(m_stack.scratch()));
                    mov(scratch, Opnd(jt, ar));
                    
                    jt = jt == flt32 ? i32 : i64;
                    var = scratch.as_type(jt);
                }
                else {
                    var = Val(jt, ar);
                }
            }
            else {
                var = Val(jt, m_base, vlocal_off(local));
            }
            do_mov(arg, var.as_opnd(arg.jt()));
            // ... plus its index and type ...
            gen_call_novm(cs_trace_arg, (void*)&dbg_trace_arg, 1, i, jt);
            if (is_wide(jt)) {
                ++local;
            }
        }
        runlock(cs_trace_arg);
        if (is_set(DBG_TRACE_CG)) {dbg(";;>~print.ee\n");}
    }
    
    //
    // Profiling/recompilation support
    //
    
    if (is_set(JMF_PROF_ENTRY_BE)) {
        if (is_set(DBG_TRACE_CG)) { dbg(";;>profiling\n"); }
        // Increment entry counter
        AR ar = valloc(jobj);
        movp(ar, m_p_methentry_counter);
        Opnd addr(i32, ar, 0);
        if (is_set(JMF_PROF_SYNC_CHECK)) {
            rlock(ar);
            AR gr_val = valloc(i32);
            runlock(ar);
            
            Opnd val(i32, gr_val);
            Opnd thr(m_methentry_threshold);
            /* mov vreg, [counter] */ mov(val, addr);
            /* add vreg, 1         */ alu(alu_add, val, Opnd(1));
            /* mov [counter], vreg */ mov(addr, val);
            /* cmp vreg, threshold */ alu(alu_cmp, val, thr);
            /* jne keep_going      */ 
            /*      call recompile */ 
            /* keep_going:   ...   */ 
            unsigned br_off = br(ne, 0, 0, taken);
            gen_call_vm_restore(false, ci_helper_o, m_recomp_handler_ptr, 
                                0, m_profile_handle);
            patch(br_off, ip());
        }
        else {
            alu(alu_add, addr, Opnd(1));
        }
        if (is_set(DBG_TRACE_CG)) { dbg(";;>~profiling\n"); }
    }
    
    //
    // JVMTI method_enter notification
    //
    if (compilation_params.exe_notify_method_entry) {
        AR ar = valloc(iplatf);
        Opnd flag_addr(iplatf, ar);
        mov(flag_addr,Opnd(iplatf,(int_ptr)rt_method_entry_flag_address));
        Opnd mem(i16, ar, 0);
        alu(alu_cmp, mem, Opnd(0));
        unsigned br_off = br(z, 0, 0, taken);
        
        SYNC_FIRST(static const CallSig cs_ti_menter(CCONV_HELPERS, jvoid, jobj));
        gen_call_vm(cs_ti_menter, rt_helper_ti_method_enter, 0, m_method);
        
        patch(br_off, ip());
    }
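    // The code above is a guarded call: it loads the address of the global
    // 'method entry event enabled' flag, tests the flag and jumps over the
    // expensive rt_helper_ti_method_enter call when events are disabled.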
    
    
    if (meth_is_sync()) {
        unsigned stackFix = 0;
        if (is_set(DBG_TRACE_CG)) { dbg(";;>monitor_enter\n"); }
        if (method_is_static(m_method)) {
            gen_call_vm(cs_jlc, rt_helper_class_2_jlc, 0, m_klass);
            gen_save_ret(cs_jlc);
            stackFix = gen_stack_to_args(true, cs_mon, 0);
            //gen_call_vm(cs_mon, rt_helper_monitor_enter_static, 0, m_klass);
        }
        else {
            AR gr = gr0;
            if (cs_mon.reg(0) != gr_x) {
                if (cs_mon.size() != 0) {
                    assert(cs_mon.caller_pops());
                    alu(alu_sub, sp, cs_mon.size());
                }
                ld(jobj, cs_mon.reg(0), m_base, voff(m_stack.thiz()));
            }
            else {
                assert(cs_mon.size() != 0);
                alu(alu_sub, sp, cs_mon.size());
                ld(jobj, gr, m_base, voff(m_stack.thiz()));
                st(jobj, gr, sp, cs_mon.off(0));
            }
            //gen_call_vm(cs_mon, rt_helper_monitor_enter, 1);
        }
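        // Monitor object for the call below: 'this' for instance methods
        // (loaded above), or the java/lang/Class object obtained via
        // rt_helper_class_2_jlc for static methods.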
        gen_call_vm(cs_mon, rt_helper_monitor_enter, 1);

        if (method_is_static(m_method)) {
            runlock(cs_mon);
            if (stackFix != 0) {
                alu(alu_sub, sp, stackFix);
            }
        }

        if (is_set(DBG_TRACE_CG)) { dbg(";;>~monitor_enter\n"); }
    }
    
    if (is_set(DBG_TRACE_CG)) {
        dbg_dump_state("after prolog", m_bbstate);
    }
}