in src/hotspot/share/opto/compile.cpp [3272:3938]
// Per-node worker of final graph reshaping: dispatches on the opcode 'nop'
// of node 'n' and, depending on the case, either
//  - bumps statistics in 'frc' (float/double op counts, call counts,
//    java call counts, inner loop counts) or records a node in frc._tests,
//  - rewrites the graph around 'n' in place (subsume_by / set_req /
//    disconnect_inputs), or
//  - does nothing beyond debug-only sanity asserts.
//
// Parameters:
//   n          - the node being visited.
//   frc        - counters and lists updated by this function.
//   nop        - opcode to switch on. NOTE(review): presumably n->Opcode()
//                as computed by the caller - confirm at the call site.
//   dead_nodes - receives the old Precedent input of a trailing-load
//                MemBarAcquire (see Op_MemBarAcquire); this function only
//                pushes to it.
//
// Opcodes that are not listed end up in the default case, which asserts that
// the node is neither a call nor a memory node.
void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& frc, uint nop, Unique_Node_List& dead_nodes) {
switch( nop ) {
// Count all float operations that may use FPU
case Op_AddF:
case Op_SubF:
case Op_MulF:
case Op_DivF:
case Op_NegF:
case Op_ModF:
case Op_ConvI2F:
case Op_ConF:
case Op_CmpF:
case Op_CmpF3:
case Op_StoreF:
case Op_LoadF:
// case Op_ConvL2F: // longs are split into 32-bit halves
frc.inc_float_count();
break;
// Float <-> double conversions touch both precisions, so count them as both.
case Op_ConvF2D:
case Op_ConvD2F:
frc.inc_float_count();
frc.inc_double_count();
break;
// Count all double operations that may use FPU
case Op_AddD:
case Op_SubD:
case Op_MulD:
case Op_DivD:
case Op_NegD:
case Op_ModD:
case Op_ConvI2D:
case Op_ConvD2I:
// case Op_ConvL2D: // handled by leaf call
// case Op_ConvD2L: // handled by leaf call
case Op_ConD:
case Op_CmpD:
case Op_CmpD3:
case Op_StoreD:
case Op_LoadD:
case Op_LoadD_unaligned:
frc.inc_double_count();
break;
case Op_Opaque1: // Remove Opaque Nodes before matching
// The Opaque1 node is replaced by its input 1.
n->subsume_by(n->in(1), this);
break;
case Op_CallLeafPure: {
// If the pure call is not supported, then lower to a CallLeaf.
if (!Matcher::match_rule_supported(Op_CallLeafPure)) {
CallNode* call = n->as_Call();
CallNode* new_call = new CallLeafNode(call->tf(), call->entry_point(),
call->_name, TypeRawPtr::BOTTOM);
// Only the control input is carried over; I_O, Memory, ReturnAdr and
// FramePtr of the replacement call are all set to top.
new_call->init_req(TypeFunc::Control, call->in(TypeFunc::Control));
new_call->init_req(TypeFunc::I_O, C->top());
new_call->init_req(TypeFunc::Memory, C->top());
new_call->init_req(TypeFunc::ReturnAdr, C->top());
new_call->init_req(TypeFunc::FramePtr, C->top());
// Copy the call arguments (slots from TypeFunc::Parms up to the size of
// the call's domain tuple).
for (unsigned int i = TypeFunc::Parms; i < call->tf()->domain()->cnt(); i++) {
new_call->init_req(i, call->in(i));
}
n->subsume_by(new_call, this);
}
// Counted as a call site whether or not it was lowered.
frc.inc_call_count();
break;
}
case Op_CallStaticJava:
case Op_CallJava:
case Op_CallDynamicJava:
frc.inc_java_call_count(); // Count java call site;
// Fall through: Java calls also get the generic call handling below.
case Op_CallRuntime:
case Op_CallLeaf:
case Op_CallLeafVector:
case Op_CallLeafNoFP: {
assert (n->is_Call(), "");
CallNode *call = n->as_Call();
// Count call sites where the FP mode bit would have to be flipped.
// Do not count uncommon runtime calls:
// uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
// _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
if (!call->is_CallStaticJava() || !call->as_CallStaticJava()->_name) {
frc.inc_call_count(); // Count the call site
} else { // See if uncommon argument is shared
// NOTE: the locals 'n' and 'nop' below shadow the function parameters of
// the same name for the rest of this else-branch; here they denote the
// first argument of the call and that argument's opcode.
Node *n = call->in(TypeFunc::Parms);
int nop = n->Opcode();
// Clone shared simple arguments to uncommon calls, item (1).
// The argument is cloned only if it has more than one use and is not a
// Proj, CreateEx, CheckCastPP, DecodeN, DecodeNKlass, memory node or Phi.
if (n->outcnt() > 1 &&
!n->is_Proj() &&
nop != Op_CreateEx &&
nop != Op_CheckCastPP &&
nop != Op_DecodeN &&
nop != Op_DecodeNKlass &&
!n->is_Mem() &&
!n->is_Phi()) {
Node *x = n->clone();
call->set_req(TypeFunc::Parms, x);
}
}
break;
}
// Non-FP loads, stores and atomic memory operations: nothing to reshape.
// NOTE(review): presumably listed explicitly so that they do not reach the
// default case, which asserts !n->is_Mem() - confirm.
case Op_StoreB:
case Op_StoreC:
case Op_StoreI:
case Op_StoreL:
case Op_CompareAndSwapB:
case Op_CompareAndSwapS:
case Op_CompareAndSwapI:
case Op_CompareAndSwapL:
case Op_CompareAndSwapP:
case Op_CompareAndSwapN:
case Op_WeakCompareAndSwapB:
case Op_WeakCompareAndSwapS:
case Op_WeakCompareAndSwapI:
case Op_WeakCompareAndSwapL:
case Op_WeakCompareAndSwapP:
case Op_WeakCompareAndSwapN:
case Op_CompareAndExchangeB:
case Op_CompareAndExchangeS:
case Op_CompareAndExchangeI:
case Op_CompareAndExchangeL:
case Op_CompareAndExchangeP:
case Op_CompareAndExchangeN:
case Op_GetAndAddS:
case Op_GetAndAddB:
case Op_GetAndAddI:
case Op_GetAndAddL:
case Op_GetAndSetS:
case Op_GetAndSetB:
case Op_GetAndSetI:
case Op_GetAndSetL:
case Op_GetAndSetP:
case Op_GetAndSetN:
case Op_StoreP:
case Op_StoreN:
case Op_StoreNKlass:
case Op_LoadB:
case Op_LoadUB:
case Op_LoadUS:
case Op_LoadI:
case Op_LoadKlass:
case Op_LoadNKlass:
case Op_LoadL:
case Op_LoadL_unaligned:
case Op_LoadP:
case Op_LoadN:
case Op_LoadRange:
case Op_LoadS:
break;
case Op_AddP: { // Assert sane base pointers
// Despite its name, 'addp' is the Address input of n, not necessarily an
// AddP node; the rewrite below only fires when it is a ConP.
Node *addp = n->in(AddPNode::Address);
assert(n->as_AddP()->address_input_has_same_base(), "Base pointers must match (addp %u)", addp->_idx );
#ifdef _LP64
// Candidate shape: AddP whose Base and Address are the same ConP and whose
// Offset is a constant.
if ((UseCompressedOops || UseCompressedClassPointers) &&
addp->Opcode() == Op_ConP &&
addp == n->in(AddPNode::Base) &&
n->in(AddPNode::Offset)->is_Con()) {
// If the transformation of ConP to ConN+DecodeN is beneficial depends
// on the platform and on the compressed oops mode.
// Use addressing with narrow klass to load with offset on x86.
// Some platforms can use the constant pool to load ConP.
// Do this transformation here since IGVN will convert ConN back to ConP.
const Type* t = addp->bottom_type();
bool is_oop = t->isa_oopptr() != nullptr;
bool is_klass = t->isa_klassptr() != nullptr;
if ((is_oop && UseCompressedOops && Matcher::const_oop_prefer_decode() ) ||
(is_klass && UseCompressedClassPointers && Matcher::const_klass_prefer_decode() &&
t->isa_klassptr()->exact_klass()->is_in_encoding_range())) {
Node* nn = nullptr;
int op = is_oop ? Op_ConN : Op_ConNKlass;
// Look for existing ConN node of the same exact type.
// The search scans the out-edges of root(); no new narrow constant is
// created here, so without a match the AddP is left unchanged.
Node* r = root();
uint cnt = r->outcnt();
for (uint i = 0; i < cnt; i++) {
Node* m = r->raw_out(i);
if (m!= nullptr && m->Opcode() == op &&
m->bottom_type()->make_ptr() == t) {
nn = m;
break;
}
}
if (nn != nullptr) {
// Decode a narrow oop to match address
// [R12 + narrow_oop_reg<<3 + offset]
// From here on 'nn' is the new decode node wrapping the narrow constant.
if (is_oop) {
nn = new DecodeNNode(nn, t);
} else {
nn = new DecodeNKlassNode(nn, t);
}
// Check for succeeding AddP which uses the same Base.
// Otherwise we will run into the assertion above when visiting that guy.
for (uint i = 0; i < n->outcnt(); ++i) {
Node *out_i = n->raw_out(i);
if (out_i && out_i->is_AddP() && out_i->in(AddPNode::Base) == addp) {
out_i->set_req(AddPNode::Base, nn);
#ifdef ASSERT
// Debug-only: only one level of dependent AddP is patched above, so
// verify that no AddP user of out_i still uses the old ConP as Base.
for (uint j = 0; j < out_i->outcnt(); ++j) {
Node *out_j = out_i->raw_out(j);
assert(out_j == nullptr || !out_j->is_AddP() || out_j->in(AddPNode::Base) != addp,
"more than 2 AddP nodes in a chain (out_j %u)", out_j->_idx);
}
#endif
}
}
// Redirect both Base and Address of n to the decode node, then release
// the ConP's inputs if nothing uses it any more.
n->set_req(AddPNode::Base, nn);
n->set_req(AddPNode::Address, nn);
if (addp->outcnt() == 0) {
addp->disconnect_inputs(this);
}
}
}
}
#endif
break;
}
case Op_CastPP: {
// Remove CastPP nodes to gain more freedom during scheduling but
// keep the dependency they encode as control or precedence edges
// (if control is set already) on memory operations. Some CastPP
// nodes don't have a control (don't carry a dependency): skip
// those.
if (n->in(0) != nullptr) {
ResourceMark rm;
// Worklist walk over the uses of n: memory nodes and EncodeP/EncodePKlass
// users get the cast's control as control or precedence edge; the walk
// continues through AddP, DecodeN, DecodeNKlass, CheckCastPP and CastPP
// users. Any other kind of use is not followed.
Unique_Node_List wq;
wq.push(n);
for (uint next = 0; next < wq.size(); ++next) {
Node *m = wq.at(next);
for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
Node* use = m->fast_out(i);
if (use->is_Mem() || use->is_EncodeNarrowPtr()) {
use->ensure_control_or_add_prec(n->in(0));
} else {
switch(use->Opcode()) {
case Op_AddP:
case Op_DecodeN:
case Op_DecodeNKlass:
case Op_CheckCastPP:
case Op_CastPP:
wq.push(use);
break;
}
}
}
}
}
// Now remove the CastPP itself. On LP64, when its input is a DecodeN and
// narrow-oop implicit null checks are generated, it is replaced by a clone
// of that DecodeN carrying the cast's type; otherwise by its plain input.
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
if (is_LP64 && n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
Node* in1 = n->in(1);
const Type* t = n->bottom_type();
Node* new_in1 = in1->clone();
new_in1->as_DecodeN()->set_type(t);
if (!Matcher::narrow_oop_use_complex_address()) {
//
// x86, ARM and friends can handle 2 adds in addressing mode
// and Matcher can fold a DecodeN node into address by using
// a narrow oop directly and do implicit null check in address:
//
// [R12 + narrow_oop_reg<<3 + offset]
// NullCheck narrow_oop_reg
//
// On other platforms (Sparc) we have to keep new DecodeN node and
// use it to do implicit null check in address:
//
// decode_not_null narrow_oop_reg, base_reg
// [base_reg + offset]
// NullCheck base_reg
//
// Pin the new DecodeN node to non-null path on these platform (Sparc)
// to keep the information to which null check the new DecodeN node
// corresponds to use it as value in implicit_null_check().
//
new_in1->set_req(0, n->in(0));
}
n->subsume_by(new_in1, this);
// The original DecodeN may have lost its last use; release its inputs.
if (in1->outcnt() == 0) {
in1->disconnect_inputs(this);
}
} else {
// Replace the cast by its input 1 and release the cast's own inputs once
// it has no uses left.
n->subsume_by(n->in(1), this);
if (n->outcnt() == 0) {
n->disconnect_inputs(this);
}
}
break;
}
case Op_CastII: {
// Delegates to CastIINode::remove_range_check_cast().
n->as_CastII()->remove_range_check_cast(this);
break;
}
#ifdef _LP64
case Op_CmpP:
// Do this transformation here to preserve CmpPNode::sub() and
// other TypePtr related Ideal optimizations (for example, ptr nullness).
if (n->in(1)->is_DecodeNarrowPtr() || n->in(2)->is_DecodeNarrowPtr()) {
// Normalize so that in1 is a decode node; in2 is the other operand.
Node* in1 = n->in(1);
Node* in2 = n->in(2);
if (!in1->is_DecodeNarrowPtr()) {
in2 = in1;
in1 = n->in(2);
}
assert(in1->is_DecodeNarrowPtr(), "sanity");
// new_in2 becomes the narrow counterpart of in2, or stays null when the
// comparison cannot (or should not) be done on narrow values.
Node* new_in2 = nullptr;
if (in2->is_DecodeNarrowPtr()) {
assert(in2->Opcode() == in1->Opcode(), "must be same node type");
new_in2 = in2->in(1);
} else if (in2->Opcode() == Op_ConP) {
const Type* t = in2->bottom_type();
if (t == TypePtr::NULL_PTR) {
assert(in1->is_DecodeN(), "compare klass to null?");
// Don't convert CmpP null check into CmpN if compressed
// oops implicit null check is not generated.
// This will allow to generate normal oop implicit null check.
if (Matcher::gen_narrow_oop_implicit_null_checks())
new_in2 = ConNode::make(TypeNarrowOop::NULL_PTR);
//
// This transformation together with CastPP transformation above
// will generate code for implicit null checks for compressed oops.
//
// The original code after Optimize()
//
// LoadN memory, narrow_oop_reg
// decode narrow_oop_reg, base_reg
// CmpP base_reg, nullptr
// CastPP base_reg // NotNull
// Load [base_reg + offset], val_reg
//
// after these transformations will be
//
// LoadN memory, narrow_oop_reg
// CmpN narrow_oop_reg, nullptr
// decode_not_null narrow_oop_reg, base_reg
// Load [base_reg + offset], val_reg
//
// and the uncommon path (== nullptr) will use narrow_oop_reg directly
// since narrow oops can be used in debug info now (see the code in
// final_graph_reshaping_walk()).
//
// At the end the code will be matched to
// on x86:
//
// Load_narrow_oop memory, narrow_oop_reg
// Load [R12 + narrow_oop_reg<<3 + offset], val_reg
// NullCheck narrow_oop_reg
//
// and on sparc:
//
// Load_narrow_oop memory, narrow_oop_reg
// decode_not_null narrow_oop_reg, base_reg
// Load [base_reg + offset], val_reg
// NullCheck base_reg
//
} else if (t->isa_oopptr()) {
new_in2 = ConNode::make(t->make_narrowoop());
} else if (t->isa_klassptr()) {
// A klass constant is only narrowed if it lies in the encoding range;
// otherwise new_in2 stays null and the CmpP is kept.
ciKlass* klass = t->is_klassptr()->exact_klass();
if (klass->is_in_encoding_range()) {
new_in2 = ConNode::make(t->make_narrowklass());
}
}
}
if (new_in2 != nullptr) {
// Compare the narrow values directly and release the inputs of operands
// that became unused.
Node* cmpN = new CmpNNode(in1->in(1), new_in2);
n->subsume_by(cmpN, this);
if (in1->outcnt() == 0) {
in1->disconnect_inputs(this);
}
if (in2->outcnt() == 0) {
in2->disconnect_inputs(this);
}
}
}
break;
case Op_DecodeN:
case Op_DecodeNKlass:
// Debug-only sanity checks; no reshaping is done for decode nodes here.
assert(!n->in(1)->is_EncodeNarrowPtr(), "should be optimized out");
// DecodeN could be pinned when it can't be fold into
// an address expression, see the code for Op_CastPP above.
assert(n->in(0) == nullptr || (UseCompressedOops && !Matcher::narrow_oop_use_complex_address()), "no control");
break;
case Op_EncodeP:
case Op_EncodePKlass: {
// Fold Encode(Decode(x)) to x, and Encode(ConP) to the matching narrow
// constant.
Node* in1 = n->in(1);
if (in1->is_DecodeNarrowPtr()) {
n->subsume_by(in1->in(1), this);
} else if (in1->Opcode() == Op_ConP) {
const Type* t = in1->bottom_type();
if (t == TypePtr::NULL_PTR) {
assert(t->isa_oopptr(), "null klass?");
n->subsume_by(ConNode::make(TypeNarrowOop::NULL_PTR), this);
} else if (t->isa_oopptr()) {
n->subsume_by(ConNode::make(t->make_narrowoop()), this);
} else if (t->isa_klassptr()) {
ciKlass* klass = t->is_klassptr()->exact_klass();
if (klass->is_in_encoding_range()) {
n->subsume_by(ConNode::make(t->make_narrowklass()), this);
} else {
// Asserts in debug builds; otherwise the compilation is marked as
// failed via record_failure().
assert(false, "unencodable klass in ConP -> EncodeP");
C->record_failure("unencodable klass in ConP -> EncodeP");
}
}
}
// Release the inputs of the old operand if it has no uses left.
if (in1->outcnt() == 0) {
in1->disconnect_inputs(this);
}
break;
}
case Op_Proj: {
if (OptimizeStringConcat || IncrementalInline) {
ProjNode* proj = n->as_Proj();
if (proj->_is_io_use) {
assert(proj->_con == TypeFunc::I_O || proj->_con == TypeFunc::Memory, "");
// Separate projections were used for the exception path which
// are normally removed by a late inline. If it wasn't inlined
// then they will hang around and should just be replaced with
// the original one. Merge them.
Node* non_io_proj = proj->in(0)->as_Multi()->proj_out_or_null(proj->_con, false /*is_io_use*/);
if (non_io_proj != nullptr) {
proj->subsume_by(non_io_proj , this);
}
}
}
break;
}
case Op_Phi:
if (n->as_Phi()->bottom_type()->isa_narrowoop() || n->as_Phi()->bottom_type()->isa_narrowklass()) {
// The EncodeP optimization may create Phi with the same edges
// for all paths. It is not handled well by Register Allocator.
// unique_in stays non-null only if inputs 1..req()-1 are all the same
// node; in that case the Phi is replaced by that node.
Node* unique_in = n->in(1);
assert(unique_in != nullptr, "");
uint cnt = n->req();
for (uint i = 2; i < cnt; i++) {
Node* m = n->in(i);
assert(m != nullptr, "");
if (unique_in != m)
unique_in = nullptr;
}
if (unique_in != nullptr) {
n->subsume_by(unique_in, this);
}
}
break;
#endif
// Integer modulo variants are handed to handle_div_mod_op() with the basic
// type and a flag that is true for the unsigned (UMod*) opcodes.
case Op_ModI:
handle_div_mod_op(n, T_INT, false);
break;
case Op_ModL:
handle_div_mod_op(n, T_LONG, false);
break;
case Op_UModI:
handle_div_mod_op(n, T_INT, true);
break;
case Op_UModL:
handle_div_mod_op(n, T_LONG, true);
break;
case Op_LoadVector:
case Op_StoreVector:
// In builds without ASSERT this case does nothing.
#ifdef ASSERT
// Add VerifyVectorAlignment node between adr and load / store.
if (VerifyAlignVector && Matcher::has_match_rule(Op_VerifyVectorAlignment)) {
bool must_verify_alignment = n->is_LoadVector() ? n->as_LoadVector()->must_verify_alignment() :
n->as_StoreVector()->must_verify_alignment();
if (must_verify_alignment) {
jlong vector_width = n->is_LoadVector() ? n->as_LoadVector()->memory_size() :
n->as_StoreVector()->memory_size();
// The memory access should be aligned to the vector width in bytes.
// However, the underlying array is possibly less well aligned, but at least
// to ObjectAlignmentInBytes. Hence, even if multiple arrays are accessed in
// a loop we can expect at least the following alignment:
jlong guaranteed_alignment = MIN2(vector_width, (jlong)ObjectAlignmentInBytes);
assert(2 <= guaranteed_alignment && guaranteed_alignment <= 64, "alignment must be in range");
assert(is_power_of_2(guaranteed_alignment), "alignment must be power of 2");
// Create mask from alignment. e.g. 0b1000 -> 0b0111
jlong mask = guaranteed_alignment - 1;
Node* mask_con = ConLNode::make(mask);
// Splice the verification node in front of the address input.
VerifyVectorAlignmentNode* va = new VerifyVectorAlignmentNode(n->in(MemNode::Address), mask_con);
n->set_req(MemNode::Address, va);
}
}
#endif
break;
// Gather/scatter, masked vector memory ops and mask nodes: nothing to do.
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_LoadVectorGatherMasked:
case Op_StoreVectorScatterMasked:
case Op_VectorCmpMasked:
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
break;
// Vector reductions: nothing to do.
case Op_AddReductionVI:
case Op_AddReductionVL:
case Op_AddReductionVF:
case Op_AddReductionVD:
case Op_MulReductionVI:
case Op_MulReductionVL:
case Op_MulReductionVF:
case Op_MulReductionVD:
case Op_MinReductionV:
case Op_MaxReductionV:
case Op_AndReductionV:
case Op_OrReductionV:
case Op_XorReductionV:
break;
case Op_PackB:
case Op_PackS:
case Op_PackI:
case Op_PackF:
case Op_PackL:
case Op_PackD:
// req()-1 is the number of inputs besides slot 0, so this triggers for
// Pack nodes with more than two operands.
if (n->req()-1 > 2) {
// Replace many operand PackNodes with a binary tree for matching
PackNode* p = (PackNode*) n;
Node* btp = p->binary_tree_pack(1, n->req());
n->subsume_by(btp, this);
}
break;
case Op_Loop:
assert(!n->as_Loop()->is_loop_nest_inner_loop() || _loop_opts_cnt == 0, "should have been turned into a counted loop");
// Fall through: plain loops share the counting/verification below.
case Op_CountedLoop:
case Op_LongCountedLoop:
case Op_OuterStripMinedLoop:
if (n->as_Loop()->is_inner_loop()) {
frc.inc_inner_loop_count();
}
n->as_Loop()->verify_strip_mined(0);
break;
case Op_LShiftI:
case Op_RShiftI:
case Op_URShiftI:
case Op_LShiftL:
case Op_RShiftL:
case Op_URShiftL:
if (Matcher::need_masked_shift_count) {
// The cpu's shift instructions don't restrict the count to the
// lower 5/6 bits. We need to do the masking ourselves.
Node* in2 = n->in(2);
juint mask = (n->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
const TypeInt* t = in2->find_int_type();
if (t != nullptr && t->is_con()) {
// Constant shift count: fold the masking into a new constant, but only
// if the count actually exceeds the mask.
juint shift = t->get_con();
if (shift > mask) { // Unsigned cmp
n->set_req(2, ConNode::make(TypeInt::make(shift & mask)));
}
} else {
// Variable shift count: insert an AndI with the mask unless the count's
// type already proves it lies within [0, mask].
if (t == nullptr || t->_lo < 0 || t->_hi > (int)mask) {
Node* shift = new AndINode(in2, ConNode::make(TypeInt::make(mask)));
n->set_req(2, shift);
}
}
if (in2->outcnt() == 0) { // Remove dead node
in2->disconnect_inputs(this);
}
}
break;
case Op_MemBarStoreStore:
case Op_MemBarRelease:
// Break the link with AllocateNode: it is no longer useful and
// confuses register allocation.
if (n->req() > MemBarNode::Precedent) {
n->set_req(MemBarNode::Precedent, top());
}
break;
case Op_MemBarAcquire: {
if (n->as_MemBar()->trailing_load() && n->req() > MemBarNode::Precedent) {
// At parse time, the trailing MemBarAcquire for a volatile load
// is created with an edge to the load. After optimizations,
// that input may be a chain of Phis. If those phis have no
// other use, then the MemBarAcquire keeps them alive and
// register allocation can be confused.
// The old input is recorded in dead_nodes before the edge is cut.
dead_nodes.push(n->in(MemBarNode::Precedent));
n->set_req(MemBarNode::Precedent, top());
}
break;
}
case Op_Blackhole:
break;
case Op_RangeCheck: {
// Replace the RangeCheck by a plain If built from the same inputs 0 and 1,
// probability and count, and record the new If in frc._tests.
RangeCheckNode* rc = n->as_RangeCheck();
Node* iff = new IfNode(rc->in(0), rc->in(1), rc->_prob, rc->_fcnt);
n->subsume_by(iff, this);
frc._tests.push(iff);
break;
}
case Op_ConvI2L: {
if (!Matcher::convi2l_type_required) {
// Code generation on some platforms doesn't need accurate
// ConvI2L types. Widening the type can help remove redundant
// address computations.
n->as_Type()->set_type(TypeLong::INT);
ResourceMark rm;
// Worklist of nodes for which duplicates (same opcode, identical inputs)
// are searched and merged, starting with the widened ConvI2L itself.
Unique_Node_List wq;
wq.push(n);
for (uint next = 0; next < wq.size(); next++) {
Node *m = wq.at(next);
for(;;) {
// Loop over all nodes with identical inputs edges as m
Node* k = m->find_similar(m->Opcode());
if (k == nullptr) {
break;
}
// Push their uses so we get a chance to remove node made
// redundant
for (DUIterator_Fast imax, i = k->fast_outs(imax); i < imax; i++) {
Node* u = k->fast_out(i);
if (u->Opcode() == Op_LShiftL ||
u->Opcode() == Op_AddL ||
u->Opcode() == Op_SubL ||
u->Opcode() == Op_AddP) {
wq.push(u);
}
}
// Replace all nodes with identical edges as m with m
k->subsume_by(m, this);
}
}
}
break;
}
case Op_CmpUL: {
if (!Matcher::has_match_rule(Op_CmpUL)) {
// No support for unsigned long comparisons
// Lower to a signed CmpL whose first operand is
//   (in1 | (in1 >> (BitsPerLong - 1))) & max_jlong
// The arithmetic right shift yields all ones when in1 has its sign bit
// set, so such an in1 becomes max_jlong; otherwise in1 is unchanged.
// NOTE(review): this appears to rely on in(2) being a non-negative long
// (in(2) is passed through unmodified) - confirm against the users of
// CmpUL.
ConINode* sign_pos = new ConINode(TypeInt::make(BitsPerLong - 1));
Node* sign_bit_mask = new RShiftLNode(n->in(1), sign_pos);
Node* orl = new OrLNode(n->in(1), sign_bit_mask);
ConLNode* remove_sign_mask = new ConLNode(TypeLong::make(max_jlong));
Node* andl = new AndLNode(orl, remove_sign_mask);
Node* cmp = new CmpLNode(andl, n->in(2));
n->subsume_by(cmp, this);
}
break;
}
#ifdef ASSERT
case Op_ConNKlass: {
// Debug-only check that every narrow klass constant is encodable. Without
// ASSERT this opcode is handled by the default case.
const TypePtr* tp = n->as_Type()->type()->make_ptr();
ciKlass* klass = tp->is_klassptr()->exact_klass();
assert(klass->is_in_encoding_range(), "klass cannot be compressed");
break;
}
#endif
default:
// Every call and memory opcode must have an explicit case above.
assert(!n->is_Call(), "");
assert(!n->is_Mem(), "");
assert(nop != Op_ProfileBoolean, "should be eliminated during IGVN");
break;
}
}