in llvm/lib/Target/X86/X86ISelDAGToDAG.cpp [4705:6102]
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opcode = Node->getOpcode();
SDLoc dl(Node);
if (Node->isMachineOpcode()) {
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
Node->setNodeId(-1);
return; // Already selected.
}
switch (Opcode) {
default: break;
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
default: break;
case Intrinsic::x86_encodekey128:
case Intrinsic::x86_encodekey256: {
if (!Subtarget->hasKL())
break;
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break;
case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break;
}
SDValue Chain = Node->getOperand(0);
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
SDValue());
if (Opcode == X86::ENCODEKEY256)
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
Chain.getValue(1));
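// ENCODEKEY128 takes the input key implicitly in XMM0; ENCODEKEY256 takes
// it in XMM0:XMM1. The glued CopyToReg chain above pins the key operands
// to those registers before the instruction is emitted.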
MachineSDNode *Res = CurDAG->getMachineNode(
Opcode, dl, Node->getVTList(),
{Node->getOperand(2), Chain, Chain.getValue(1)});
ReplaceNode(Node, Res);
return;
}
case Intrinsic::x86_tileloadd64_internal:
case Intrinsic::x86_tileloaddt164_internal: {
if (!Subtarget->hasAMXTILE())
break;
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
? X86::PTILELOADDV
: X86::PTILELOADDT1V;
// _tile_loadd_internal(row, col, buf, STRIDE)
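// STRIDE is encoded as the index register of a standard x86 memory quintet
// (base, scale=1, index, disp=0, segment=0); the AMX hardware uses
// base+disp as the start address and the scaled index as the per-row
// stride.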
SDValue Base = Node->getOperand(4);
SDValue Scale = getI8Imm(1, dl);
SDValue Index = Node->getOperand(5);
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDValue Segment = CurDAG->getRegister(0, MVT::i16);
SDValue Chain = Node->getOperand(0);
MachineSDNode *CNode;
SDValue Ops[] = {Node->getOperand(2),
Node->getOperand(3),
Base,
Scale,
Index,
Disp,
Segment,
Chain};
CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
ReplaceNode(Node, CNode);
return;
}
}
break;
}
case ISD::INTRINSIC_VOID: {
unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
default: break;
case Intrinsic::x86_sse3_monitor:
case Intrinsic::x86_monitorx:
case Intrinsic::x86_clzero: {
bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
unsigned Opc = 0;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_sse3_monitor:
if (!Subtarget->hasSSE3())
break;
Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
break;
case Intrinsic::x86_monitorx:
if (!Subtarget->hasMWAITX())
break;
Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
break;
case Intrinsic::x86_clzero:
if (!Subtarget->hasCLZERO())
break;
Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
break;
}
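// All three instructions take the address implicitly in RAX/EAX; MONITOR
// and MONITORX additionally take extension and hint words in ECX and EDX.
// The glued CopyToReg chain below pins the operands accordingly.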
if (Opc) {
unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
Node->getOperand(2), SDValue());
SDValue InFlag = Chain.getValue(1);
if (IntNo == Intrinsic::x86_sse3_monitor ||
IntNo == Intrinsic::x86_monitorx) {
// Copy the other two operands to ECX and EDX.
Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
InFlag);
InFlag = Chain.getValue(1);
Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
InFlag);
InFlag = Chain.getValue(1);
}
MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
{ Chain, InFlag});
ReplaceNode(Node, CNode);
return;
}
break;
}
case Intrinsic::x86_tilestored64_internal: {
unsigned Opc = X86::PTILESTOREDV;
// _tile_stored_internal(row, col, buf, STRIDE, c)
SDValue Base = Node->getOperand(4);
SDValue Scale = getI8Imm(1, dl);
SDValue Index = Node->getOperand(5);
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDValue Segment = CurDAG->getRegister(0, MVT::i16);
SDValue Chain = Node->getOperand(0);
MachineSDNode *CNode;
SDValue Ops[] = {Node->getOperand(2),
Node->getOperand(3),
Base,
Scale,
Index,
Disp,
Segment,
Node->getOperand(6),
Chain};
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
ReplaceNode(Node, CNode);
return;
}
case Intrinsic::x86_tileloadd64:
case Intrinsic::x86_tileloaddt164:
case Intrinsic::x86_tilestored64: {
if (!Subtarget->hasAMXTILE())
break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break;
case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break;
case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break;
}
// FIXME: Match displacement and scale.
unsigned TIndex = Node->getConstantOperandVal(2);
SDValue TReg = getI8Imm(TIndex, dl);
SDValue Base = Node->getOperand(3);
SDValue Scale = getI8Imm(1, dl);
SDValue Index = Node->getOperand(4);
SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDValue Segment = CurDAG->getRegister(0, MVT::i16);
SDValue Chain = Node->getOperand(0);
MachineSDNode *CNode;
if (Opc == X86::PTILESTORED) {
SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain };
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
} else {
SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain };
CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
}
ReplaceNode(Node, CNode);
return;
}
}
break;
}
case ISD::BRIND:
case X86ISD::NT_BRIND: {
if (Subtarget->isTargetNaCl())
// NaCl has its own pass where jmp %r32 instructions are converted to
// jmp %r64. We leave the instruction alone.
break;
if (Subtarget->isTarget64BitILP32()) {
// Converts a 32-bit register to a 64-bit, zero-extended version of
// it. This is needed because x86-64 can do many things, but jmp %r32
// ain't one of them.
SDValue Target = Node->getOperand(1);
assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other,
Node->getOperand(0), ZextTarget);
ReplaceNode(Node, Brind.getNode());
SelectCode(ZextTarget.getNode());
SelectCode(Brind.getNode());
return;
}
break;
}
case X86ISD::GlobalBaseReg:
ReplaceNode(Node, getGlobalBaseReg());
return;
case ISD::BITCAST:
// Just drop all 128/256/512-bit bitcasts.
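// These all map onto the same XMM/YMM/ZMM register file, so the cast is a
// no-op at the machine level and the operand can be used directly.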
if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
NVT == MVT::f128) {
ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
CurDAG->RemoveDeadNode(Node);
return;
}
break;
case ISD::SRL:
if (matchBitExtract(Node))
return;
LLVM_FALLTHROUGH;
case ISD::SRA:
case ISD::SHL:
if (tryShiftAmountMod(Node))
return;
break;
case X86ISD::VPTERNLOG: {
uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2), Imm))
return;
break;
}
case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
break;
case ISD::AND:
if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
// Try to form a masked VPTESTM. Operands can be in either order.
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
tryVPTESTM(Node, N0, N1))
return;
if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
tryVPTESTM(Node, N1, N0))
return;
}
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);
return;
}
if (matchBitExtract(Node))
return;
if (AndImmShrink && shrinkAndImmediate(Node))
return;
LLVM_FALLTHROUGH;
case ISD::OR:
case ISD::XOR:
if (tryShrinkShlLogicImm(Node))
return;
if (Opcode == ISD::OR && tryMatchBitSelect(Node))
return;
if (tryVPTERNLOG(Node))
return;
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
// Try to avoid folding immediates with multiple uses for optsize.
// This code tries to select to register form directly to avoid going
// through the isel table which might fold the immediate. We can't change
// the add/sub/and/or/xor-with-immediate patterns in the tablegen files to
// check the immediate's use count without making those patterns
// unavailable to the fast-isel table.
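// For example, when optimizing for size, if the same 32-bit immediate
// feeds several adds it is smaller to materialize it once:
//   movl $0x12345678, %ecx; addl %ecx, %eax; addl %ecx, %ebx
// rather than repeating the 4-byte immediate in every instruction.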
if (!CurDAG->shouldOptForSize())
break;
// Only handle i8/i16/i32/i64.
if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
if (!Cst)
break;
int64_t Val = Cst->getSExtValue();
// Make sure it's an immediate that is considered foldable.
// FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
if (!isInt<8>(Val) && !isInt<32>(Val))
break;
// If this can match to INC/DEC, let it go.
if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
break;
// Check if we should avoid folding this immediate.
if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
break;
// We should not fold the immediate. So we need a register form instead.
unsigned ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unexpected VT!");
case MVT::i8:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
}
break;
case MVT::i16:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
}
break;
case MVT::i32:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
}
break;
case MVT::i64:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
}
break;
}
// OK, this is an AND/OR/XOR/ADD/SUB with a constant.
// If this is not a subtract, we can still try to fold a load.
if (Opcode != ISD::SUB) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
CurDAG->RemoveDeadNode(Node);
return;
}
}
CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
return;
}
case X86ISD::SMUL:
// i16/i32/i64 are handled with isel patterns.
if (NVT != MVT::i8)
break;
LLVM_FALLTHROUGH;
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned LoReg, ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL;
ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
break;
case MVT::i16:
LoReg = X86::AX;
ROpc = X86::MUL16r;
MOpc = X86::MUL16m;
break;
case MVT::i32:
LoReg = X86::EAX;
ROpc = X86::MUL32r;
MOpc = X86::MUL32m;
break;
case MVT::i64:
LoReg = X86::RAX;
ROpc = X86::MUL64r;
MOpc = X86::MUL64m;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
// Multiply is commutative.
if (!FoldedLoad) {
FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (FoldedLoad)
std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
MachineSDNode *CNode;
if (FoldedLoad) {
// i16/i32/i64 use an instruction that produces a low and high result even
// though only the low result is used.
SDVTList VTs;
if (NVT == MVT::i8)
VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
else
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
// Record the mem-refs
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
// i16/i32/i64 use an instruction that produces a low and high result even
// though only the low result is used.
SDVTList VTs;
if (NVT == MVT::i8)
VTs = CurDAG->getVTList(NVT, MVT::i32);
else
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
}
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned Opc, MOpc;
unsigned LoReg, HiReg;
bool IsSigned = Opcode == ISD::SMUL_LOHI;
bool UseMULX = !IsSigned && Subtarget->hasBMI2();
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
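// MULX (BMI2) reads one multiplicand implicitly from EDX/RDX, writes the
// product to explicit destination registers, and does not clobber EFLAGS.
// The MULX32Hrr/MULX64Hrr forms are pseudos used when only the high half
// of the product is needed.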
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32:
Opc = UseMULXHi ? X86::MULX32Hrr :
UseMULX ? X86::MULX32rr :
IsSigned ? X86::IMUL32r : X86::MUL32r;
MOpc = UseMULXHi ? X86::MULX32Hrm :
UseMULX ? X86::MULX32rm :
IsSigned ? X86::IMUL32m : X86::MUL32m;
LoReg = UseMULX ? X86::EDX : X86::EAX;
HiReg = X86::EDX;
break;
case MVT::i64:
Opc = UseMULXHi ? X86::MULX64Hrr :
UseMULX ? X86::MULX64rr :
IsSigned ? X86::IMUL64r : X86::MUL64r;
MOpc = UseMULXHi ? X86::MULX64Hrm :
UseMULX ? X86::MULX64rm :
IsSigned ? X86::IMUL64m : X86::MUL64m;
LoReg = UseMULX ? X86::RDX : X86::RAX;
HiReg = X86::RDX;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
// Multiply is commutative.
if (!foldedLoad) {
foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (foldedLoad)
std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
SDValue ResHi, ResLo;
if (foldedLoad) {
SDValue Chain;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
if (UseMULXHi) {
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
Chain = SDValue(CNode, 1);
} else if (UseMULX) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
Chain = SDValue(CNode, 2);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
Chain = SDValue(CNode, 0);
InFlag = SDValue(CNode, 1);
}
// Update the chain.
ReplaceUses(N1.getValue(1), Chain);
// Record the mem-refs
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
SDValue Ops[] = { N1, InFlag };
if (UseMULXHi) {
SDVTList VTs = CurDAG->getVTList(NVT);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
} else if (UseMULX) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
InFlag = SDValue(CNode, 0);
}
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
if (!ResLo) {
assert(LoReg && "Register for low half is not defined!");
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
NVT, InFlag);
InFlag = ResLo.getValue(2);
}
ReplaceUses(SDValue(Node, 0), ResLo);
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
if (!ResHi) {
assert(HiReg && "Register for high half is not defined!");
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
NVT, InFlag);
InFlag = ResHi.getValue(2);
}
ReplaceUses(SDValue(Node, 1), ResHi);
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
dbgs() << '\n');
}
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned ROpc, MOpc;
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
} else {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
}
unsigned LoReg, HiReg, ClrReg;
unsigned SExtOpcode;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
SExtOpcode = 0; // Not used.
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
SExtOpcode = X86::CDQ;
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
SExtOpcode = X86::CQO;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
if (NVT == MVT::i8) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
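// The 8-bit DIV/IDIV divides AX by its r/m8 operand, leaving the quotient
// in AL and the remainder in AH, so the dividend must occupy all 16 bits
// of AX: zero-extended for unsigned division, sign-extended for signed.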
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
MachineSDNode *Move;
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
: X86::MOVZX16rm8;
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
Chain = SDValue(Move, 1);
ReplaceUses(N0.getValue(1), Chain);
// Record the mem-refs
CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
} else {
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
: X86::MOVZX16rr8;
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
Chain = CurDAG->getEntryNode();
}
Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
LoReg, N0, SDValue()).getValue(1);
if (isSigned && !signBitIsZero) {
// Sign extend the low part into the high part.
InFlag =
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
SDValue ClrNode =
SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
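// MOV32r0 is a pseudo that later expands to "xor reg, reg". The 32-bit
// zero is then narrowed with EXTRACT_SUBREG or widened with SUBREG_TO_REG
// to match the division width.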
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
CurDAG->getTargetConstant(X86::sub_16bit, dl,
MVT::i32)),
0);
break;
case MVT::i32:
break;
case MVT::i64:
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
CurDAG->getTargetConstant(X86::sub_32bit, dl,
MVT::i32)),
0);
break;
default:
llvm_unreachable("Unexpected division source");
}
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
}
if (foldedLoad) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
MachineSDNode *CNode =
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
// Record the mem-refs
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
InFlag =
SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by explicitly copying it to
// an ABCD_L register.
//
// The current assumption of the register allocator is that isel
// won't generate explicit references to the GR8_ABCD_H registers. If
// the allocator and/or the backend get enhanced to be more robust in
// that regard, this can be, and should be, removed.
if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
unsigned AHExtOpcode =
isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
MVT::Glue, AHCopy, InFlag);
SDValue Result(RNode, 0);
InFlag = SDValue(RNode, 1);
Result =
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
}
CurDAG->RemoveDeadNode(Node);
return;
}
case X86ISD::FCMP:
case X86ISD::STRICT_FCMP:
case X86ISD::STRICT_FCMPS: {
bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
Node->getOpcode() == X86ISD::STRICT_FCMPS;
SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
// Save the original VT of the compare.
MVT CmpVT = N0.getSimpleValueType();
// Floating point needs special handling if we don't have FCOMI; FCOMI
// availability is implied by the CMOV feature tested below.
if (Subtarget->hasCMov())
break;
bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
unsigned Opc;
switch (CmpVT.SimpleTy) {
default: llvm_unreachable("Unexpected type!");
case MVT::f32:
Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
break;
case MVT::f64:
Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
break;
case MVT::f80:
Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
break;
}
SDValue Chain =
IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
SDValue Glue;
if (IsStrictCmp) {
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
Glue = Chain.getValue(1);
} else {
Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0);
}
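// Without FCOMI the compare result is only available in the x87 status
// word, so we use the classic sequence
//   fucom ; fnstsw %ax ; sahf
// to move the condition bits through AH into EFLAGS.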
// Move FPSW to AX.
SDValue FNSTSW =
SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0);
// Extract upper 8-bits of AX.
SDValue Extract =
CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
// Move AH into flags.
// Some 64-bit targets lack SAHF support, but they do support FCOMI.
assert(Subtarget->hasLAHFSAHF() &&
"Target doesn't support SAHF or FCOMI?");
SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
Chain = AH;
SDValue SAHF = SDValue(
CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
if (IsStrictCmp)
ReplaceUses(SDValue(Node, 1), Chain);
ReplaceUses(SDValue(Node, 0), SAHF);
CurDAG->RemoveDeadNode(Node);
return;
}
case X86ISD::CMP: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
// Optimizations for TEST compares.
if (!isNullConstant(N1))
break;
// Save the original VT of the compare.
MVT CmpVT = N0.getSimpleValueType();
// If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
// by a test instruction. The test should be removed later by
// analyzeCompare if we are using only the zero flag.
// TODO: Should we check the users and use the BEXTR flags directly?
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
: X86::TEST32rr;
SDValue BEXTR = SDValue(NewNode, 0);
NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);
return;
}
}
// We can peek through truncates, but we need to be careful below.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
N0 = N0.getOperand(0);
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
// Look past the truncate if CMP is the only use of it.
if (N0.getOpcode() == ISD::AND &&
N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C) break;
uint64_t Mask = C->getZExtValue();
// We may have looked through a truncate so mask off any bits that
// shouldn't be part of the compare.
Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
// Check if we can replace AND+IMM64 with a shift. This is possible for
// masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
// flag.
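// For example, "testq $0xFFFFFFFF00000000, %rax" cannot encode its
// immediate, but shifting right by 32 and testing the result sets ZF
// exactly when the masked bits are all zero.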
if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
onlyUsesZeroFlag(SDValue(Node, 0))) {
if (isMask_64(~Mask)) {
unsigned TrailingZeros = countTrailingZeros(Mask);
SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
SDValue Shift =
SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
N0.getOperand(0), Imm), 0);
MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
MVT::i32, Shift, Shift);
ReplaceNode(Node, Test);
return;
}
if (isMask_64(Mask)) {
unsigned LeadingZeros = countLeadingZeros(Mask);
SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
SDValue Shift =
SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
N0.getOperand(0), Imm), 0);
MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
MVT::i32, Shift, Shift);
ReplaceNode(Node, Test);
return;
}
}
MVT VT;
int SubRegOp;
unsigned ROpc, MOpc;
// For each of these checks we need to be careful if the sign flag is
// being used. It is only safe to use the sign flag in two cases: either
// the sign bit in the shrunken mask is zero, or the final test size is
// equal to the original compare size.
if (isUInt<8>(Mask) &&
(!(Mask & 0x80) || CmpVT == MVT::i8 ||
hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, convert "testl %eax, $8" to "testb %al, $8"
VT = MVT::i8;
SubRegOp = X86::sub_8bit;
ROpc = X86::TEST8ri;
MOpc = X86::TEST8mi;
} else if (OptForMinSize && isUInt<16>(Mask) &&
(!(Mask & 0x8000) || CmpVT == MVT::i16 ||
hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.
VT = MVT::i16;
SubRegOp = X86::sub_16bit;
ROpc = X86::TEST16ri;
MOpc = X86::TEST16mi;
} else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
((!(Mask & 0x80000000) &&
// Without minsize 16-bit Cmps can get here so we need to
// be sure we calculate the correct sign flag if needed.
(CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
CmpVT == MVT::i32 ||
hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
// NOTE: We only want to run that transform if N0 is 32 or 64 bits.
// Otherwize, we find ourselves in a position where we have to do
// promotion. If previous passes did not promote the and, we assume
// they had a good reason not to and do not promote here.
VT = MVT::i32;
SubRegOp = X86::sub_32bit;
ROpc = X86::TEST32ri;
MOpc = X86::TEST32mi;
} else {
// No eligible transformation was found.
break;
}
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
SDValue Reg = N0.getOperand(0);
// Emit a testl or testw.
MachineSDNode *NewNode;
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
if (!LoadN->isSimple()) {
unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
if ((MOpc == X86::TEST8mi && NumVolBits != 8) ||
(MOpc == X86::TEST16mi && NumVolBits != 16) ||
(MOpc == X86::TEST32mi && NumVolBits != 32))
break;
}
}
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
Reg.getOperand(0) };
NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
// Update the chain.
ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
// Record the mem-refs
CurDAG->setNodeMemRefs(NewNode,
{cast<LoadSDNode>(Reg)->getMemOperand()});
} else {
// Extract the subregister if necessary.
if (N0.getValueType() != VT)
Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
}
// Replace CMP with TEST.
ReplaceNode(Node, NewNode);
return;
}
break;
}
case X86ISD::PCMPISTR: {
if (!Subtarget->hasSSE42())
break;
bool NeedIndex = !SDValue(Node, 0).use_empty();
bool NeedMask = !SDValue(Node, 1).use_empty();
// We can't fold a load if we are going to make two instructions.
bool MayFoldLoad = !NeedIndex || !NeedMask;
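// PCMPISTRI produces its index in ECX and PCMPISTRM its mask in XMM0, so
// when both results are live two instructions are required and a
// single-use load can only be folded into one of them.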
MachineSDNode *CNode;
if (NeedMask) {
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
}
if (NeedIndex || !NeedMask) {
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
}
// Connect the flag usage to the last instruction created.
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
CurDAG->RemoveDeadNode(Node);
return;
}
case X86ISD::PCMPESTR: {
if (!Subtarget->hasSSE42())
break;
// Copy the two implicit register inputs.
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
Node->getOperand(1),
SDValue()).getValue(1);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
Node->getOperand(3), InFlag).getValue(1);
bool NeedIndex = !SDValue(Node, 0).use_empty();
bool NeedMask = !SDValue(Node, 1).use_empty();
// We can't fold a load if we are going to make two instructions.
bool MayFoldLoad = !NeedIndex || !NeedMask;
MachineSDNode *CNode;
if (NeedMask) {
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
InFlag);
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
}
if (NeedIndex || !NeedMask) {
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
}
// Connect the flag usage to the last instruction created.
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::SETCC: {
if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
return;
break;
}
case ISD::STORE:
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
case X86ISD::SETCC_CARRY: {
// We have to do this manually because tblgen will put the eflags copy in
// the wrong place if we use an extract_subreg in the pattern.
MVT VT = Node->getSimpleValueType(0);
// Copy flags to the EFLAGS register and glue it to next node.
SDValue EFLAGS =
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
Node->getOperand(1), SDValue());
// Create a 64-bit instruction if the result is 64-bits otherwise use the
// 32-bit version.
unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
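// SETB_C32r/SETB_C64r are pseudos that expand to "sbb reg, reg",
// broadcasting the carry flag into every bit of the result (0 or -1).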
SDValue Result = SDValue(
CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
// For less than 32-bits we need to extract from the 32-bit node.
if (VT == MVT::i8 || VT == MVT::i16) {
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
}
ReplaceUses(SDValue(Node, 0), Result);
CurDAG->RemoveDeadNode(Node);
return;
}
case X86ISD::SBB: {
if (isNullConstant(Node->getOperand(0)) &&
isNullConstant(Node->getOperand(1))) {
MVT VT = Node->getSimpleValueType(0);
// Create zero.
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
SDValue Zero =
SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
if (VT == MVT::i64) {
Zero = SDValue(
CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
0);
}
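// With both inputs zero, SBB computes 0 - 0 - CF, materializing 0 or -1
// from the incoming carry without needing a SETcc.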
// Copy flags to the EFLAGS register and glue it to next node.
SDValue EFLAGS =
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
Node->getOperand(2), SDValue());
// Create a 64-bit instruction if the result is 64-bits otherwise use the
// 32-bit version.
unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
VTs = CurDAG->getVTList(SBBVT, MVT::i32);
SDValue Result =
SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS,
EFLAGS.getValue(1)}),
0);
// Replace the flag use.
ReplaceUses(SDValue(Node, 1), Result.getValue(1));
// Replace the result use.
if (!SDValue(Node, 0).use_empty()) {
// For less than 32-bits we need to extract from the 32-bit node.
if (VT == MVT::i8 || VT == MVT::i16) {
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
}
ReplaceUses(SDValue(Node, 0), Result);
}
CurDAG->RemoveDeadNode(Node);
return;
}
break;
}
case X86ISD::MGATHER: {
auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
SDValue IndexOp = Mgt->getIndex();
SDValue Mask = Mgt->getMask();
MVT IndexVT = IndexOp.getSimpleValueType();
MVT ValueVT = Node->getSimpleValueType(0);
MVT MaskVT = Mask.getSimpleValueType();
// This is just to prevent crashes if the nodes are malformed somehow.
// We're otherwise only doing loose type checking in here based on what a
// type constraint would say, just like table-based isel.
if (!ValueVT.isVector() || !MaskVT.isVector())
break;
unsigned NumElts = ValueVT.getVectorNumElements();
MVT ValueSVT = ValueVT.getVectorElementType();
bool IsFP = ValueSVT.isFloatingPoint();
unsigned EltSize = ValueSVT.getSizeInBits();
unsigned Opc = 0;
bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
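// AVX512 gathers take the mask in a k-register, while AVX2 gathers take
// and update a vector mask register; the two families therefore use
// different opcodes and operand orders below.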
if (AVX512Gather) {
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
} else {
assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
"Unexpected mask VT!");
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
}
if (!Opc)
break;
SDValue Base, Scale, Index, Disp, Segment;
if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
Base, Scale, Index, Disp, Segment))
break;
SDValue PassThru = Mgt->getPassThru();
SDValue Chain = Mgt->getChain();
// Gather instructions have a mask output that is not present in the ISD node.
SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
MachineSDNode *NewNode;
if (AVX512Gather) {
SDValue Ops[] = {PassThru, Mask, Base, Scale,
Index, Disp, Segment, Chain};
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
} else {
SDValue Ops[] = {PassThru, Base, Scale, Index,
Disp, Segment, Mask, Chain};
NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
}
CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
CurDAG->RemoveDeadNode(Node);
return;
}
case X86ISD::MSCATTER: {
auto *Sc = cast<X86MaskedScatterSDNode>(Node);
SDValue Value = Sc->getValue();
SDValue IndexOp = Sc->getIndex();
MVT IndexVT = IndexOp.getSimpleValueType();
MVT ValueVT = Value.getSimpleValueType();
// This is just to prevent crashes if the nodes are malformed somehow.
// We're otherwise only doing loose type checking in here based on what a
// type constraint would say, just like table-based isel.
if (!ValueVT.isVector())
break;
unsigned NumElts = ValueVT.getVectorNumElements();
MVT ValueSVT = ValueVT.getVectorElementType();
bool IsFP = ValueSVT.isFloatingPoint();
unsigned EltSize = ValueSVT.getSizeInBits();
unsigned Opc;
if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
else
break;
SDValue Base, Scale, Index, Disp, Segment;
if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
Base, Scale, Index, Disp, Segment))
break;
SDValue Mask = Sc->getMask();
SDValue Chain = Sc->getChain();
// Scatter instructions have a mask output that is not present in the ISD node.
SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::PREALLOCATED_SETUP: {
auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
auto CallId = MFI->getPreallocatedIdForCallSite(
cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
SDValue Chain = Node->getOperand(0);
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
MachineSDNode *New = CurDAG->getMachineNode(
TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::PREALLOCATED_ARG: {
auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
auto CallId = MFI->getPreallocatedIdForCallSite(
cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
SDValue Chain = Node->getOperand(0);
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
SDValue ArgIndex = Node->getOperand(2);
SDValue Ops[3];
Ops[0] = CallIdValue;
Ops[1] = ArgIndex;
Ops[2] = Chain;
MachineSDNode *New = CurDAG->getMachineNode(
TargetOpcode::PREALLOCATED_ARG, dl,
CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
MVT::Other),
Ops);
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
CurDAG->RemoveDeadNode(Node);
return;
}
case X86ISD::AESENCWIDE128KL:
case X86ISD::AESDECWIDE128KL:
case X86ISD::AESENCWIDE256KL:
case X86ISD::AESDECWIDE256KL: {
if (!Subtarget->hasWIDEKL())
break;
unsigned Opcode;
switch (Node->getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
case X86ISD::AESENCWIDE128KL:
Opcode = X86::AESENCWIDE128KL;
break;
case X86ISD::AESDECWIDE128KL:
Opcode = X86::AESDECWIDE128KL;
break;
case X86ISD::AESENCWIDE256KL:
Opcode = X86::AESENCWIDE256KL;
break;
case X86ISD::AESDECWIDE256KL:
Opcode = X86::AESDECWIDE256KL;
break;
}
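// The wide Key Locker instructions process eight 128-bit blocks at once,
// implicitly reading and writing XMM0-XMM7; each block operand is pinned
// to its register with a glued CopyToReg chain below.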
SDValue Chain = Node->getOperand(0);
SDValue Addr = Node->getOperand(1);
SDValue Base, Scale, Index, Disp, Segment;
if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
break;
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
SDValue());
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
Chain.getValue(1));
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
Chain.getValue(1));
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
Chain.getValue(1));
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
Chain.getValue(1));
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
Chain.getValue(1));
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
Chain.getValue(1));
Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
Chain.getValue(1));
MachineSDNode *Res = CurDAG->getMachineNode(
Opcode, dl, Node->getVTList(),
{Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
ReplaceNode(Node, Res);
return;
}
}
SelectCode(Node);
}