bool TargetLowering::SimplifyDemandedBits()

in llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp [898:2327]


bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (Op.getValueType().isScalableVector())
    return false;

  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, Just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::TargetConstant:
    llvm_unreachable("Can't simplify this node");
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src its
    // inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS.  Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOne())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(Op, Op0);

      // See if we don't demand either half of the rotated bits.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
          DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
      }
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
          DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::UMIN: {
    // Check if one arg is always less than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known0, Known1);
    if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
      return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
      return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::UMAX: {
    // Check if one arg is always greater than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known0, Known1);
    if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
      return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
      return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = VT;
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);

        SDValue ShiftAmt =
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsLE && IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Src.getNode()->hasOneUse()) {
      switch (Src.getOpcode()) {
      default:
        break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;

        const APInt *ShAmtC =
            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
        if (!ShAmtC || ShAmtC->uge(BitWidth))
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getConstant(
              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    Known.Zero |= ~InMask;
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
      // TODO - bigendian once we have test coverage.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnes() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnes()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    LLVM_FALLTHROUGH;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
                                            Known, TLO, Depth))
        return true;
      break;
    }

    // Just use computeKnownBits to compute output bits.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNode *Op :
         llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    if (VT.isInteger())
      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
    if (VT.isFloatingPoint())
      return TLO.CombineTo(
          Op,
          TLO.DAG.getConstantFP(
              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
  }

  return false;
}