RValue CodeGenFunction::EmitAtomicExpr()

in clang/lib/CodeGen/CGAtomic.cpp, lines 797-1458


RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
  QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
  QualType MemTy = AtomicTy;
  if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
    MemTy = AT->getValueType();
  llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;

  Address Val1 = Address::invalid();
  Address Val2 = Address::invalid();
  Address Dest = Address::invalid();
  Address Ptr = EmitPointerWithAlignment(E->getPtr());

  if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
      E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
    LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
    EmitAtomicInit(E->getVal1(), lvalue);
    return RValue::get(nullptr);
  }
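  // (atomic_init is a plain, non-atomic store of the initial value; e.g.
  // __c11_atomic_init(&obj, 42) is just an ordinary store, so it needs none
  // of the ordering/libcall machinery below.)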

  auto TInfo = getContext().getTypeInfoInChars(AtomicTy);
  uint64_t Size = TInfo.Width.getQuantity();
  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();

  bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
  bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
  bool UseLibcall = Misaligned | Oversized;
  bool ShouldCastToIntPtrTy = true;

  CharUnits MaxInlineWidth =
      getContext().toCharUnitsFromBits(MaxInlineWidthInBits);
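
  // For illustration: on a target whose maximum inline atomic width is 64
  // bits, a 16-byte _Atomic struct is Oversized; a 4-byte _Atomic(int) at
  // alignment 1 (e.g. in a packed struct) is Misaligned. Either condition
  // forces the libcall path.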

  DiagnosticsEngine &Diags = CGM.getDiags();

  if (Misaligned) {
    Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
        << (int)TInfo.Width.getQuantity()
        << (int)Ptr.getAlignment().getQuantity();
  }

  if (Oversized) {
    Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_oversized)
        << (int)TInfo.Width.getQuantity() << (int)MaxInlineWidth.getQuantity();
  }

  llvm::Value *Order = EmitScalarExpr(E->getOrder());
  llvm::Value *Scope =
      E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr;

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
  case AtomicExpr::AO__opencl_atomic_init:
    llvm_unreachable("Already handled above with EmitAtomicInit!");

  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__opencl_atomic_load:
  case AtomicExpr::AO__hip_atomic_load:
  case AtomicExpr::AO__atomic_load_n:
    break;

  case AtomicExpr::AO__atomic_load:
    Dest = EmitPointerWithAlignment(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_store:
    Val1 = EmitPointerWithAlignment(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_exchange:
    Val1 = EmitPointerWithAlignment(E->getVal1());
    Dest = EmitPointerWithAlignment(E->getVal2());
    break;

  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
  case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
  case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
  case AtomicExpr::AO__atomic_compare_exchange_n:
  case AtomicExpr::AO__atomic_compare_exchange:
    Val1 = EmitPointerWithAlignment(E->getVal1());
    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
      Val2 = EmitPointerWithAlignment(E->getVal2());
    else
      Val2 = EmitValToTemp(*this, E->getVal2());
    OrderFail = EmitScalarExpr(E->getOrderFail());
    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
        E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
      IsWeak = EmitScalarExpr(E->getWeak());
    break;

  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__c11_atomic_fetch_sub:
  case AtomicExpr::AO__hip_atomic_fetch_add:
  case AtomicExpr::AO__opencl_atomic_fetch_add:
  case AtomicExpr::AO__opencl_atomic_fetch_sub:
    if (MemTy->isPointerType()) {
      // For pointer arithmetic, we're required to do a bit of math:
      // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
      // ... but only for the C11 builtins. The GNU builtins expect the
      // user to multiply by sizeof(T).
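      //
      // Illustrative example, with _Atomic(int *) p and int *q:
      //   __c11_atomic_fetch_add(&p, 2, ...)  advances p by 2 * sizeof(int);
      //   __atomic_fetch_add(&q, 2, ...)      adds the raw value 2.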
      QualType Val1Ty = E->getVal1()->getType();
      llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
      CharUnits PointeeIncAmt =
          getContext().getTypeSizeInChars(MemTy->getPointeeType());
      Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
      auto Temp = CreateMemTemp(Val1Ty, ".atomictmp");
      Val1 = Temp;
      EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
      break;
    }
    LLVM_FALLTHROUGH;
  case AtomicExpr::AO__atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_sub:
  case AtomicExpr::AO__atomic_add_fetch:
  case AtomicExpr::AO__atomic_sub_fetch:
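    // Floating-point operands are kept in FP form here so that, where the
    // target supports it, a float/double fetch_add can be emitted as an FP
    // atomicrmw rather than being bit-cast through an integer type.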
    ShouldCastToIntPtrTy = !MemTy->isFloatingType();
    LLVM_FALLTHROUGH;

  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__opencl_atomic_store:
  case AtomicExpr::AO__hip_atomic_store:
  case AtomicExpr::AO__opencl_atomic_exchange:
  case AtomicExpr::AO__hip_atomic_exchange:
  case AtomicExpr::AO__atomic_store_n:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__c11_atomic_fetch_nand:
  case AtomicExpr::AO__c11_atomic_fetch_max:
  case AtomicExpr::AO__c11_atomic_fetch_min:
  case AtomicExpr::AO__opencl_atomic_fetch_and:
  case AtomicExpr::AO__opencl_atomic_fetch_or:
  case AtomicExpr::AO__opencl_atomic_fetch_xor:
  case AtomicExpr::AO__opencl_atomic_fetch_min:
  case AtomicExpr::AO__opencl_atomic_fetch_max:
  case AtomicExpr::AO__atomic_fetch_and:
  case AtomicExpr::AO__hip_atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_or:
  case AtomicExpr::AO__hip_atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_xor:
  case AtomicExpr::AO__hip_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_nand:
  case AtomicExpr::AO__atomic_and_fetch:
  case AtomicExpr::AO__atomic_or_fetch:
  case AtomicExpr::AO__atomic_xor_fetch:
  case AtomicExpr::AO__atomic_nand_fetch:
  case AtomicExpr::AO__atomic_max_fetch:
  case AtomicExpr::AO__atomic_min_fetch:
  case AtomicExpr::AO__atomic_fetch_max:
  case AtomicExpr::AO__hip_atomic_fetch_max:
  case AtomicExpr::AO__atomic_fetch_min:
  case AtomicExpr::AO__hip_atomic_fetch_min:
    Val1 = EmitValToTemp(*this, E->getVal1());
    break;
  }

  QualType RValTy = E->getType().getUnqualifiedType();

  // The inlined atomics only function on iN types, where N is a power of 2. We
  // need to make sure (via temporaries if necessary) that all incoming values
  // are compatible.
  LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
  AtomicInfo Atomics(*this, AtomicVal);

  if (ShouldCastToIntPtrTy) {
    Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
    if (Val1.isValid())
      Val1 = Atomics.convertToAtomicIntPointer(Val1);
    if (Val2.isValid())
      Val2 = Atomics.convertToAtomicIntPointer(Val2);
  }
  if (Dest.isValid()) {
    if (ShouldCastToIntPtrTy)
      Dest = Atomics.emitCastToAtomicIntPointer(Dest);
  } else if (E->isCmpXChg())
    Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
  else if (!RValTy->isVoidType()) {
    Dest = Atomics.CreateTempAlloca();
    if (ShouldCastToIntPtrTy)
      Dest = Atomics.emitCastToAtomicIntPointer(Dest);
  }
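
  // At this point Ptr (and Val1/Val2, where present) address iN storage when
  // ShouldCastToIntPtrTy is set, and Dest is the user-supplied out-pointer, a
  // bool temporary for compare-exchange, or a fresh temporary for the result
  // (void-returning stores leave Dest invalid).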

  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
  if (UseLibcall) {
    bool UseOptimizedLibcall = false;
    switch (E->getOp()) {
    case AtomicExpr::AO__c11_atomic_init:
    case AtomicExpr::AO__opencl_atomic_init:
      llvm_unreachable("Already handled above with EmitAtomicInit!");

    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__opencl_atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_add:
    case AtomicExpr::AO__hip_atomic_fetch_add:
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__opencl_atomic_fetch_and:
    case AtomicExpr::AO__hip_atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_and:
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__opencl_atomic_fetch_or:
    case AtomicExpr::AO__hip_atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_or:
    case AtomicExpr::AO__c11_atomic_fetch_nand:
    case AtomicExpr::AO__atomic_fetch_nand:
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__opencl_atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_sub:
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__opencl_atomic_fetch_xor:
    case AtomicExpr::AO__opencl_atomic_fetch_min:
    case AtomicExpr::AO__opencl_atomic_fetch_max:
    case AtomicExpr::AO__atomic_fetch_xor:
    case AtomicExpr::AO__hip_atomic_fetch_xor:
    case AtomicExpr::AO__c11_atomic_fetch_max:
    case AtomicExpr::AO__c11_atomic_fetch_min:
    case AtomicExpr::AO__atomic_add_fetch:
    case AtomicExpr::AO__atomic_and_fetch:
    case AtomicExpr::AO__atomic_nand_fetch:
    case AtomicExpr::AO__atomic_or_fetch:
    case AtomicExpr::AO__atomic_sub_fetch:
    case AtomicExpr::AO__atomic_xor_fetch:
    case AtomicExpr::AO__atomic_fetch_max:
    case AtomicExpr::AO__hip_atomic_fetch_max:
    case AtomicExpr::AO__atomic_fetch_min:
    case AtomicExpr::AO__hip_atomic_fetch_min:
    case AtomicExpr::AO__atomic_max_fetch:
    case AtomicExpr::AO__atomic_min_fetch:
      // For these, only library calls for certain sizes exist.
      UseOptimizedLibcall = true;
      break;

    case AtomicExpr::AO__atomic_load:
    case AtomicExpr::AO__atomic_store:
    case AtomicExpr::AO__atomic_exchange:
    case AtomicExpr::AO__atomic_compare_exchange:
      // Use the generic version if we don't know that the operand will be
      // suitably aligned for the optimized version.
      if (Misaligned)
        break;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_load:
    case AtomicExpr::AO__c11_atomic_store:
    case AtomicExpr::AO__c11_atomic_exchange:
    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
    case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
    case AtomicExpr::AO__opencl_atomic_load:
    case AtomicExpr::AO__hip_atomic_load:
    case AtomicExpr::AO__opencl_atomic_store:
    case AtomicExpr::AO__hip_atomic_store:
    case AtomicExpr::AO__opencl_atomic_exchange:
    case AtomicExpr::AO__hip_atomic_exchange:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
    case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
    case AtomicExpr::AO__atomic_load_n:
    case AtomicExpr::AO__atomic_store_n:
    case AtomicExpr::AO__atomic_exchange_n:
    case AtomicExpr::AO__atomic_compare_exchange_n:
      // Only use optimized library calls for sizes for which they exist.
      // FIXME: Size == 16 optimized library functions exist too.
      if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
        UseOptimizedLibcall = true;
      break;
    }
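    // For example, an 8-byte __atomic_load_n selects the optimized
    // __atomic_load_8 entry point, while a 32-byte load must use the generic
    // __atomic_load, which takes the size as its first argument.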

    CallArgList Args;
    if (!UseOptimizedLibcall) {
      // For non-optimized library calls, the size is the first parameter
      Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
               getContext().getSizeType());
    }
    // Atomic address is the first or second parameter
    // The OpenCL atomic library functions only accept pointer arguments to
    // generic address space.
    auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
      if (!E->isOpenCL())
        return V;
      auto AS = PT->castAs<PointerType>()->getPointeeType().getAddressSpace();
      if (AS == LangAS::opencl_generic)
        return V;
      auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
      auto T = llvm::cast<llvm::PointerType>(V->getType());
      auto *DestType = llvm::PointerType::getWithSamePointeeType(T, DestAS);

      return getTargetHooks().performAddrSpaceCast(
          *this, V, AS, LangAS::opencl_generic, DestType, false);
    };
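    // For example, under OpenCL a pointer in the global address space is
    // address-space-cast to the generic address space before being passed to
    // the libcall.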

    Args.add(RValue::get(CastToGenericAddrSpace(
                 EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())),
             getContext().VoidPtrTy);

    std::string LibCallName;
    QualType LoweredMemTy =
      MemTy->isPointerType() ? getContext().getIntPtrType() : MemTy;
    QualType RetTy;
    bool HaveRetTy = false;
    llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
    bool PostOpMinMax = false;
    switch (E->getOp()) {
    case AtomicExpr::AO__c11_atomic_init:
    case AtomicExpr::AO__opencl_atomic_init:
      llvm_unreachable("Already handled!");

    // There is only one libcall for compare and exchange, because there is no
    // optimisation benefit possible from a libcall version of a weak compare
    // and exchange.
    // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
    //                                void *desired, int success, int failure)
    // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
    //                                  int success, int failure)
    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
    case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
    case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
    case AtomicExpr::AO__atomic_compare_exchange:
    case AtomicExpr::AO__atomic_compare_exchange_n:
      LibCallName = "__atomic_compare_exchange";
      RetTy = getContext().BoolTy;
      HaveRetTy = true;
      Args.add(
          RValue::get(CastToGenericAddrSpace(
              EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())),
          getContext().VoidPtrTy);
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      Args.add(RValue::get(Order), getContext().IntTy);
      Order = OrderFail;
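      // Reusing Order here lets the common "order is always the last
      // parameter" code below append the failure ordering as the final
      // argument.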
      break;
    // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
    //                        int order)
    // T __atomic_exchange_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_exchange:
    case AtomicExpr::AO__opencl_atomic_exchange:
    case AtomicExpr::AO__atomic_exchange_n:
    case AtomicExpr::AO__atomic_exchange:
    case AtomicExpr::AO__hip_atomic_exchange:
      LibCallName = "__atomic_exchange";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // void __atomic_store(size_t size, void *mem, void *val, int order)
    // void __atomic_store_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_store:
    case AtomicExpr::AO__opencl_atomic_store:
    case AtomicExpr::AO__hip_atomic_store:
    case AtomicExpr::AO__atomic_store:
    case AtomicExpr::AO__atomic_store_n:
      LibCallName = "__atomic_store";
      RetTy = getContext().VoidTy;
      HaveRetTy = true;
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // void __atomic_load(size_t size, void *mem, void *return, int order)
    // T __atomic_load_N(T *mem, int order)
    case AtomicExpr::AO__c11_atomic_load:
    case AtomicExpr::AO__opencl_atomic_load:
    case AtomicExpr::AO__hip_atomic_load:
    case AtomicExpr::AO__atomic_load:
    case AtomicExpr::AO__atomic_load_n:
      LibCallName = "__atomic_load";
      break;
    // T __atomic_add_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_add_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_add_fetch:
      PostOp = llvm::Instruction::Add;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__opencl_atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_add:
    case AtomicExpr::AO__hip_atomic_fetch_add:
      LibCallName = "__atomic_fetch_add";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_and_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_and_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_and_fetch:
      PostOp = llvm::Instruction::And;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__opencl_atomic_fetch_and:
    case AtomicExpr::AO__hip_atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_and:
      LibCallName = "__atomic_fetch_and";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_or_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_or_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_or_fetch:
      PostOp = llvm::Instruction::Or;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__opencl_atomic_fetch_or:
    case AtomicExpr::AO__hip_atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_or:
      LibCallName = "__atomic_fetch_or";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_sub_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_sub_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_sub_fetch:
      PostOp = llvm::Instruction::Sub;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__opencl_atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_sub:
      LibCallName = "__atomic_fetch_sub";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_xor_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_xor_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_xor_fetch:
      PostOp = llvm::Instruction::Xor;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__opencl_atomic_fetch_xor:
    case AtomicExpr::AO__hip_atomic_fetch_xor:
    case AtomicExpr::AO__atomic_fetch_xor:
      LibCallName = "__atomic_fetch_xor";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    case AtomicExpr::AO__atomic_min_fetch:
      PostOpMinMax = true;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_min:
    case AtomicExpr::AO__atomic_fetch_min:
    case AtomicExpr::AO__hip_atomic_fetch_min:
    case AtomicExpr::AO__opencl_atomic_fetch_min:
      LibCallName = E->getValueType()->isSignedIntegerType()
                        ? "__atomic_fetch_min"
                        : "__atomic_fetch_umin";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
      break;
    case AtomicExpr::AO__atomic_max_fetch:
      PostOpMinMax = true;
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_max:
    case AtomicExpr::AO__atomic_fetch_max:
    case AtomicExpr::AO__hip_atomic_fetch_max:
    case AtomicExpr::AO__opencl_atomic_fetch_max:
      LibCallName = E->getValueType()->isSignedIntegerType()
                        ? "__atomic_fetch_max"
                        : "__atomic_fetch_umax";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
      break;
    // T __atomic_nand_fetch_N(T *mem, T val, int order)
    // T __atomic_fetch_nand_N(T *mem, T val, int order)
    case AtomicExpr::AO__atomic_nand_fetch:
      PostOp = llvm::Instruction::And; // the NOT is special cased below
      LLVM_FALLTHROUGH;
    case AtomicExpr::AO__c11_atomic_fetch_nand:
    case AtomicExpr::AO__atomic_fetch_nand:
      LibCallName = "__atomic_fetch_nand";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                        MemTy, E->getExprLoc(), TInfo.Width);
      break;
    }

    if (E->isOpenCL()) {
      LibCallName = std::string("__opencl") +
          StringRef(LibCallName).drop_front(1).str();
    }
    // Optimized functions have the size in their name.
    if (UseOptimizedLibcall)
      LibCallName += "_" + llvm::utostr(Size);
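    // e.g. "__atomic_fetch_add" with Size == 4 becomes "__atomic_fetch_add_4"
    // (or "__opencl_atomic_fetch_add_4" after the OpenCL rename above).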
    // By default, assume we return a value of the atomic type.
    if (!HaveRetTy) {
      if (UseOptimizedLibcall) {
        // Value is returned directly.
        // The function returns an appropriately sized integer type.
        RetTy = getContext().getIntTypeForBitwidth(
            getContext().toBits(TInfo.Width), /*Signed=*/false);
      } else {
        // Value is returned through parameter before the order.
        RetTy = getContext().VoidTy;
        Args.add(RValue::get(EmitCastToVoidPtr(Dest.getPointer())),
                 getContext().VoidPtrTy);
      }
    }
    // order is always the last parameter
    Args.add(RValue::get(Order), getContext().IntTy);
    if (E->isOpenCL())
      Args.add(RValue::get(Scope), getContext().IntTy);

    // PostOp is only needed for the atomic_*_fetch operations, and
    // thus is only needed for and implemented in the
    // UseOptimizedLibcall codepath.
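    // For example, __atomic_add_fetch with Size == 4 is emitted as a call to
    // __atomic_fetch_add_4 followed by re-applying the add to the returned
    // value; __atomic_nand_fetch additionally applies the NOT below.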
    assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax));

    RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
    // The value is returned directly from the libcall.
    if (E->isCmpXChg())
      return Res;

    // The value is returned directly for optimized libcalls but the expr
    // provided an out-param.
    if (UseOptimizedLibcall && Res.getScalarVal()) {
      llvm::Value *ResVal = Res.getScalarVal();
      if (PostOpMinMax) {
        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
        ResVal = EmitPostAtomicMinMax(Builder, E->getOp(),
                                      E->getValueType()->isSignedIntegerType(),
                                      ResVal, LoadVal1);
      } else if (PostOp) {
        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
        ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
      }
      if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
        ResVal = Builder.CreateNot(ResVal);

      Builder.CreateStore(
          ResVal, Builder.CreateElementBitCast(Dest, ResVal->getType()));
    }

    if (RValTy->isVoidType())
      return RValue::get(nullptr);

    return convertTempToRValue(
        Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)),
        RValTy, E->getExprLoc());
  }

  bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
                 E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
                 E->getOp() == AtomicExpr::AO__hip_atomic_store ||
                 E->getOp() == AtomicExpr::AO__atomic_store ||
                 E->getOp() == AtomicExpr::AO__atomic_store_n;
  bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
                E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
                E->getOp() == AtomicExpr::AO__hip_atomic_load ||
                E->getOp() == AtomicExpr::AO__atomic_load ||
                E->getOp() == AtomicExpr::AO__atomic_load_n;
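
  // C11 permits only relaxed/release/seq_cst orderings on an atomic store and
  // relaxed/consume/acquire/seq_cst on an atomic load; IsStore and IsLoad let
  // both the constant-order path below and the runtime switch skip the
  // invalid combinations.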

  if (isa<llvm::ConstantInt>(Order)) {
    auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
    // We should not ever get to a case where the ordering isn't a valid C ABI
    // value, but it's hard to enforce that in general.
    if (llvm::isValidAtomicOrderingCABI(ord))
      switch ((llvm::AtomicOrderingCABI)ord) {
      case llvm::AtomicOrderingCABI::relaxed:
        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                     llvm::AtomicOrdering::Monotonic, Scope);
        break;
      case llvm::AtomicOrderingCABI::consume:
      case llvm::AtomicOrderingCABI::acquire:
        if (IsStore)
          break; // Avoid crashing on code with undefined behavior
        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                     llvm::AtomicOrdering::Acquire, Scope);
        break;
      case llvm::AtomicOrderingCABI::release:
        if (IsLoad)
          break; // Avoid crashing on code with undefined behavior
        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                     llvm::AtomicOrdering::Release, Scope);
        break;
      case llvm::AtomicOrderingCABI::acq_rel:
        if (IsLoad || IsStore)
          break; // Avoid crashing on code with undefined behavior
        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                     llvm::AtomicOrdering::AcquireRelease, Scope);
        break;
      case llvm::AtomicOrderingCABI::seq_cst:
        EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                     llvm::AtomicOrdering::SequentiallyConsistent, Scope);
        break;
      }
    if (RValTy->isVoidType())
      return RValue::get(nullptr);

    return convertTempToRValue(
        Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)),
        RValTy, E->getExprLoc());
  }

  // Long case, when Order isn't obviously constant.

  // Create all the relevant BB's
  llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr,
                   *ReleaseBB = nullptr, *AcqRelBB = nullptr,
                   *SeqCstBB = nullptr;
  MonotonicBB = createBasicBlock("monotonic", CurFn);
  if (!IsStore)
    AcquireBB = createBasicBlock("acquire", CurFn);
  if (!IsLoad)
    ReleaseBB = createBasicBlock("release", CurFn);
  if (!IsLoad && !IsStore)
    AcqRelBB = createBasicBlock("acqrel", CurFn);
  SeqCstBB = createBasicBlock("seqcst", CurFn);
  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

  // Create the switch for the split
  // MonotonicBB is arbitrarily chosen as the default case; in practice, this
  // doesn't matter unless someone is crazy enough to use something that
  // doesn't fold to a constant for the ordering.
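  // For example, a store with a runtime ordering gets only the monotonic,
  // release, and seqcst blocks; the (invalid) acquire and acqrel values fall
  // through to the monotonic default.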
  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);

  // Emit all the different atomics
  Builder.SetInsertPoint(MonotonicBB);
  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
               llvm::AtomicOrdering::Monotonic, Scope);
  Builder.CreateBr(ContBB);
  if (!IsStore) {
    Builder.SetInsertPoint(AcquireBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                 llvm::AtomicOrdering::Acquire, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                AcquireBB);
    SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
                AcquireBB);
  }
  if (!IsLoad) {
    Builder.SetInsertPoint(ReleaseBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                 llvm::AtomicOrdering::Release, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                ReleaseBB);
  }
  if (!IsLoad && !IsStore) {
    Builder.SetInsertPoint(AcqRelBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
                 llvm::AtomicOrdering::AcquireRelease, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                AcqRelBB);
  }
  Builder.SetInsertPoint(SeqCstBB);
  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
               llvm::AtomicOrdering::SequentiallyConsistent, Scope);
  Builder.CreateBr(ContBB);
  SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
              SeqCstBB);

  // Cleanup and return
  Builder.SetInsertPoint(ContBB);
  if (RValTy->isVoidType())
    return RValue::get(nullptr);

  assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
  return convertTempToRValue(
      Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)),
      RValTy, E->getExprLoc());
}
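
As a caller-side illustration (not part of the source), the two lowering
strategies above can be seen from a small translation unit; the struct and
function names here are hypothetical, and the builtins use the GNU
__atomic_* spellings Clang accepts:

#include <cstdint>

// 32 bytes: wider than the inline atomic width on typical targets.
struct Big { std::uint64_t w[4]; };

int exchange_int(int *p, int v) {
  // 4-byte, naturally aligned, constant seq_cst order: EmitAtomicExpr emits
  // an inline 'atomicrmw xchg ... seq_cst'.
  return __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST);
}

bool swap_big(Big *p, Big *expected, Big *desired) {
  // Oversized for inline lowering, so this becomes a call to the generic
  //   bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
  //                                  void *desired, int success, int failure)
  return __atomic_compare_exchange(p, expected, desired, /*weak=*/false,
                                   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}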