RValue CodeGenFunction::EmitCall()

in clang/lib/CodeGen/CGCall.cpp [4607:5507]


RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
                                 const CGCallee &Callee,
                                 ReturnValueSlot ReturnValue,
                                 const CallArgList &CallArgs,
                                 llvm::CallBase **callOrInvoke, bool IsMustTail,
                                 SourceLocation Loc) {
  // FIXME: We no longer need the types from CallArgs; lift up and simplify.

  assert(Callee.isOrdinary() || Callee.isVirtual());

  // Handle struct-return functions by passing a pointer to the
  // location that we would like to return into.
  QualType RetTy = CallInfo.getReturnType();
  const ABIArgInfo &RetAI = CallInfo.getReturnInfo();

  llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo);

  const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
    // We can only guarantee that a function is called from the correct
    // context/function based on the appropriate target attributes,
    // so only check in the case where we have both always_inline and target
    // since otherwise we could be making a conditional call after a check for
    // the proper cpu features (and it won't cause code generation issues due to
    // function based code generation).
    if (TargetDecl->hasAttr<AlwaysInlineAttr>() &&
        TargetDecl->hasAttr<TargetAttr>())
      checkTargetFeatures(Loc, FD);

    // Some architectures (such as x86-64) have the ABI changed based on
    // attribute-target/features. Give them a chance to diagnose.
    CGM.getTargetCodeGenInfo().checkFunctionCallABI(
        CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), FD, CallArgs);
  }

#ifndef NDEBUG
  if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) {
    // For an inalloca varargs function, we don't expect CallInfo to match the
    // function pointer's type, because the inalloca struct will have extra
    // fields in it for the varargs parameters.  Code later in this function
    // bitcasts the function pointer to the type derived from CallInfo.
    //
    // In other cases, we assert that the types match up (until pointers stop
    // having pointee types).
    if (Callee.isVirtual())
      assert(IRFuncTy == Callee.getVirtualFunctionType());
    else {
      llvm::PointerType *PtrTy =
          llvm::cast<llvm::PointerType>(Callee.getFunctionPointer()->getType());
      assert(PtrTy->isOpaqueOrPointeeTypeMatches(IRFuncTy));
    }
  }
#endif

  // 1. Set up the arguments.

  // If we're using inalloca, insert the allocation after the stack save.
  // FIXME: Do this earlier rather than hacking it in here!
  Address ArgMemory = Address::invalid();
  if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
    const llvm::DataLayout &DL = CGM.getDataLayout();
    llvm::Instruction *IP = CallArgs.getStackBase();
    llvm::AllocaInst *AI;
    if (IP) {
      IP = IP->getNextNode();
      AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(),
                                "argmem", IP);
    } else {
      AI = CreateTempAlloca(ArgStruct, "argmem");
    }
    auto Align = CallInfo.getArgStructAlignment();
    AI->setAlignment(Align.getAsAlign());
    AI->setUsedWithInAlloca(true);
    assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
    ArgMemory = Address(AI, Align);
  }
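  // Illustrative sketch (not emitted verbatim by this code): for a 32-bit x86
  // call site that uses inalloca, the block above produces roughly
  //   %argmem = alloca inalloca <{ i32, %struct.NonTrivial }>, align 4
  // placed right after the stack save when one exists, and the whole packed
  // struct is later passed to the callee as the single inalloca argument.
  // (Struct and value names here are hypothetical.)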

  ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo);
  SmallVector<llvm::Value *, 16> IRCallArgs(IRFunctionArgs.totalIRArgs());

  // If the call returns a temporary with struct return, create a temporary
  // alloca to hold the result, unless one is given to us.
  Address SRetPtr = Address::invalid();
  Address SRetAlloca = Address::invalid();
  llvm::Value *UnusedReturnSizePtr = nullptr;
  if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
    if (!ReturnValue.isNull()) {
      SRetPtr = ReturnValue.getValue();
    } else {
      SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca);
      if (HaveInsertPoint() && ReturnValue.isUnused()) {
        llvm::TypeSize size =
            CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
        UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer());
      }
    }
    if (IRFunctionArgs.hasSRetArg()) {
      IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer();
    } else if (RetAI.isInAlloca()) {
      Address Addr =
          Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
      Builder.CreateStore(SRetPtr.getPointer(), Addr);
    }
  }
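  // Illustrative sketch: when the return value is passed indirectly, the
  // temporary created above becomes the sret argument of the call, e.g.
  //   %tmp = alloca %struct.S, align 4
  //   call void @f(ptr sret(%struct.S) align 4 %tmp, ...)
  // (exact pointer spelling depends on the typed-/opaque-pointer mode).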

  Address swiftErrorTemp = Address::invalid();
  Address swiftErrorArg = Address::invalid();

  // When passing arguments using temporary allocas, we need to add the
  // appropriate lifetime markers. This vector keeps track of all the lifetime
  // markers that need to be ended right after the call.
  SmallVector<CallLifetimeEnd, 2> CallLifetimeEndAfterCall;

  // Translate all of the arguments as necessary to match the IR lowering.
  assert(CallInfo.arg_size() == CallArgs.size() &&
         "Mismatch between function signature & arguments.");
  unsigned ArgNo = 0;
  CGFunctionInfo::const_arg_iterator info_it = CallInfo.arg_begin();
  for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
       I != E; ++I, ++info_it, ++ArgNo) {
    const ABIArgInfo &ArgInfo = info_it->info;

    // Insert a padding argument to ensure proper alignment.
    if (IRFunctionArgs.hasPaddingArg(ArgNo))
      IRCallArgs[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
          llvm::UndefValue::get(ArgInfo.getPaddingType());

    unsigned FirstIRArg, NumIRArgs;
    std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);

    switch (ArgInfo.getKind()) {
    case ABIArgInfo::InAlloca: {
      assert(NumIRArgs == 0);
      assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
      if (I->isAggregate()) {
        Address Addr = I->hasLValue()
                           ? I->getKnownLValue().getAddress(*this)
                           : I->getKnownRValue().getAggregateAddress();
        llvm::Instruction *Placeholder =
            cast<llvm::Instruction>(Addr.getPointer());

        if (!ArgInfo.getInAllocaIndirect()) {
          // Replace the placeholder with the appropriate argument slot GEP.
          CGBuilderTy::InsertPoint IP = Builder.saveIP();
          Builder.SetInsertPoint(Placeholder);
          Addr = Builder.CreateStructGEP(ArgMemory,
                                         ArgInfo.getInAllocaFieldIndex());
          Builder.restoreIP(IP);
        } else {
          // For indirect things such as overaligned structs, replace the
          // placeholder with a regular aggregate temporary alloca. Store the
          // address of this alloca into the struct.
          Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
          Address ArgSlot = Builder.CreateStructGEP(
              ArgMemory, ArgInfo.getInAllocaFieldIndex());
          Builder.CreateStore(Addr.getPointer(), ArgSlot);
        }
        deferPlaceholderReplacement(Placeholder, Addr.getPointer());
      } else if (ArgInfo.getInAllocaIndirect()) {
        // Make a temporary alloca and store the address of it into the argument
        // struct.
        Address Addr = CreateMemTempWithoutCast(
            I->Ty, getContext().getTypeAlignInChars(I->Ty),
            "indirect-arg-temp");
        I->copyInto(*this, Addr);
        Address ArgSlot =
            Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
        Builder.CreateStore(Addr.getPointer(), ArgSlot);
      } else {
        // Store the RValue into the argument struct.
        Address Addr =
            Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
        unsigned AS = Addr.getType()->getPointerAddressSpace();
        llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS);
        // There are some cases where a trivial bitcast is not avoidable.  The
        // definition of a type later in a translation unit may change its type
        // from {}* to (%struct.foo*)*.
        if (Addr.getType() != MemType)
          Addr = Builder.CreateBitCast(Addr, MemType);
        I->copyInto(*this, Addr);
      }
      break;
    }

    case ABIArgInfo::Indirect:
    case ABIArgInfo::IndirectAliased: {
      assert(NumIRArgs == 1);
      if (!I->isAggregate()) {
        // Make a temporary alloca to pass the argument.
        Address Addr = CreateMemTempWithoutCast(
            I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
        IRCallArgs[FirstIRArg] = Addr.getPointer();

        I->copyInto(*this, Addr);
      } else {
        // We want to avoid creating an unnecessary temporary+copy here;
        // however, we need one in three cases:
        // 1. If the argument is not byval, and we are required to copy the
        //    source.  (This case doesn't occur on any common architecture.)
        // 2. If the argument is byval, RV is not sufficiently aligned, and
        //    we cannot force it to be sufficiently aligned.
        // 3. If the argument is byval, but RV is not located in default
        //    or alloca address space.
        Address Addr = I->hasLValue()
                           ? I->getKnownLValue().getAddress(*this)
                           : I->getKnownRValue().getAggregateAddress();
        llvm::Value *V = Addr.getPointer();
        CharUnits Align = ArgInfo.getIndirectAlign();
        const llvm::DataLayout *TD = &CGM.getDataLayout();

        assert((FirstIRArg >= IRFuncTy->getNumParams() ||
                IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() ==
                    TD->getAllocaAddrSpace()) &&
               "indirect argument must be in alloca address space");

        bool NeedCopy = false;

        if (Addr.getAlignment() < Align &&
            llvm::getOrEnforceKnownAlignment(V, Align.getAsAlign(), *TD) <
                Align.getAsAlign()) {
          NeedCopy = true;
        } else if (I->hasLValue()) {
          auto LV = I->getKnownLValue();
          auto AS = LV.getAddressSpace();

          if (!ArgInfo.getIndirectByVal() ||
              (LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) {
            NeedCopy = true;
          }
          if (!getLangOpts().OpenCL) {
            if ((ArgInfo.getIndirectByVal() &&
                (AS != LangAS::Default &&
                 AS != CGM.getASTAllocaAddressSpace()))) {
              NeedCopy = true;
            }
          }
          // For OpenCL even if RV is located in default or alloca address space
          // we don't want to perform address space cast for it.
          else if ((ArgInfo.getIndirectByVal() &&
                    Addr.getType()->getAddressSpace() != IRFuncTy->
                      getParamType(FirstIRArg)->getPointerAddressSpace())) {
            NeedCopy = true;
          }
        }

        if (NeedCopy) {
          // Create an aligned temporary, and copy to it.
          Address AI = CreateMemTempWithoutCast(
              I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
          IRCallArgs[FirstIRArg] = AI.getPointer();

          // Emit lifetime markers for the temporary alloca.
          llvm::TypeSize ByvalTempElementSize =
              CGM.getDataLayout().getTypeAllocSize(AI.getElementType());
          llvm::Value *LifetimeSize =
              EmitLifetimeStart(ByvalTempElementSize, AI.getPointer());

          // Add cleanup code to emit the end lifetime marker after the call.
          if (LifetimeSize) // In case we disabled lifetime markers.
            CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize);

          // Generate the copy.
          I->copyInto(*this, AI);
        } else {
          // Skip the extra memcpy call.
          auto *T = llvm::PointerType::getWithSamePointeeType(
              cast<llvm::PointerType>(V->getType()),
              CGM.getDataLayout().getAllocaAddrSpace());
          IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast(
              *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
              true);
        }
      }
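      // Illustrative sketch: when NeedCopy fires for a byval argument, the
      // emitted IR is roughly
      //   %byval-temp = alloca %struct.S, align 8
      //   call void @llvm.lifetime.start.p0(i64 <size>, ptr %byval-temp)
      //   ; copy the argument into %byval-temp
      //   call void @f(ptr byval(%struct.S) align 8 %byval-temp)
      //   call void @llvm.lifetime.end.p0(i64 <size>, ptr %byval-temp)
      // otherwise the original address is reused, after an address-space cast
      // if one is required. (Names and attributes shown are illustrative.)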
      break;
    }

    case ABIArgInfo::Ignore:
      assert(NumIRArgs == 0);
      break;

    case ABIArgInfo::Extend:
    case ABIArgInfo::Direct: {
      if (!isa<llvm::StructType>(ArgInfo.getCoerceToType()) &&
          ArgInfo.getCoerceToType() == ConvertType(info_it->type) &&
          ArgInfo.getDirectOffset() == 0) {
        assert(NumIRArgs == 1);
        llvm::Value *V;
        if (!I->isAggregate())
          V = I->getKnownRValue().getScalarVal();
        else
          V = Builder.CreateLoad(
              I->hasLValue() ? I->getKnownLValue().getAddress(*this)
                             : I->getKnownRValue().getAggregateAddress());

        // Implement swifterror by copying into a new swifterror argument.
        // We'll write back in the normal path out of the call.
        if (CallInfo.getExtParameterInfo(ArgNo).getABI()
              == ParameterABI::SwiftErrorResult) {
          assert(!swiftErrorTemp.isValid() && "multiple swifterror args");

          QualType pointeeTy = I->Ty->getPointeeType();
          swiftErrorArg =
            Address(V, getContext().getTypeAlignInChars(pointeeTy));

          swiftErrorTemp =
            CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
          V = swiftErrorTemp.getPointer();
          cast<llvm::AllocaInst>(V)->setSwiftError(true);

          llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg);
          Builder.CreateStore(errorValue, swiftErrorTemp);
        }
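        // Illustrative sketch: the copy-in above (and the copy-back after the
        // call) makes the IR look roughly like
        //   %swifterror.temp = alloca swifterror ptr
        //   %e = load ptr, ptr %err.arg
        //   store ptr %e, ptr %swifterror.temp
        //   call void @f(ptr swifterror %swifterror.temp)
        //   ; after the call: load %swifterror.temp, store back to %err.arg
        // where %err.arg stands for the caller's error slot (hypothetical name).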

        // We might have to widen integers, but we should never truncate.
        if (ArgInfo.getCoerceToType() != V->getType() &&
            V->getType()->isIntegerTy())
          V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());

        // If the argument doesn't match, perform a bitcast to coerce it.  This
        // can happen due to trivial type mismatches.
        if (FirstIRArg < IRFuncTy->getNumParams() &&
            V->getType() != IRFuncTy->getParamType(FirstIRArg))
          V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));

        IRCallArgs[FirstIRArg] = V;
        break;
      }

      // FIXME: Avoid the conversion through memory if possible.
      Address Src = Address::invalid();
      if (!I->isAggregate()) {
        Src = CreateMemTemp(I->Ty, "coerce");
        I->copyInto(*this, Src);
      } else {
        Src = I->hasLValue() ? I->getKnownLValue().getAddress(*this)
                             : I->getKnownRValue().getAggregateAddress();
      }

      // If the value is offset in memory, apply the offset now.
      Src = emitAddressAtOffset(*this, Src, ArgInfo);

      // Fast-isel and the optimizer generally like scalar values better than
      // FCAs, so we flatten them if this is safe to do for this argument.
      llvm::StructType *STy =
            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
      if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
        llvm::Type *SrcTy = Src.getElementType();
        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
        uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);

        // If the source type is smaller than the destination type of the
        // coerce-to logic, copy the source value into a temp alloca the size
        // of the destination type to allow loading all of it. The bits past
        // the source value are left undef.
        if (SrcSize < DstSize) {
          Address TempAlloca
            = CreateTempAlloca(STy, Src.getAlignment(),
                               Src.getName() + ".coerce");
          Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
          Src = TempAlloca;
        } else {
          Src = Builder.CreateElementBitCast(Src, STy);
        }

        assert(NumIRArgs == STy->getNumElements());
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Address EltPtr = Builder.CreateStructGEP(Src, i);
          llvm::Value *LI = Builder.CreateLoad(EltPtr);
          IRCallArgs[FirstIRArg + i] = LI;
        }
      } else {
        // In the simple case, just pass the coerced loaded value.
        assert(NumIRArgs == 1);
        llvm::Value *Load =
            CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this);

        if (CallInfo.isCmseNSCall()) {
          // For certain parameter types, clear padding bits, as they may reveal
          // sensitive information.
          // Small struct/union types are passed as integer arrays.
          auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
          if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
            Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
        }
        IRCallArgs[FirstIRArg] = Load;
      }
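      // Illustrative sketch: a struct coerced to { i64, i64 } and flattened by
      // the branch above is passed as two scalar IR arguments, roughly
      //   %0 = load i64, ptr %s.coerce
      //   %1 = load i64, ptr %s.coerce.gep1
      //   call void @f(i64 %0, i64 %1)
      // where %s.coerce.gep1 stands for the GEP to the second struct element.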

      break;
    }

    case ABIArgInfo::CoerceAndExpand: {
      auto coercionType = ArgInfo.getCoerceAndExpandType();
      auto layout = CGM.getDataLayout().getStructLayout(coercionType);

      llvm::Value *tempSize = nullptr;
      Address addr = Address::invalid();
      Address AllocaAddr = Address::invalid();
      if (I->isAggregate()) {
        addr = I->hasLValue() ? I->getKnownLValue().getAddress(*this)
                              : I->getKnownRValue().getAggregateAddress();

      } else {
        RValue RV = I->getKnownRValue();
        assert(RV.isScalar()); // complex should always just be direct

        llvm::Type *scalarType = RV.getScalarVal()->getType();
        auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType);
        auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);

        // Materialize to a temporary.
        addr =
            CreateTempAlloca(RV.getScalarVal()->getType(),
                             CharUnits::fromQuantity(std::max(
                                 layout->getAlignment().value(), scalarAlign)),
                             "tmp",
                             /*ArraySize=*/nullptr, &AllocaAddr);
        tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());

        Builder.CreateStore(RV.getScalarVal(), addr);
      }

      addr = Builder.CreateElementBitCast(addr, coercionType);

      unsigned IRArgPos = FirstIRArg;
      for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
        llvm::Type *eltType = coercionType->getElementType(i);
        if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
        Address eltAddr = Builder.CreateStructGEP(addr, i);
        llvm::Value *elt = Builder.CreateLoad(eltAddr);
        IRCallArgs[IRArgPos++] = elt;
      }
      assert(IRArgPos == FirstIRArg + NumIRArgs);

      if (tempSize) {
        EmitLifetimeEnd(tempSize, AllocaAddr.getPointer());
      }
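      // Illustrative sketch: with a coercion type such as
      // { i64, [4 x i8], float } whose array element is padding, only the i64
      // and float elements are loaded above and appended to IRCallArgs; the
      // padding element is skipped.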

      break;
    }

    case ABIArgInfo::Expand: {
      unsigned IRArgPos = FirstIRArg;
      ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos);
      assert(IRArgPos == FirstIRArg + NumIRArgs);
      break;
    }
    }
  }

  const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
  llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();

  // If we're using inalloca, set up that argument.
  if (ArgMemory.isValid()) {
    llvm::Value *Arg = ArgMemory.getPointer();
    if (CallInfo.isVariadic()) {
      // When passing non-POD arguments by value to variadic functions, we will
      // end up with a variadic prototype and an inalloca call site.  In such
      // cases, we can't do any parameter mismatch checks.  Give up and bitcast
      // the callee.
      unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace();
      CalleePtr =
          Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS));
    } else {
      llvm::Type *LastParamTy =
          IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1);
      if (Arg->getType() != LastParamTy) {
#ifndef NDEBUG
        // Assert that these structs have equivalent element types.
        llvm::StructType *FullTy = CallInfo.getArgStruct();
        llvm::StructType *DeclaredTy = cast<llvm::StructType>(
            cast<llvm::PointerType>(LastParamTy)->getElementType());
        assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
        for (llvm::StructType::element_iterator DI = DeclaredTy->element_begin(),
                                                DE = DeclaredTy->element_end(),
                                                FI = FullTy->element_begin();
             DI != DE; ++DI, ++FI)
          assert(*DI == *FI);
#endif
        Arg = Builder.CreateBitCast(Arg, LastParamTy);
      }
    }
    assert(IRFunctionArgs.hasInallocaArg());
    IRCallArgs[IRFunctionArgs.getInallocaArgNo()] = Arg;
  }

  // 2. Prepare the function pointer.

  // If the callee is a bitcast of a non-variadic function to have a
  // variadic function pointer type, check to see if we can remove the
  // bitcast.  This comes up with unprototyped functions.
  //
  // This makes the IR nicer, but more importantly it ensures that we
  // can inline the function at -O0 if it is marked always_inline.
  auto simplifyVariadicCallee = [](llvm::FunctionType *CalleeFT,
                                   llvm::Value *Ptr) -> llvm::Function * {
    if (!CalleeFT->isVarArg())
      return nullptr;

    // Get underlying value if it's a bitcast
    if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr)) {
      if (CE->getOpcode() == llvm::Instruction::BitCast)
        Ptr = CE->getOperand(0);
    }

    llvm::Function *OrigFn = dyn_cast<llvm::Function>(Ptr);
    if (!OrigFn)
      return nullptr;

    llvm::FunctionType *OrigFT = OrigFn->getFunctionType();

    // If the original type is variadic, or if any of the component types
    // disagree, we cannot remove the cast.
    if (OrigFT->isVarArg() ||
        OrigFT->getNumParams() != CalleeFT->getNumParams() ||
        OrigFT->getReturnType() != CalleeFT->getReturnType())
      return nullptr;

    for (unsigned i = 0, e = OrigFT->getNumParams(); i != e; ++i)
      if (OrigFT->getParamType(i) != CalleeFT->getParamType(i))
        return nullptr;

    return OrigFn;
  };

  if (llvm::Function *OrigFn = simplifyVariadicCallee(IRFuncTy, CalleePtr)) {
    CalleePtr = OrigFn;
    IRFuncTy = OrigFn->getFunctionType();
  }
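  // Illustrative sketch: this matters for unprototyped declarations, e.g.
  //   void f();          // no prototype
  //   void f(int x) { }  // actual, non-variadic definition
  //   f(1);
  // where the call is formed through a variadic function pointer type; the
  // lambda above strips the bitcast when the underlying definition is
  // non-variadic and its parameter types match, so the direct call to @f can
  // still be inlined at -O0 if f is marked always_inline.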

  // 3. Perform the actual call.

  // Deactivate any cleanups that we're supposed to do immediately before
  // the call.
  if (!CallArgs.getCleanupsToDeactivate().empty())
    deactivateArgCleanupsBeforeCall(*this, CallArgs);

  // Assert that the arguments we computed match up.  The IR verifier
  // will catch this, but this is a common enough source of problems
  // during IRGen changes that it's way better for debugging to catch
  // it ourselves here.
#ifndef NDEBUG
  assert(IRCallArgs.size() == IRFuncTy->getNumParams() || IRFuncTy->isVarArg());
  for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
    // The inalloca argument can have a different type.
    if (IRFunctionArgs.hasInallocaArg() &&
        i == IRFunctionArgs.getInallocaArgNo())
      continue;
    if (i < IRFuncTy->getNumParams())
      assert(IRCallArgs[i]->getType() == IRFuncTy->getParamType(i));
  }
#endif

  // Update the largest vector width if any arguments have vector types.
  for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
    if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
      LargestVectorWidth =
          std::max((uint64_t)LargestVectorWidth,
                   VT->getPrimitiveSizeInBits().getKnownMinSize());
  }

  // Compute the calling convention and attributes.
  unsigned CallingConv;
  llvm::AttributeList Attrs;
  CGM.ConstructAttributeList(CalleePtr->getName(), CallInfo,
                             Callee.getAbstractInfo(), Attrs, CallingConv,
                             /*AttrOnCallSite=*/true,
                             /*IsThunk=*/false);

  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
    if (FD->hasAttr<StrictFPAttr>())
      // All calls within a strictfp function are marked strictfp
      Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);

  // Add call-site nomerge attribute if exists.
  if (InNoMergeAttributedStmt)
    Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge);

  // Apply some call-site-specific attributes.
  // TODO: work this into building the attribute set.

  // Apply always_inline to all calls within flatten functions.
  // FIXME: should this really take priority over __try, below?
  if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
      !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
    Attrs =
        Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
  }

  // Disable inlining inside SEH __try blocks.
  if (isSEHTryScope()) {
    Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
  }

  // Decide whether to use a call or an invoke.
  bool CannotThrow;
  if (currentFunctionUsesSEHTry()) {
    // SEH cares about asynchronous exceptions, so everything can "throw."
    CannotThrow = false;
  } else if (isCleanupPadScope() &&
             EHPersonality::get(*this).isMSVCXXPersonality()) {
    // The MSVC++ personality will implicitly terminate the program if an
    // exception is thrown during a cleanup outside of a try/catch.
    // We don't need to model anything in IR to get this behavior.
    CannotThrow = true;
  } else {
    // Otherwise, nounwind call sites will never throw.
    CannotThrow = Attrs.hasFnAttr(llvm::Attribute::NoUnwind);

    if (auto *FPtr = dyn_cast<llvm::Function>(CalleePtr))
      if (FPtr->hasFnAttribute(llvm::Attribute::NoUnwind))
        CannotThrow = true;
  }
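  // Illustrative sketch: when CannotThrow is false and there is an active EH
  // scope, the call below is emitted as an invoke, roughly
  //   invoke void @f(...)
  //       to label %invoke.cont unwind label %lpad
  // otherwise it is emitted as a plain call instruction.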

  // If we made a temporary, be sure to clean up after ourselves. Note that we
  // can't depend on being inside of an ExprWithCleanups, so we need to manually
  // pop this cleanup later on. Being eager about this is OK, since this
  // temporary is 'invisible' outside of the callee.
  if (UnusedReturnSizePtr)
    pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca,
                                         UnusedReturnSizePtr);

  llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();

  SmallVector<llvm::OperandBundleDef, 1> BundleList =
      getBundlesForFunclet(CalleePtr);

  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
    if (FD->hasAttr<StrictFPAttr>())
      // All calls within a strictfp function are marked strictfp
      Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);

  AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl);
  Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);

  AllocAlignAttrEmitter AllocAlignAttrEmitter(*this, TargetDecl, CallArgs);
  Attrs = AllocAlignAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);

  // Emit the actual call/invoke instruction.
  llvm::CallBase *CI;
  if (!InvokeDest) {
    CI = Builder.CreateCall(IRFuncTy, CalleePtr, IRCallArgs, BundleList);
  } else {
    llvm::BasicBlock *Cont = createBasicBlock("invoke.cont");
    CI = Builder.CreateInvoke(IRFuncTy, CalleePtr, Cont, InvokeDest, IRCallArgs,
                              BundleList);
    EmitBlock(Cont);
  }
  if (callOrInvoke)
    *callOrInvoke = CI;

  // If this is within a function that has the guard(nocf) attribute and is an
  // indirect call, add the "guard_nocf" attribute to this call to indicate that
  // Control Flow Guard checks should not be added, even if the call is inlined.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) {
    if (const auto *A = FD->getAttr<CFGuardAttr>()) {
      if (A->getGuard() == CFGuardAttr::GuardArg::nocf && !CI->getCalledFunction())
        Attrs = Attrs.addFnAttribute(getLLVMContext(), "guard_nocf");
    }
  }

  // Apply the attributes and calling convention.
  CI->setAttributes(Attrs);
  CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));

  // Apply various metadata.

  if (!CI->getType()->isVoidTy())
    CI->setName("call");

  // Update largest vector width from the return type.
  if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
    LargestVectorWidth =
        std::max((uint64_t)LargestVectorWidth,
                 VT->getPrimitiveSizeInBits().getKnownMinSize());

  // Insert instrumentation or attach profile metadata at indirect call sites.
  // For more details, see the comment before the definition of
  // IPVK_IndirectCallTarget in InstrProfData.inc.
  if (!CI->getCalledFunction())
    PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget,
                     CI, CalleePtr);

  // In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
  // optimizer it can aggressively ignore unwind edges.
  if (CGM.getLangOpts().ObjCAutoRefCount)
    AddObjCARCExceptionMetadata(CI);

  // Set tail call kind if necessary.
  if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) {
    if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>())
      Call->setTailCallKind(llvm::CallInst::TCK_NoTail);
    else if (IsMustTail)
      Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
  }

  // Add metadata for calls to MSAllocator functions
  if (getDebugInfo() && TargetDecl &&
      TargetDecl->hasAttr<MSAllocatorAttr>())
    getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy->getPointeeType(), Loc);

  // Add metadata if calling an __attribute__((error(""))) or warning fn.
  if (TargetDecl && TargetDecl->hasAttr<ErrorAttr>()) {
    llvm::ConstantInt *Line =
        llvm::ConstantInt::get(Int32Ty, Loc.getRawEncoding());
    llvm::ConstantAsMetadata *MD = llvm::ConstantAsMetadata::get(Line);
    llvm::MDTuple *MDT = llvm::MDNode::get(getLLVMContext(), {MD});
    CI->setMetadata("srcloc", MDT);
  }
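  // Illustrative sketch: the metadata attached above looks roughly like
  //   call void @f(), !srcloc !2
  //   !2 = !{i32 <raw source-location encoding>}
  // and lets the backend point the diagnostic for an
  // __attribute__((error)) / __attribute__((warning)) callee at this call site.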

  // 4. Finish the call.

  // If the call doesn't return, finish the basic block and clear the
  // insertion point; this allows the rest of IRGen to discard
  // unreachable code.
  if (CI->doesNotReturn()) {
    if (UnusedReturnSizePtr)
      PopCleanupBlock();

    // Strip away the noreturn attribute to better diagnose unreachable UB.
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      // Also remove from function since CallBase::hasFnAttr additionally checks
      // attributes of the called function.
      if (auto *F = CI->getCalledFunction())
        F->removeFnAttr(llvm::Attribute::NoReturn);
      CI->removeFnAttr(llvm::Attribute::NoReturn);

      // Avoid incompatibility with ASan which relies on the `noreturn`
      // attribute to insert handler calls.
      if (SanOpts.hasOneOf(SanitizerKind::Address |
                           SanitizerKind::KernelAddress)) {
        SanitizerScope SanScope(this);
        llvm::IRBuilder<>::InsertPointGuard IPGuard(Builder);
        Builder.SetInsertPoint(CI);
        auto *FnType = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
        llvm::FunctionCallee Fn =
            CGM.CreateRuntimeFunction(FnType, "__asan_handle_no_return");
        EmitNounwindRuntimeCall(Fn);
      }
    }

    EmitUnreachable(Loc);
    Builder.ClearInsertionPoint();

    // FIXME: For now, emit a dummy basic block because expr emitters in
    // generally are not ready to handle emitting expressions at unreachable
    // points.
    EnsureInsertPoint();

    // Return a reasonable RValue.
    return GetUndefRValue(RetTy);
  }
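  // Illustrative sketch: for a noreturn callee the emitted block ends with
  //   call void @f(...)
  //   unreachable
  // so the rest of IRGen can drop any code that would follow the call.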

  // If this is a musttail call, return immediately. We do not branch to the
  // epilogue in this case.
  if (IsMustTail) {
    for (auto it = EHStack.find(CurrentCleanupScopeDepth); it != EHStack.end();
         ++it) {
      EHCleanupScope *Cleanup = dyn_cast<EHCleanupScope>(&*it);
      if (!(Cleanup && Cleanup->getCleanup()->isRedundantBeforeReturn()))
        CGM.ErrorUnsupported(MustTailCall, "tail call skipping over cleanups");
    }
    if (CI->getType()->isVoidTy())
      Builder.CreateRetVoid();
    else
      Builder.CreateRet(CI);
    Builder.ClearInsertionPoint();
    EnsureInsertPoint();
    return GetUndefRValue(RetTy);
  }
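  // Illustrative sketch: a musttail call is immediately followed by its
  // return, e.g.
  //   %r = musttail call i32 @f(i32 %x)
  //   ret i32 %r
  // which is why no branch to the function epilogue is emitted here.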

  // Perform the swifterror writeback.
  if (swiftErrorTemp.isValid()) {
    llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp);
    Builder.CreateStore(errorResult, swiftErrorArg);
  }

  // Emit any call-associated writebacks immediately.  Arguably this
  // should happen after any return-value munging.
  if (CallArgs.hasWritebacks())
    emitWritebacks(*this, CallArgs);

  // The stack cleanup for inalloca arguments has to run out of the normal
  // lexical order, so deactivate it and run it manually here.
  CallArgs.freeArgumentMemory(*this);

  // Extract the return value.
  RValue Ret = [&] {
    switch (RetAI.getKind()) {
    case ABIArgInfo::CoerceAndExpand: {
      auto coercionType = RetAI.getCoerceAndExpandType();

      Address addr = SRetPtr;
      addr = Builder.CreateElementBitCast(addr, coercionType);

      assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType());
      bool requiresExtract = isa<llvm::StructType>(CI->getType());

      unsigned unpaddedIndex = 0;
      for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
        llvm::Type *eltType = coercionType->getElementType(i);
        if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
        Address eltAddr = Builder.CreateStructGEP(addr, i);
        llvm::Value *elt = CI;
        if (requiresExtract)
          elt = Builder.CreateExtractValue(elt, unpaddedIndex++);
        else
          assert(unpaddedIndex == 0);
        Builder.CreateStore(elt, eltAddr);
      }
      // FALLTHROUGH
      LLVM_FALLTHROUGH;
    }

    case ABIArgInfo::InAlloca:
    case ABIArgInfo::Indirect: {
      RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation());
      if (UnusedReturnSizePtr)
        PopCleanupBlock();
      return ret;
    }

    case ABIArgInfo::Ignore:
      // If we are ignoring an argument that had a result, make sure to
      // construct the appropriate return value for our caller.
      return GetUndefRValue(RetTy);

    case ABIArgInfo::Extend:
    case ABIArgInfo::Direct: {
      llvm::Type *RetIRTy = ConvertType(RetTy);
      if (RetAI.getCoerceToType() == RetIRTy && RetAI.getDirectOffset() == 0) {
        switch (getEvaluationKind(RetTy)) {
        case TEK_Complex: {
          llvm::Value *Real = Builder.CreateExtractValue(CI, 0);
          llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
          return RValue::getComplex(std::make_pair(Real, Imag));
        }
        case TEK_Aggregate: {
          Address DestPtr = ReturnValue.getValue();
          bool DestIsVolatile = ReturnValue.isVolatile();

          if (!DestPtr.isValid()) {
            DestPtr = CreateMemTemp(RetTy, "agg.tmp");
            DestIsVolatile = false;
          }
          EmitAggregateStore(CI, DestPtr, DestIsVolatile);
          return RValue::getAggregate(DestPtr);
        }
        case TEK_Scalar: {
          // If the argument doesn't match, perform a bitcast to coerce it.  This
          // can happen due to trivial type mismatches.
          llvm::Value *V = CI;
          if (V->getType() != RetIRTy)
            V = Builder.CreateBitCast(V, RetIRTy);
          return RValue::get(V);
        }
        }
        llvm_unreachable("bad evaluation kind");
      }

      Address DestPtr = ReturnValue.getValue();
      bool DestIsVolatile = ReturnValue.isVolatile();

      if (!DestPtr.isValid()) {
        DestPtr = CreateMemTemp(RetTy, "coerce");
        DestIsVolatile = false;
      }

      // If the value is offset in memory, apply the offset now.
      Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
      CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);

      return convertTempToRValue(DestPtr, RetTy, SourceLocation());
    }

    case ABIArgInfo::Expand:
    case ABIArgInfo::IndirectAliased:
      llvm_unreachable("Invalid ABI kind for return argument");
    }

    llvm_unreachable("Unhandled ABIArgInfo::Kind");
  } ();

  // Emit the assume_aligned check on the return value.
  if (Ret.isScalar() && TargetDecl) {
    AssumeAlignedAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret);
    AllocAlignAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret);
  }

  // Explicitly call CallLifetimeEnd::Emit just to re-use the code even though
  // we can't use the full cleanup mechanism.
  for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall)
    LifetimeEnd.Emit(*this, /*Flags=*/{});

  if (!ReturnValue.isExternallyDestructed() &&
      RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct)
    pushDestroy(QualType::DK_nontrivial_c_struct, Ret.getAggregateAddress(),
                RetTy);

  return Ret;
}