void AArch64InstrInfo::genAlternativeCodeSequence()

in llvm/lib/Target/AArch64/AArch64InstrInfo.cpp [5354:6228]


void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
      return;
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
            .addReg(ZeroReg)
            .addImm(Encoding);
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    Register NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
      return;
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
            .addReg(ZeroReg)
            .addImm(Encoding);
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }

  case MachineCombinerPattern::MULADDv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i8_OP2:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv16i8_OP2:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i16_OP2:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i16_OP2:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv2i32_OP2:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i32_OP2:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::MULSUBv8i8_OP1:
    Opc = AArch64::MLAv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv8i8_OP2:
    Opc = AArch64::MLSv8i8;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv16i8_OP1:
    Opc = AArch64::MLAv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv16i8_OP2:
    Opc = AArch64::MLSv16i8;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_OP1:
    Opc = AArch64::MLAv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_OP2:
    Opc = AArch64::MLSv4i16;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_OP1:
    Opc = AArch64::MLAv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_OP2:
    Opc = AArch64::MLSv8i16;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_OP1:
    Opc = AArch64::MLAv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_OP2:
    Opc = AArch64::MLSv2i32;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_OP1:
    Opc = AArch64::MLAv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_OP2:
    Opc = AArch64::MLSv4i32;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
    Opc = AArch64::MLAv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
    Opc = AArch64::MLSv4i16_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
    Opc = AArch64::MLAv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
    Opc = AArch64::MLSv8i16_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
    Opc = AArch64::MLAv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
    Opc = AArch64::MLSv2i32_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
    Opc = AArch64::MLAv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
                                 RC);
    break;
  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
    Opc = AArch64::MLSv4i32_indexed;
    RC = &AArch64::FPR128RegClass;
    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  // Floating Point Support
  case MachineCombinerPattern::FMULADDH_OP1:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDD_OP1:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case MachineCombinerPattern::FMULADDH_OP2:
    Opc = AArch64::FMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
    Opc = AArch64::FMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDD_OP2:
    Opc = AArch64::FMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv4f16_OP1:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLAv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv8f16_OP1:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLAv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBH_OP1:
    Opc = AArch64::FNMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBS_OP1:
    Opc = AArch64::FNMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBD_OP1:
    Opc = AArch64::FNMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case MachineCombinerPattern::FNMULSUBH_OP1:
    Opc = AArch64::FNMADDHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FNMULSUBS_OP1:
    Opc = AArch64::FNMADDSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FNMULSUBD_OP1:
    Opc = AArch64::FNMADDDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;

  case MachineCombinerPattern::FMULSUBH_OP2:
    Opc = AArch64::FMSUBHrrr;
    RC = &AArch64::FPR16RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBS_OP2:
    Opc = AArch64::FMSUBSrrr;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMULSUBD_OP2:
    Opc = AArch64::FMSUBDrrr;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv4f16_OP1:
  case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
      Opc = AArch64::FMLAv4f16;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    } else {
      Opc = AArch64::FMLAv4i16_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f16_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLSv4f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FMLSv4i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv8f16_OP1:
  case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
      Opc = AArch64::FMLAv8f16;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    } else {
      Opc = AArch64::FMLAv8i16_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv8f16_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLSv8f16;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Accumulator);
    break;
  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    Opc = AArch64::FMLSv8i16_indexed;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLSv2f32_OP1:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f32_OP1:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv2f64_OP1:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    Register NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMULv2i32_indexed_OP1:
  case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case MachineCombinerPattern::FMULv2i64_indexed_OP1:
  case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case MachineCombinerPattern::FMULv4i16_indexed_OP1:
  case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  case MachineCombinerPattern::FMULv4i32_indexed_OP1:
  case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}