//===-- RISCVFrameLowering.cpp - RISC-V Frame Information -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "RISCVFrameLowering.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/LEB128.h"

#include <algorithm>

#define DEBUG_TYPE "riscv-frame"

using namespace llvm;

static Align getABIStackAlignment(RISCVABI::ABI ABI) {
  if (ABI == RISCVABI::ABI_ILP32E)
    return Align(4);
  if (ABI == RISCVABI::ABI_LP64E)
    return Align(8);
  return Align(16);
}

RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI)
    : TargetFrameLowering(
          StackGrowsDown, getABIStackAlignment(STI.getTargetABI()),
          /*LocalAreaOffset=*/0,
          /*TransientStackAlignment=*/getABIStackAlignment(STI.getTargetABI())),
      STI(STI) {}

// The register used to hold the frame pointer.
static constexpr MCPhysReg FPReg = RISCV::X8;

// The register used to hold the stack pointer.
static constexpr MCPhysReg SPReg = RISCV::X2;

// The register used to hold the return address.
static constexpr MCPhysReg RAReg = RISCV::X1;

// LIst of CSRs that are given a fixed location by save/restore libcalls or
// Zcmp/Xqccmp Push/Pop. The order in this table indicates the order the
// registers are saved on the stack. Zcmp uses the reverse order of save/restore
// and Xqccmp on the stack, but this is handled when offsets are calculated.
static const MCPhysReg FixedCSRFIMap[] = {
    /*ra*/ RAReg,      /*s0*/ FPReg,      /*s1*/ RISCV::X9,
    /*s2*/ RISCV::X18, /*s3*/ RISCV::X19, /*s4*/ RISCV::X20,
    /*s5*/ RISCV::X21, /*s6*/ RISCV::X22, /*s7*/ RISCV::X23,
    /*s8*/ RISCV::X24, /*s9*/ RISCV::X25, /*s10*/ RISCV::X26,
    /*s11*/ RISCV::X27};

// The number of stack bytes allocated by `QC.C.MIENTER(.NEST)` and popped by
// `QC.C.MILEAVERET`.
static constexpr uint64_t QCIInterruptPushAmount = 96;

static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
    /* -1 is a gap for mepc/mnepc */
    {/*fp*/ FPReg, -2},
    /* -3 is a gap for qc.mcause */
    {/*ra*/ RAReg, -4},
    /* -5 is reserved */
    {/*t0*/ RISCV::X5, -6},
    {/*t1*/ RISCV::X6, -7},
    {/*t2*/ RISCV::X7, -8},
    {/*a0*/ RISCV::X10, -9},
    {/*a1*/ RISCV::X11, -10},
    {/*a2*/ RISCV::X12, -11},
    {/*a3*/ RISCV::X13, -12},
    {/*a4*/ RISCV::X14, -13},
    {/*a5*/ RISCV::X15, -14},
    {/*a6*/ RISCV::X16, -15},
    {/*a7*/ RISCV::X17, -16},
    {/*t3*/ RISCV::X28, -17},
    {/*t4*/ RISCV::X29, -18},
    {/*t5*/ RISCV::X30, -19},
    {/*t6*/ RISCV::X31, -20},
    /* -21, -22, -23, -24 are reserved */
};

// For now we use x3, a.k.a gp, as pointer to shadow call stack.
// User should not use x3 in their asm.
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL) {
  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") &&
                          STI.hasStdExtZicfiss();
  bool HasSWShadowStack =
      MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
  if (!HasHWShadowStack && !HasSWShadowStack)
    return;

  const llvm::RISCVRegisterInfo *TRI = STI.getRegisterInfo();

  // Do not save RA to the SCS if it's not saved to the regular stack,
  // i.e. RA is not at risk of being overwritten.
  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
  if (llvm::none_of(
          CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
    return;

  const RISCVInstrInfo *TII = STI.getInstrInfo();
  if (HasHWShadowStack) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::SSPUSH)).addReg(RAReg);
    return;
  }

  Register SCSPReg = RISCVABI::getSCSPReg();

  bool IsRV64 = STI.is64Bit();
  int64_t SlotSize = STI.getXLen() / 8;
  // Store return address to shadow call stack
  // addi    gp, gp, [4|8]
  // s[w|d]  ra, -[4|8](gp)
  BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
      .addReg(SCSPReg, RegState::Define)
      .addReg(SCSPReg)
      .addImm(SlotSize)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
      .addReg(RAReg)
      .addReg(SCSPReg)
      .addImm(-SlotSize)
      .setMIFlag(MachineInstr::FrameSetup);

  // Emit a CFI instruction that causes SlotSize to be subtracted from the value
  // of the shadow stack pointer when unwinding past this frame.
  char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
  assert(DwarfSCSReg < 32 && "SCS Register should be < 32 (X3).");

  char Offset = static_cast<char>(-SlotSize) & 0x7f;
  const char CFIInst[] = {
      dwarf::DW_CFA_val_expression,
      DwarfSCSReg, // register
      2,           // length
      static_cast<char>(unsigned(dwarf::DW_OP_breg0 + DwarfSCSReg)),
      Offset, // addend (sleb128)
  };

  CFIInstBuilder(MBB, MI, MachineInstr::FrameSetup)
      .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
}

static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL) {
  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") &&
                          STI.hasStdExtZicfiss();
  bool HasSWShadowStack =
      MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
  if (!HasHWShadowStack && !HasSWShadowStack)
    return;

  // See emitSCSPrologue() above.
  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
  if (llvm::none_of(
          CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
    return;

  const RISCVInstrInfo *TII = STI.getInstrInfo();
  if (HasHWShadowStack) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::SSPOPCHK)).addReg(RAReg);
    return;
  }

  Register SCSPReg = RISCVABI::getSCSPReg();

  bool IsRV64 = STI.is64Bit();
  int64_t SlotSize = STI.getXLen() / 8;
  // Load return address from shadow call stack
  // l[w|d]  ra, -[4|8](gp)
  // addi    gp, gp, -[4|8]
  BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::LD : RISCV::LW))
      .addReg(RAReg, RegState::Define)
      .addReg(SCSPReg)
      .addImm(-SlotSize)
      .setMIFlag(MachineInstr::FrameDestroy);
  BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
      .addReg(SCSPReg, RegState::Define)
      .addReg(SCSPReg)
      .addImm(-SlotSize)
      .setMIFlag(MachineInstr::FrameDestroy);
  // Restore the SCS pointer
  CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
}

// Insert instruction to swap mscratchsw with sp
static void emitSiFiveCLICStackSwap(MachineFunction &MF, MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const DebugLoc &DL) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (!RVFI->isSiFiveStackSwapInterrupt(MF))
    return;

  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = STI.getInstrInfo();

  assert(STI.hasVendorXSfmclic() && "Stack Swapping Requires XSfmclic");

  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW))
      .addReg(SPReg, RegState::Define)
      .addImm(RISCVSysReg::sf_mscratchcsw)
      .addReg(SPReg, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

  // FIXME: CFI Information for this swap.
}

static void
createSiFivePreemptibleInterruptFrameEntries(MachineFunction &MF,
                                             RISCVMachineFunctionInfo &RVFI) {
  if (!RVFI.isSiFivePreemptibleInterrupt(MF))
    return;

  const TargetRegisterClass &RC = RISCV::GPRRegClass;
  const TargetRegisterInfo &TRI =
      *MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Create two frame objects for spilling X8 and X9, which will be done in
  // `emitSiFiveCLICPreemptibleSaves`. This is in addition to any other stack
  // objects we might have for X8 and X9, as they might be saved twice.
  for (int I = 0; I < 2; ++I) {
    int FI = MFI.CreateStackObject(TRI.getSpillSize(RC), TRI.getSpillAlign(RC),
                                   true);
    RVFI.pushInterruptCSRFrameIndex(FI);
  }
}

static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           const DebugLoc &DL) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (!RVFI->isSiFivePreemptibleInterrupt(MF))
    return;

  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = STI.getInstrInfo();

  // FIXME: CFI Information here is nonexistent/wrong.

  // X8 and X9 might be stored into the stack twice, initially into the
  // `interruptCSRFrameIndex` here, and then maybe again into their CSI frame
  // index.
  //
  // This is done instead of telling the register allocator that we need two
  // VRegs to store the value of `mcause` and `mepc` through the instruction,
  // which affects other passes.
  TII->storeRegToStackSlot(MBB, MBBI, RISCV::X8, /* IsKill=*/true,
                           RVFI->getInterruptCSRFrameIndex(0),
                           &RISCV::GPRRegClass, STI.getRegisterInfo(),
                           Register(), MachineInstr::FrameSetup);
  TII->storeRegToStackSlot(MBB, MBBI, RISCV::X9, /* IsKill=*/true,
                           RVFI->getInterruptCSRFrameIndex(1),
                           &RISCV::GPRRegClass, STI.getRegisterInfo(),
                           Register(), MachineInstr::FrameSetup);

  // Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are
  // used in the function, then they will appear in `getUnmanagedCSI` and will
  // be saved again.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRS))
      .addReg(RISCV::X8, RegState::Define)
      .addImm(RISCVSysReg::mcause)
      .addReg(RISCV::X0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRS))
      .addReg(RISCV::X9, RegState::Define)
      .addImm(RISCVSysReg::mepc)
      .addReg(RISCV::X0)
      .setMIFlag(MachineInstr::FrameSetup);

  // Enable interrupts.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRSI))
      .addReg(RISCV::X0, RegState::Define)
      .addImm(RISCVSysReg::mstatus)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);
}

static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF,
                                              MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MBBI,
                                              const DebugLoc &DL) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (!RVFI->isSiFivePreemptibleInterrupt(MF))
    return;

  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = STI.getInstrInfo();

  // FIXME: CFI Information here is nonexistent/wrong.

  // Disable interrupts.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRCI))
      .addReg(RISCV::X0, RegState::Define)
      .addImm(RISCVSysReg::mstatus)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);

  // Restore `mepc` from x9 (s1), and `mcause` from x8 (s0). If either were used
  // in the function, they have already been restored once, so now have the
  // value stored in `emitSiFiveCLICPreemptibleSaves`.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW))
      .addReg(RISCV::X0, RegState::Define)
      .addImm(RISCVSysReg::mepc)
      .addReg(RISCV::X9, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSRRW))
      .addReg(RISCV::X0, RegState::Define)
      .addImm(RISCVSysReg::mcause)
      .addReg(RISCV::X8, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

  // X8 and X9 need to be restored to their values on function entry, which we
  // saved onto the stack in `emitSiFiveCLICPreemptibleSaves`.
  TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X9,
                            RVFI->getInterruptCSRFrameIndex(1),
                            &RISCV::GPRRegClass, STI.getRegisterInfo(),
                            Register(), MachineInstr::FrameSetup);
  TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X8,
                            RVFI->getInterruptCSRFrameIndex(0),
                            &RISCV::GPRRegClass, STI.getRegisterInfo(),
                            Register(), MachineInstr::FrameSetup);
}

// Get the ID of the libcall used for spilling and restoring callee saved
// registers. The ID is representative of the number of registers saved or
// restored by the libcall, except it is zero-indexed - ID 0 corresponds to a
// single register.
static int getLibCallID(const MachineFunction &MF,
                        const std::vector<CalleeSavedInfo> &CSI) {
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (CSI.empty() || !RVFI->useSaveRestoreLibCalls(MF))
    return -1;

  MCRegister MaxReg;
  for (auto &CS : CSI)
    // assignCalleeSavedSpillSlots assigns negative frame indexes to
    // registers which can be saved by libcall.
    if (CS.getFrameIdx() < 0)
      MaxReg = std::max(MaxReg.id(), CS.getReg().id());

  if (!MaxReg)
    return -1;

  switch (MaxReg.id()) {
  default:
    llvm_unreachable("Something has gone wrong!");
    // clang-format off
  case /*s11*/ RISCV::X27: return 12;
  case /*s10*/ RISCV::X26: return 11;
  case /*s9*/  RISCV::X25: return 10;
  case /*s8*/  RISCV::X24: return 9;
  case /*s7*/  RISCV::X23: return 8;
  case /*s6*/  RISCV::X22: return 7;
  case /*s5*/  RISCV::X21: return 6;
  case /*s4*/  RISCV::X20: return 5;
  case /*s3*/  RISCV::X19: return 4;
  case /*s2*/  RISCV::X18: return 3;
  case /*s1*/  RISCV::X9:  return 2;
  case /*s0*/  FPReg:  return 1;
  case /*ra*/  RAReg:  return 0;
    // clang-format on
  }
}

// Get the name of the libcall used for spilling callee saved registers.
// If this function will not use save/restore libcalls, then return a nullptr.
static const char *
getSpillLibCallName(const MachineFunction &MF,
                    const std::vector<CalleeSavedInfo> &CSI) {
  static const char *const SpillLibCalls[] = {
    "__riscv_save_0",
    "__riscv_save_1",
    "__riscv_save_2",
    "__riscv_save_3",
    "__riscv_save_4",
    "__riscv_save_5",
    "__riscv_save_6",
    "__riscv_save_7",
    "__riscv_save_8",
    "__riscv_save_9",
    "__riscv_save_10",
    "__riscv_save_11",
    "__riscv_save_12"
  };

  int LibCallID = getLibCallID(MF, CSI);
  if (LibCallID == -1)
    return nullptr;
  return SpillLibCalls[LibCallID];
}

// Get the name of the libcall used for restoring callee saved registers.
// If this function will not use save/restore libcalls, then return a nullptr.
static const char *
getRestoreLibCallName(const MachineFunction &MF,
                      const std::vector<CalleeSavedInfo> &CSI) {
  static const char *const RestoreLibCalls[] = {
    "__riscv_restore_0",
    "__riscv_restore_1",
    "__riscv_restore_2",
    "__riscv_restore_3",
    "__riscv_restore_4",
    "__riscv_restore_5",
    "__riscv_restore_6",
    "__riscv_restore_7",
    "__riscv_restore_8",
    "__riscv_restore_9",
    "__riscv_restore_10",
    "__riscv_restore_11",
    "__riscv_restore_12"
  };

  int LibCallID = getLibCallID(MF, CSI);
  if (LibCallID == -1)
    return nullptr;
  return RestoreLibCalls[LibCallID];
}

// Get the max reg of Push/Pop for restoring callee saved registers.
static unsigned getNumPushPopRegs(const std::vector<CalleeSavedInfo> &CSI) {
  unsigned NumPushPopRegs = 0;
  for (auto &CS : CSI) {
    auto *FII = llvm::find_if(FixedCSRFIMap,
                              [&](MCPhysReg P) { return P == CS.getReg(); });
    if (FII != std::end(FixedCSRFIMap)) {
      unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII);
      NumPushPopRegs = std::max(NumPushPopRegs, RegNum + 1);
    }
  }
  assert(NumPushPopRegs != 12 && "x26 requires x27 to also be pushed");
  return NumPushPopRegs;
}

// Return true if the specified function should have a dedicated frame
// pointer register.  This is true if frame pointer elimination is
// disabled, if it needs dynamic stack realignment, if the function has
// variable sized allocas, or if the frame address is taken.
bool RISCVFrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
         MFI.isFrameAddressTaken();
}

bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  // If we do not reserve stack space for outgoing arguments in prologue,
  // we will adjust the stack pointer before call instruction. After the
  // adjustment, we can not use SP to access the stack objects for the
  // arguments. Instead, use BP to access these stack objects.
  return (MFI.hasVarSizedObjects() ||
          (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
                                         MFI.getMaxCallFrameSize() != 0))) &&
         TRI->hasStackRealignment(MF);
}

// Determines the size of the frame and maximum call frame size.
void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t FrameSize = MFI.getStackSize();

  // QCI Interrupts use at least 96 bytes of stack space
  if (RVFI->useQCIInterrupt(MF))
    FrameSize = std::max(FrameSize, QCIInterruptPushAmount);

  // Get the alignment.
  Align StackAlign = getStackAlign();

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, StackAlign);

  // Update frame info.
  MFI.setStackSize(FrameSize);

  // When using SP or BP to access stack objects, we may require extra padding
  // to ensure the bottom of the RVV stack is correctly aligned within the main
  // stack. We calculate this as the amount required to align the scalar local
  // variable section up to the RVV alignment.
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) {
    int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() -
                             RVFI->getVarArgsSaveSize();
    if (auto RVVPadding =
            offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign()))
      RVFI->setRVVPadding(RVVPadding);
  }
}

// Returns the stack size including RVV padding (when required), rounded back
// up to the required stack alignment.
uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
    const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign());
}

static SmallVector<CalleeSavedInfo, 8>
getUnmanagedCSI(const MachineFunction &MF,
                const std::vector<CalleeSavedInfo> &CSI) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;

  for (auto &CS : CSI) {
    int FI = CS.getFrameIdx();
    if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::Default)
      NonLibcallCSI.push_back(CS);
  }

  return NonLibcallCSI;
}

static SmallVector<CalleeSavedInfo, 8>
getRVVCalleeSavedInfo(const MachineFunction &MF,
                      const std::vector<CalleeSavedInfo> &CSI) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  SmallVector<CalleeSavedInfo, 8> RVVCSI;

  for (auto &CS : CSI) {
    int FI = CS.getFrameIdx();
    if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector)
      RVVCSI.push_back(CS);
  }

  return RVVCSI;
}

static SmallVector<CalleeSavedInfo, 8>
getPushOrLibCallsSavedInfo(const MachineFunction &MF,
                           const std::vector<CalleeSavedInfo> &CSI) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  SmallVector<CalleeSavedInfo, 8> PushOrLibCallsCSI;
  if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF))
    return PushOrLibCallsCSI;

  for (const auto &CS : CSI) {
    if (RVFI->useQCIInterrupt(MF)) {
      // Some registers are saved by both `QC.C.MIENTER(.NEST)` and
      // `QC.CM.PUSH(FP)`. In these cases, prioritise the CFI info that points
      // to the versions saved by `QC.C.MIENTER(.NEST)` which is what FP
      // unwinding would use.
      if (llvm::is_contained(llvm::make_first_range(FixedCSRFIQCIInterruptMap),
                             CS.getReg()))
        continue;
    }

    if (llvm::is_contained(FixedCSRFIMap, CS.getReg()))
      PushOrLibCallsCSI.push_back(CS);
  }

  return PushOrLibCallsCSI;
}

static SmallVector<CalleeSavedInfo, 8>
getQCISavedInfo(const MachineFunction &MF,
                const std::vector<CalleeSavedInfo> &CSI) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  SmallVector<CalleeSavedInfo, 8> QCIInterruptCSI;
  if (!RVFI->useQCIInterrupt(MF))
    return QCIInterruptCSI;

  for (const auto &CS : CSI) {
    if (llvm::is_contained(llvm::make_first_range(FixedCSRFIQCIInterruptMap),
                           CS.getReg()))
      QCIInterruptCSI.push_back(CS);
  }

  return QCIInterruptCSI;
}

void RISCVFrameLowering::allocateAndProbeStackForRVV(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
    MachineInstr::MIFlag Flag, bool EmitCFI, bool DynAllocation) const {
  assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");

  // Emit a variable-length allocation probing loop.

  // Get VLEN in TargetReg
  const RISCVInstrInfo *TII = STI.getInstrInfo();
  Register TargetReg = RISCV::X6;
  uint32_t NumOfVReg = Amount / RISCV::RVVBytesPerBlock;
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoReadVLENB), TargetReg)
      .setMIFlag(Flag);
  TII->mulImm(MF, MBB, MBBI, DL, TargetReg, NumOfVReg, Flag);

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  if (EmitCFI) {
    // Set the CFA register to TargetReg.
    CFIBuilder.buildDefCFA(TargetReg, -Amount);
  }

  // It will be expanded to a probe loop in `inlineStackProbe`.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC_RVV))
      .addReg(TargetReg);

  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIBuilder.buildDefCFARegister(SPReg);
  }

  // SUB SP, SP, T1
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::SUB), SPReg)
      .addReg(SPReg)
      .addReg(TargetReg)
      .setMIFlag(Flag);

  // If we have a dynamic allocation later we need to probe any residuals.
  if (DynAllocation) {
    BuildMI(MBB, MBBI, DL, TII->get(STI.is64Bit() ? RISCV::SD : RISCV::SW))
        .addReg(RISCV::X0)
        .addReg(SPReg)
        .addImm(0)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
                                           SmallVectorImpl<char> &Expr,
                                           int FixedOffset, int ScalableOffset,
                                           llvm::raw_string_ostream &Comment) {
  unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true);
  uint8_t Buffer[16];
  if (FixedOffset) {
    Expr.push_back(dwarf::DW_OP_consts);
    Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer));
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
    Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(FixedOffset);
  }

  Expr.push_back((uint8_t)dwarf::DW_OP_consts);
  Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer));

  Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
  Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer));
  Expr.push_back(0);

  Expr.push_back((uint8_t)dwarf::DW_OP_mul);
  Expr.push_back((uint8_t)dwarf::DW_OP_plus);

  Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(ScalableOffset)
          << " * vlenb";
}

static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
                                               Register Reg,
                                               uint64_t FixedOffset,
                                               uint64_t ScalableOffset) {
  assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV");
  SmallString<64> Expr;
  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  // Build up the expression (Reg + FixedOffset + ScalableOffset * VLENB).
  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
  Expr.push_back(0);
  if (Reg == SPReg)
    Comment << "sp";
  else
    Comment << printReg(Reg, &TRI);

  appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
                                 Comment);

  SmallString<64> DefCfaExpr;
  uint8_t Buffer[16];
  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
  DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
  DefCfaExpr.append(Expr.str());

  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
                                        Comment.str());
}

static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
                                           Register Reg, uint64_t FixedOffset,
                                           uint64_t ScalableOffset) {
  assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV");
  SmallString<64> Expr;
  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  Comment << printReg(Reg, &TRI) << "  @ cfa";

  // Build up the expression (FixedOffset + ScalableOffset * VLENB).
  appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
                                 Comment);

  SmallString<64> DefCfaExpr;
  uint8_t Buffer[16];
  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
  DefCfaExpr.push_back(dwarf::DW_CFA_expression);
  DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer));
  DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
  DefCfaExpr.append(Expr.str());

  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
                                        Comment.str());
}

// Allocate stack space and probe it if necessary.
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineFunction &MF, uint64_t Offset,
                                       uint64_t RealStackSize, bool EmitCFI,
                                       bool NeedProbe, uint64_t ProbeSize,
                                       bool DynAllocation,
                                       MachineInstr::MIFlag Flag) const {
  DebugLoc DL;
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
  const RISCVInstrInfo *TII = STI.getInstrInfo();
  bool IsRV64 = STI.is64Bit();
  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  // Simply allocate the stack if it's not big enough to require a probe.
  if (!NeedProbe || Offset <= ProbeSize) {
    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
                  Flag, getStackAlign());

    if (EmitCFI)
      CFIBuilder.buildDefCFAOffset(RealStackSize);

    if (NeedProbe && DynAllocation) {
      // s[d|w] zero, 0(sp)
      BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
          .addReg(RISCV::X0)
          .addReg(SPReg)
          .addImm(0)
          .setMIFlags(Flag);
    }

    return;
  }

  // Unroll the probe loop depending on the number of iterations.
  if (Offset < ProbeSize * 5) {
    uint64_t CFAAdjust = RealStackSize - Offset;

    uint64_t CurrentOffset = 0;
    while (CurrentOffset + ProbeSize <= Offset) {
      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
                    StackOffset::getFixed(-ProbeSize), Flag, getStackAlign());
      // s[d|w] zero, 0(sp)
      BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
          .addReg(RISCV::X0)
          .addReg(SPReg)
          .addImm(0)
          .setMIFlags(Flag);

      CurrentOffset += ProbeSize;
      if (EmitCFI)
        CFIBuilder.buildDefCFAOffset(CurrentOffset + CFAAdjust);
    }

    uint64_t Residual = Offset - CurrentOffset;
    if (Residual) {
      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
                    StackOffset::getFixed(-Residual), Flag, getStackAlign());
      if (EmitCFI)
        CFIBuilder.buildDefCFAOffset(RealStackSize);

      if (DynAllocation) {
        // s[d|w] zero, 0(sp)
        BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
            .addReg(RISCV::X0)
            .addReg(SPReg)
            .addImm(0)
            .setMIFlags(Flag);
      }
    }

    return;
  }

  // Emit a variable-length allocation probing loop.
  uint64_t RoundedSize = alignDown(Offset, ProbeSize);
  uint64_t Residual = Offset - RoundedSize;

  Register TargetReg = RISCV::X6;
  // SUB TargetReg, SP, RoundedSize
  RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
                StackOffset::getFixed(-RoundedSize), Flag, getStackAlign());

  if (EmitCFI) {
    // Set the CFA register to TargetReg.
    CFIBuilder.buildDefCFA(TargetReg, RoundedSize);
  }

  // It will be expanded to a probe loop in `inlineStackProbe`.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC)).addReg(TargetReg);

  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIBuilder.buildDefCFARegister(SPReg);
  }

  if (Residual) {
    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
                  Flag, getStackAlign());
    if (DynAllocation) {
      // s[d|w] zero, 0(sp)
      BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
          .addReg(RISCV::X0)
          .addReg(SPReg)
          .addImm(0)
          .setMIFlags(Flag);
    }
  }

  if (EmitCFI)
    CFIBuilder.buildDefCFAOffset(Offset);
}

static bool isPush(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::CM_PUSH:
  case RISCV::QC_CM_PUSH:
  case RISCV::QC_CM_PUSHFP:
    return true;
  default:
    return false;
  }
}

static bool isPop(unsigned Opcode) {
  // There are other pops but these are the only ones introduced during this
  // pass.
  switch (Opcode) {
  case RISCV::CM_POP:
  case RISCV::QC_CM_POP:
    return true;
  default:
    return false;
  }
}

static unsigned getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind,
                              bool UpdateFP) {
  switch (Kind) {
  case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
    return RISCV::CM_PUSH;
  case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
    return UpdateFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH;
  default:
    llvm_unreachable("Unhandled PushPopKind");
  }
}

static unsigned getPopOpcode(RISCVMachineFunctionInfo::PushPopKind Kind) {
  // There are other pops but they are introduced later by the Push/Pop
  // Optimizer.
  switch (Kind) {
  case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
    return RISCV::CM_POP;
  case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
    return RISCV::QC_CM_POP;
  default:
    llvm_unreachable("Unhandled PushPopKind");
  }
}

void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.begin();

  Register BPReg = RISCVABI::getBPReg();

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // SiFive CLIC needs to swap `sp` into `sf.mscratchcsw`
  emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);

  // Emit prologue for shadow call stack.
  emitSCSPrologue(MF, MBB, MBBI, DL);

  // We keep track of the first instruction because it might be a
  // `(QC.)CM.PUSH(FP)`, and we may need to adjust the immediate rather than
  // inserting an `addi sp, sp, -N*16`
  auto PossiblePush = MBBI;

  // Skip past all callee-saved register spill instructions.
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // Determine the correct frame layout
  determineFrameLayout(MF);

  const auto &CSI = MFI.getCalleeSavedInfo();

  // Skip to before the spills of scalar callee-saved registers
  // FIXME: assumes exactly one instruction is used to restore each
  // callee-saved register.
  MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
                             getUnmanagedCSI(MF, CSI).size());
  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  // If libcalls are used to spill and restore callee-saved registers, the frame
  // has two sections; the opaque section managed by the libcalls, and the
  // section managed by MachineFrameInfo which can also hold callee saved
  // registers in fixed stack slots, both of which have negative frame indices.
  // This gets even more complicated when incoming arguments are passed via the
  // stack, as these too have negative frame indices. An example is detailed
  // below:
  //
  //  | incoming arg | <- FI[-3]
  //  | libcallspill |
  //  | calleespill  | <- FI[-2]
  //  | calleespill  | <- FI[-1]
  //  | this_frame   | <- FI[0]
  //
  // For negative frame indices, the offset from the frame pointer will differ
  // depending on which of these groups the frame index applies to.
  // The following calculates the correct offset knowing the number of callee
  // saved registers spilt by the two methods.
  if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) {
    // Calculate the size of the frame managed by the libcall. The stack
    // alignment of these libcalls should be the same as how we set it in
    // getABIStackAlignment.
    unsigned LibCallFrameSize =
        alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
    RVFI->setLibCallStackSize(LibCallFrameSize);

    CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
    for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
      CFIBuilder.buildOffset(CS.getReg(),
                             MFI.getObjectOffset(CS.getFrameIdx()));
  }

  // FIXME (note copied from Lanai): This appears to be overallocating.  Needs
  // investigation. Get the number of bytes to allocate from the FrameInfo.
  uint64_t RealStackSize = getStackSizeWithRVVPadding(MF);
  uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize();
  uint64_t RVVStackSize = RVFI->getRVVStackSize();

  // Early exit if there is no need to allocate on the stack
  if (RealStackSize == 0 && !MFI.adjustsStack() && RVVStackSize == 0)
    return;

  // If the stack pointer has been marked as reserved, then produce an error if
  // the frame requires stack allocation
  if (STI.isRegisterReservedByUser(SPReg))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(), "Stack pointer required, but has been reserved."});

  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
  // Split the SP adjustment to reduce the offsets of callee saved spill.
  if (FirstSPAdjustAmount) {
    StackSize = FirstSPAdjustAmount;
    RealStackSize = FirstSPAdjustAmount;
  }

  if (RVFI->useQCIInterrupt(MF)) {
    // The function starts with `QC.C.MIENTER(.NEST)`, so the `(QC.)CM.PUSH(FP)`
    // could only be the next instruction.
    ++PossiblePush;

    // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
    // could be. The PUSH will also get its own CFI metadata for its own
    // modifications, which should come after the PUSH.
    CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
    PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
    for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
      PushCFIBuilder.buildOffset(CS.getReg(),
                                 MFI.getObjectOffset(CS.getFrameIdx()));
  }

  if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
      isPush(PossiblePush->getOpcode())) {
    // Use available stack adjustment in push instruction to allocate additional
    // stack space. Align the stack size down to a multiple of 16. This is
    // needed for RVE.
    // FIXME: Can we increase the stack size to a multiple of 16 instead?
    uint64_t StackAdj =
        std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48));
    PossiblePush->getOperand(1).setImm(StackAdj);
    StackSize -= StackAdj;

    CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
    for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
      CFIBuilder.buildOffset(CS.getReg(),
                             MFI.getObjectOffset(CS.getFrameIdx()));
  }

  // Allocate space on the stack if necessary.
  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
  bool NeedProbe = TLI->hasInlineStackProbe(MF);
  uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
  bool DynAllocation =
      MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
  if (StackSize != 0)
    allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
                  NeedProbe, ProbeSize, DynAllocation,
                  MachineInstr::FrameSetup);

  // Save SiFive CLIC CSRs into Stack
  emitSiFiveCLICPreemptibleSaves(MF, MBB, MBBI, DL);

  // The frame pointer is callee-saved, and code has been generated for us to
  // save it to the stack. We need to skip over the storing of callee-saved
  // registers as the frame pointer must be modified after it has been saved
  // to the stack, not before.
  // FIXME: assumes exactly one instruction is used to save each callee-saved
  // register.
  std::advance(MBBI, getUnmanagedCSI(MF, CSI).size());
  CFIBuilder.setInsertPoint(MBBI);

  // Iterate over list of callee-saved registers and emit .cfi_offset
  // directives.
  for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
    CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx()));

  // Generate new FP.
  if (hasFP(MF)) {
    if (STI.isRegisterReservedByUser(FPReg))
      MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
          MF.getFunction(), "Frame pointer required, but has been reserved."});
    // The frame pointer does need to be reserved from register allocation.
    assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");

    // Some stack management variants automatically keep FP updated, so we don't
    // need an instruction to do so.
    if (!RVFI->hasImplicitFPUpdates(MF)) {
      RI->adjustReg(
          MBB, MBBI, DL, FPReg, SPReg,
          StackOffset::getFixed(RealStackSize - RVFI->getVarArgsSaveSize()),
          MachineInstr::FrameSetup, getStackAlign());
    }

    CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
  }

  uint64_t SecondSPAdjustAmount = 0;
  // Emit the second SP adjustment after saving callee saved registers.
  if (FirstSPAdjustAmount) {
    SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
    assert(SecondSPAdjustAmount > 0 &&
           "SecondSPAdjustAmount should be greater than zero");

    allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
                  getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
                  ProbeSize, DynAllocation, MachineInstr::FrameSetup);
  }

  if (RVVStackSize) {
    if (NeedProbe) {
      allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
                                  MachineInstr::FrameSetup, !hasFP(MF),
                                  DynAllocation);
    } else {
      // We must keep the stack pointer aligned through any intermediate
      // updates.
      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
                    StackOffset::getScalable(-RVVStackSize),
                    MachineInstr::FrameSetup, getStackAlign());
    }

    if (!hasFP(MF)) {
      // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
      CFIBuilder.insertCFIInst(createDefCFAExpression(
          *RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
    }

    std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
    emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
  }

  if (hasFP(MF)) {
    // Realign Stack
    const RISCVRegisterInfo *RI = STI.getRegisterInfo();
    if (RI->hasStackRealignment(MF)) {
      Align MaxAlignment = MFI.getMaxAlign();

      const RISCVInstrInfo *TII = STI.getInstrInfo();
      if (isInt<12>(-(int)MaxAlignment.value())) {
        BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
            .addReg(SPReg)
            .addImm(-(int)MaxAlignment.value())
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        unsigned ShiftAmount = Log2(MaxAlignment);
        Register VR =
            MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
        BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
            .addReg(SPReg)
            .addImm(ShiftAmount)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
            .addReg(VR)
            .addImm(ShiftAmount)
            .setMIFlag(MachineInstr::FrameSetup);
      }
      if (NeedProbe && RVVStackSize == 0) {
        // Do a probe if the align + size allocated just passed the probe size
        // and was not yet probed.
        if (SecondSPAdjustAmount < ProbeSize &&
            SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) {
          bool IsRV64 = STI.is64Bit();
          BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
              .addReg(RISCV::X0)
              .addReg(SPReg)
              .addImm(0)
              .setMIFlags(MachineInstr::FrameSetup);
        }
      }
      // FP will be used to restore the frame in the epilogue, so we need
      // another base register BP to record SP after re-alignment. SP will
      // track the current stack after allocating variable sized objects.
      if (hasBP(MF)) {
        // move BP, SP
        BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), BPReg)
            .addReg(SPReg)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }
}

void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
                                         const DebugLoc &DL,
                                         uint64_t &StackSize,
                                         int64_t CFAOffset) const {
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();

  RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
                MachineInstr::FrameDestroy, getStackAlign());
  StackSize = 0;

  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
      .buildDefCFAOffset(CFAOffset);
}

void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();

    // Skip to before the restores of all callee-saved registers.
    while (MBBI != MBB.begin() &&
           std::prev(MBBI)->getFlag(MachineInstr::FrameDestroy))
      --MBBI;
  }

  const auto &CSI = MFI.getCalleeSavedInfo();

  // Skip to before the restores of scalar callee-saved registers
  // FIXME: assumes exactly one instruction is used to restore each
  // callee-saved register.
  auto FirstScalarCSRRestoreInsn =
      std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
  CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
                            MachineInstr::FrameDestroy);

  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
  uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
                                               : getStackSizeWithRVVPadding(MF);
  uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
                                           : getStackSizeWithRVVPadding(MF) -
                                                 RVFI->getReservedSpillsSize();
  uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
  uint64_t RVVStackSize = RVFI->getRVVStackSize();

  bool RestoreSPFromFP = RI->hasStackRealignment(MF) ||
                         MFI.hasVarSizedObjects() || !hasReservedCallFrame(MF);
  if (RVVStackSize) {
    // If RestoreSPFromFP the stack pointer will be restored using the frame
    // pointer value.
    if (!RestoreSPFromFP)
      RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg,
                    StackOffset::getScalable(RVVStackSize),
                    MachineInstr::FrameDestroy, getStackAlign());

    if (!hasFP(MF))
      CFIBuilder.buildDefCFA(SPReg, RealStackSize);

    emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
  }

  if (FirstSPAdjustAmount) {
    uint64_t SecondSPAdjustAmount =
        getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
    assert(SecondSPAdjustAmount > 0 &&
           "SecondSPAdjustAmount should be greater than zero");

    // If RestoreSPFromFP the stack pointer will be restored using the frame
    // pointer value.
    if (!RestoreSPFromFP)
      RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg,
                    StackOffset::getFixed(SecondSPAdjustAmount),
                    MachineInstr::FrameDestroy, getStackAlign());

    if (!hasFP(MF))
      CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount);
  }

  // Restore the stack pointer using the value of the frame pointer. Only
  // necessary if the stack pointer was modified, meaning the stack size is
  // unknown.
  //
  // In order to make sure the stack point is right through the EH region,
  // we also need to restore stack pointer from the frame pointer if we
  // don't preserve stack space within prologue/epilogue for outgoing variables,
  // normally it's just checking the variable sized object is present or not
  // is enough, but we also don't preserve that at prologue/epilogue when
  // have vector objects in stack.
  if (RestoreSPFromFP) {
    assert(hasFP(MF) && "frame pointer should not have been eliminated");
    RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, FPReg,
                  StackOffset::getFixed(-FPOffset), MachineInstr::FrameDestroy,
                  getStackAlign());
  }

  if (hasFP(MF))
    CFIBuilder.buildDefCFA(SPReg, RealStackSize);

  // Skip to after the restores of scalar callee-saved registers
  // FIXME: assumes exactly one instruction is used to restore each
  // callee-saved register.
  MBBI = std::next(FirstScalarCSRRestoreInsn, getUnmanagedCSI(MF, CSI).size());
  CFIBuilder.setInsertPoint(MBBI);

  if (getLibCallID(MF, CSI) != -1) {
    // tail __riscv_restore_[0-12] instruction is considered as a terminator,
    // therefore it is unnecessary to place any CFI instructions after it. Just
    // deallocate stack if needed and return.
    if (StackSize != 0)
      deallocateStack(MF, MBB, MBBI, DL, StackSize,
                      RVFI->getLibCallStackSize());

    // Emit epilogue for shadow call stack.
    emitSCSEpilogue(MF, MBB, MBBI, DL);
    return;
  }

  // Recover callee-saved registers.
  for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
    CFIBuilder.buildRestore(CS.getReg());

  if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
    // Use available stack adjustment in pop instruction to deallocate stack
    // space. Align the stack size down to a multiple of 16. This is needed for
    // RVE.
    // FIXME: Can we increase the stack size to a multiple of 16 instead?
    uint64_t StackAdj =
        std::min(alignDown(StackSize, 16), static_cast<uint64_t>(48));
    MBBI->getOperand(1).setImm(StackAdj);
    StackSize -= StackAdj;

    if (StackSize != 0)
      deallocateStack(MF, MBB, MBBI, DL, StackSize,
                      /*stack_adj of cm.pop instr*/ RealStackSize - StackSize);

    auto NextI = next_nodbg(MBBI, MBB.end());
    if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
      ++MBBI;
      CFIBuilder.setInsertPoint(MBBI);

      for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
        CFIBuilder.buildRestore(CS.getReg());

      // Update CFA Offset. If this is a QCI interrupt function, there will be a
      // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
      // getQCIInterruptStackSize() will be 0.
      CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
    }
  }

  emitSiFiveCLICPreemptibleRestores(MF, MBB, MBBI, DL);

  // Deallocate stack if StackSize isn't a zero yet. If this is a QCI interrupt
  // function, there will be a leftover offset which is deallocated by
  // `QC.C.MILEAVERET`, otherwise getQCIInterruptStackSize() will be 0.
  if (StackSize != 0)
    deallocateStack(MF, MBB, MBBI, DL, StackSize,
                    RVFI->getQCIInterruptStackSize());

  // Emit epilogue for shadow call stack.
  emitSCSEpilogue(MF, MBB, MBBI, DL);

  // SiFive CLIC needs to swap `sf.mscratchcsw` into `sp`
  emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);
}

StackOffset
RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                           Register &FrameReg) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // Callee-saved registers should be referenced relative to the stack
  // pointer (positive offset), otherwise use the frame pointer (negative
  // offset).
  const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
  int MinCSFI = 0;
  int MaxCSFI = -1;
  StackOffset Offset;
  auto StackID = MFI.getStackID(FI);

  assert((StackID == TargetStackID::Default ||
          StackID == TargetStackID::ScalableVector) &&
         "Unexpected stack ID for the frame object.");
  if (StackID == TargetStackID::Default) {
    assert(getOffsetOfLocalArea() == 0 && "LocalAreaOffset is not 0!");
    Offset = StackOffset::getFixed(MFI.getObjectOffset(FI) +
                                   MFI.getOffsetAdjustment());
  } else if (StackID == TargetStackID::ScalableVector) {
    Offset = StackOffset::getScalable(MFI.getObjectOffset(FI));
  }

  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  if (FI >= MinCSFI && FI <= MaxCSFI) {
    FrameReg = SPReg;

    if (FirstSPAdjustAmount)
      Offset += StackOffset::getFixed(FirstSPAdjustAmount);
    else
      Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF));
    return Offset;
  }

  if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
    // If the stack was realigned, the frame pointer is set in order to allow
    // SP to be restored, so we need another base register to record the stack
    // after realignment.
    // |--------------------------| -- <-- FP
    // | callee-allocated save    | | <----|
    // | area for register varargs| |      |
    // |--------------------------| |      |
    // | callee-saved registers   | |      |
    // |--------------------------| --     |
    // | realignment (the size of | |      |
    // | this area is not counted | |      |
    // | in MFI.getStackSize())   | |      |
    // |--------------------------| --     |-- MFI.getStackSize()
    // | RVV alignment padding    | |      |
    // | (not counted in          | |      |
    // | MFI.getStackSize() but   | |      |
    // | counted in               | |      |
    // | RVFI.getRVVStackSize())  | |      |
    // |--------------------------| --     |
    // | RVV objects              | |      |
    // | (not counted in          | |      |
    // | MFI.getStackSize())      | |      |
    // |--------------------------| --     |
    // | padding before RVV       | |      |
    // | (not counted in          | |      |
    // | MFI.getStackSize() or in | |      |
    // | RVFI.getRVVStackSize())  | |      |
    // |--------------------------| --     |
    // | scalar local variables   | | <----'
    // |--------------------------| -- <-- BP (if var sized objects present)
    // | VarSize objects          | |
    // |--------------------------| -- <-- SP
    if (hasBP(MF)) {
      FrameReg = RISCVABI::getBPReg();
    } else {
      // VarSize objects must be empty in this case!
      assert(!MFI.hasVarSizedObjects());
      FrameReg = SPReg;
    }
  } else {
    FrameReg = RI->getFrameRegister(MF);
  }

  if (FrameReg == FPReg) {
    Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
    // When using FP to access scalable vector objects, we need to minus
    // the frame size.
    //
    // |--------------------------| -- <-- FP
    // | callee-allocated save    | |
    // | area for register varargs| |
    // |--------------------------| |
    // | callee-saved registers   | |
    // |--------------------------| | MFI.getStackSize()
    // | scalar local variables   | |
    // |--------------------------| -- (Offset of RVV objects is from here.)
    // | RVV objects              |
    // |--------------------------|
    // | VarSize objects          |
    // |--------------------------| <-- SP
    if (StackID == TargetStackID::ScalableVector) {
      assert(!RI->hasStackRealignment(MF) &&
             "Can't index across variable sized realign");
      // We don't expect any extra RVV alignment padding, as the stack size
      // and RVV object sections should be correct aligned in their own
      // right.
      assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) &&
             "Inconsistent stack layout");
      Offset -= StackOffset::getFixed(MFI.getStackSize());
    }
    return Offset;
  }

  // This case handles indexing off both SP and BP.
  // If indexing off SP, there must not be any var sized objects
  assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());

  // When using SP to access frame objects, we need to add RVV stack size.
  //
  // |--------------------------| -- <-- FP
  // | callee-allocated save    | | <----|
  // | area for register varargs| |      |
  // |--------------------------| |      |
  // | callee-saved registers   | |      |
  // |--------------------------| --     |
  // | RVV alignment padding    | |      |
  // | (not counted in          | |      |
  // | MFI.getStackSize() but   | |      |
  // | counted in               | |      |
  // | RVFI.getRVVStackSize())  | |      |
  // |--------------------------| --     |
  // | RVV objects              | |      |-- MFI.getStackSize()
  // | (not counted in          | |      |
  // | MFI.getStackSize())      | |      |
  // |--------------------------| --     |
  // | padding before RVV       | |      |
  // | (not counted in          | |      |
  // | MFI.getStackSize())      | |      |
  // |--------------------------| --     |
  // | scalar local variables   | | <----'
  // |--------------------------| -- <-- BP (if var sized objects present)
  // | VarSize objects          | |
  // |--------------------------| -- <-- SP
  //
  // The total amount of padding surrounding RVV objects is described by
  // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
  // objects to the required alignment.
  if (MFI.getStackID(FI) == TargetStackID::Default) {
    if (MFI.isFixedObjectIndex(FI)) {
      assert(!RI->hasStackRealignment(MF) &&
             "Can't index across variable sized realign");
      Offset += StackOffset::get(getStackSizeWithRVVPadding(MF),
                                 RVFI->getRVVStackSize());
    } else {
      Offset += StackOffset::getFixed(MFI.getStackSize());
    }
  } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
    // Ensure the base of the RVV stack is correctly aligned: add on the
    // alignment padding.
    int ScalarLocalVarSize = MFI.getStackSize() -
                             RVFI->getCalleeSavedStackSize() -
                             RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
    Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize());
  }
  return Offset;
}

void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                              BitVector &SavedRegs,
                                              RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // Unconditionally spill RA and FP only if the function uses a frame
  // pointer.
  if (hasFP(MF)) {
    SavedRegs.set(RAReg);
    SavedRegs.set(FPReg);
  }
  // Mark BP as used if function has dedicated base pointer.
  if (hasBP(MF))
    SavedRegs.set(RISCVABI::getBPReg());

  // When using cm.push/pop we must save X27 if we save X26.
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  if (RVFI->isPushable(MF) && SavedRegs.test(RISCV::X26))
    SavedRegs.set(RISCV::X27);

  // SiFive Preemptible Interrupt Handlers need additional frame entries
  createSiFivePreemptibleInterruptFrameEntries(MF, *RVFI);
}

std::pair<int64_t, Align>
RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Create a buffer of RVV objects to allocate.
  SmallVector<int, 8> ObjectsToAllocate;
  auto pushRVVObjects = [&](int FIBegin, int FIEnd) {
    for (int I = FIBegin, E = FIEnd; I != E; ++I) {
      unsigned StackID = MFI.getStackID(I);
      if (StackID != TargetStackID::ScalableVector)
        continue;
      if (MFI.isDeadObjectIndex(I))
        continue;

      ObjectsToAllocate.push_back(I);
    }
  };
  // First push RVV Callee Saved object, then push RVV stack object
  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
  const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI);
  if (!RVVCSI.empty())
    pushRVVObjects(RVVCSI[0].getFrameIdx(),
                   RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1);
  pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size());

  // The minimum alignment is 16 bytes.
  Align RVVStackAlign(16);
  const auto &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!ST.hasVInstructions()) {
    assert(ObjectsToAllocate.empty() &&
           "Can't allocate scalable-vector objects without V instructions");
    return std::make_pair(0, RVVStackAlign);
  }

  // Allocate all RVV locals and spills
  int64_t Offset = 0;
  for (int FI : ObjectsToAllocate) {
    // ObjectSize in bytes.
    int64_t ObjectSize = MFI.getObjectSize(FI);
    auto ObjectAlign =
        std::max(Align(RISCV::RVVBytesPerBlock), MFI.getObjectAlign(FI));
    // If the data type is the fractional vector type, reserve one vector
    // register for it.
    if (ObjectSize < RISCV::RVVBytesPerBlock)
      ObjectSize = RISCV::RVVBytesPerBlock;
    Offset = alignTo(Offset + ObjectSize, ObjectAlign);
    MFI.setObjectOffset(FI, -Offset);
    // Update the maximum alignment of the RVV stack section
    RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
  }

  uint64_t StackSize = Offset;

  // Ensure the alignment of the RVV stack. Since we want the most-aligned
  // object right at the bottom (i.e., any padding at the top of the frame),
  // readjust all RVV objects down by the alignment padding.
  // Stack size and offsets are multiples of vscale, stack alignment is in
  // bytes, we can divide stack alignment by minimum vscale to get a maximum
  // stack alignment multiple of vscale.
  auto VScale =
      std::max<uint64_t>(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1);
  if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
    if (auto AlignmentPadding =
            offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) {
      StackSize += AlignmentPadding;
      for (int FI : ObjectsToAllocate)
        MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
    }
  }

  return std::make_pair(StackSize, RVVStackAlign);
}

static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
  // For RVV spill, scalable stack offsets computing requires up to two scratch
  // registers
  static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;

  // For RVV spill, non-scalable stack offsets computing requires up to one
  // scratch register.
  static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;

  // ADDI instruction's destination register can be used for computing
  // offsets. So Scalable stack offsets require up to one scratch register.
  static constexpr unsigned ScavSlotsADDIScalableObject = 1;

  static constexpr unsigned MaxScavSlotsNumKnown =
      std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
                ScavSlotsNumRVVSpillNonScalableObject});

  unsigned MaxScavSlotsNum = 0;
  if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
    return false;
  for (const MachineBasicBlock &MBB : MF)
    for (const MachineInstr &MI : MBB) {
      bool IsRVVSpill = RISCV::isRVVSpill(MI);
      for (auto &MO : MI.operands()) {
        if (!MO.isFI())
          continue;
        bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) ==
                                  TargetStackID::ScalableVector;
        if (IsRVVSpill) {
          MaxScavSlotsNum = std::max(
              MaxScavSlotsNum, IsScalableVectorID
                                   ? ScavSlotsNumRVVSpillScalableObject
                                   : ScavSlotsNumRVVSpillNonScalableObject);
        } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
          MaxScavSlotsNum =
              std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject);
        }
      }
      if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
        return MaxScavSlotsNumKnown;
    }
  return MaxScavSlotsNum;
}

static bool hasRVVFrameObject(const MachineFunction &MF) {
  // Originally, the function will scan all the stack objects to check whether
  // if there is any scalable vector object on the stack or not. However, it
  // causes errors in the register allocator. In issue 53016, it returns false
  // before RA because there is no RVV stack objects. After RA, it returns true
  // because there are spilling slots for RVV values during RA. It will not
  // reserve BP during register allocation and generate BP access in the PEI
  // pass due to the inconsistent behavior of the function.
  //
  // The function is changed to use hasVInstructions() as the return value. It
  // is not precise, but it can make the register allocation correct.
  //
  // FIXME: Find a better way to make the decision or revisit the solution in
  // D103622.
  //
  // Refer to https://github.com/llvm/llvm-project/issues/53016.
  return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
}

static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
                                            const RISCVInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      // Far branches over 20-bit offset will be relaxed in branch relaxation
      // pass. In the worst case, conditional branches will be relaxed into
      // the following instruction sequence. Unconditional branches are
      // relaxed in the same way, with the exception that there is no first
      // branch instruction.
      //
      //        foo
      //        bne     t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes
      //        sd      s11, 0(sp)        # 4 bytes, or 2 bytes with Zca
      //        jump    .restore, s11     # 8 bytes
      // .rev_cond
      //        bar
      //        j       .dest_bb          # 4 bytes, or 2 bytes with Zca
      // .restore:
      //        ld      s11, 0(sp)        # 4 bytes, or 2 bytes with Zca
      // .dest:
      //        baz
      if (MI.isConditionalBranch())
        FnSize += TII.getInstSizeInBytes(MI);
      if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
        if (MF.getSubtarget<RISCVSubtarget>().hasStdExtZca())
          FnSize += 2 + 8 + 2 + 2;
        else
          FnSize += 4 + 8 + 4 + 4;
        continue;
      }

      FnSize += TII.getInstSizeInBytes(MI);
    }
  }
  return FnSize;
}

void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  const RISCVRegisterInfo *RegInfo =
      MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
  const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterClass *RC = &RISCV::GPRRegClass;
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  int64_t RVVStackSize;
  Align RVVStackAlign;
  std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF);

  RVFI->setRVVStackSize(RVVStackSize);
  RVFI->setRVVStackAlign(RVVStackAlign);

  if (hasRVVFrameObject(MF)) {
    // Ensure the entire stack is aligned to at least the RVV requirement: some
    // scalable-vector object alignments are not considered by the
    // target-independent code.
    MFI.ensureMaxAlignment(RVVStackAlign);
  }

  unsigned ScavSlotsNum = 0;

  // estimateStackSize has been observed to under-estimate the final stack
  // size, so give ourselves wiggle-room by checking for stack size
  // representable an 11-bit signed field rather than 12-bits.
  if (!isInt<11>(MFI.estimateStackSize(MF)))
    ScavSlotsNum = 1;

  // Far branches over 20-bit offset require a spill slot for scratch register.
  bool IsLargeFunction = !isInt<20>(estimateFunctionSizeInBytes(MF, *TII));
  if (IsLargeFunction)
    ScavSlotsNum = std::max(ScavSlotsNum, 1u);

  // RVV loads & stores have no capacity to hold the immediate address offsets
  // so we must always reserve an emergency spill slot if the MachineFunction
  // contains any RVV spills.
  ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));

  for (unsigned I = 0; I < ScavSlotsNum; I++) {
    int FI = MFI.CreateSpillStackObject(RegInfo->getSpillSize(*RC),
                                        RegInfo->getSpillAlign(*RC));
    RS->addScavengingFrameIndex(FI);

    if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1)
      RVFI->setBranchRelaxationScratchFrameIndex(FI);
  }

  unsigned Size = RVFI->getReservedSpillsSize();
  for (const auto &Info : MFI.getCalleeSavedInfo()) {
    int FrameIdx = Info.getFrameIdx();
    if (FrameIdx < 0 || MFI.getStackID(FrameIdx) != TargetStackID::Default)
      continue;

    Size += MFI.getObjectSize(FrameIdx);
  }
  RVFI->setCalleeSavedStackSize(Size);
}

// Not preserve stack space within prologue for outgoing variables when the
// function contains variable size objects or there are vector objects accessed
// by the frame pointer.
// Let eliminateCallFramePseudoInstr preserve stack space for it.
bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !(hasFP(MF) && hasRVVFrameObject(MF));
}

// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MI) const {
  DebugLoc DL = MI->getDebugLoc();

  if (!hasReservedCallFrame(MF)) {
    // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
    // ADJCALLSTACKUP must be converted to instructions manipulating the stack
    // pointer. This is necessary when there is a variable length stack
    // allocation (e.g. alloca), which means it's not possible to allocate
    // space for outgoing arguments from within the function prologue.
    int64_t Amount = MI->getOperand(0).getImm();

    if (Amount != 0) {
      // Ensure the stack remains aligned after adjustment.
      Amount = alignSPAdjust(Amount);

      if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
        Amount = -Amount;

      const RISCVTargetLowering *TLI =
          MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
      int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
      if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
        // When stack probing is enabled, the decrement of SP may need to be
        // probed. We can handle both the decrement and the probing in
        // allocateStack.
        bool DynAllocation =
            MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
        allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
                      /*NeedProbe=*/true, ProbeSize, DynAllocation,
                      MachineInstr::NoFlags);
      } else {
        const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
        RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
                     MachineInstr::NoFlags, getStackAlign());
      }
    }
  }

  return MBB.erase(MI);
}

// We would like to split the SP adjustment to reduce prologue/epilogue
// as following instructions. In this way, the offset of the callee saved
// register could fit in a single store. Supposed that the first sp adjust
// amount is 2032.
//   add     sp,sp,-2032
//   sw      ra,2028(sp)
//   sw      s0,2024(sp)
//   sw      s1,2020(sp)
//   sw      s3,2012(sp)
//   sw      s4,2008(sp)
//   add     sp,sp,-64
uint64_t
RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  uint64_t StackSize = getStackSizeWithRVVPadding(MF);

  // Disable SplitSPAdjust if save-restore libcall, push/pop or QCI interrupts
  // are used. The callee-saved registers will be pushed by the save-restore
  // libcalls, so we don't have to split the SP adjustment in this case.
  if (RVFI->getReservedSpillsSize())
    return 0;

  // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
  // 12-bit and there exists a callee-saved register needing to be pushed.
  if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
    // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because
    // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple
    // instructions. Offsets smaller than 2048 can fit in a single load/store
    // instruction, and we have to stick with the stack alignment. 2048 has
    // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
    // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
    const uint64_t StackAlign = getStackAlign().value();

    // Amount of (2048 - StackAlign) will prevent callee saved and restored
    // instructions be compressed, so try to adjust the amount to the largest
    // offset that stack compression instructions accept when target supports
    // compression instructions.
    if (STI.hasStdExtZca()) {
      // The compression extensions may support the following instructions:
      // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
      //          c.swsp rs2, offset[7:2] => 2^(6 + 2)
      //          c.flwsp rd, offset[7:2] => 2^(6 + 2)
      //          c.fswsp rs2, offset[7:2] => 2^(6 + 2)
      // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
      //          c.sdsp rs2, offset[8:3] => 2^(6 + 3)
      //          c.fldsp rd, offset[8:3] => 2^(6 + 3)
      //          c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
      const uint64_t RVCompressLen = STI.getXLen() * 8;
      // Compared with amount (2048 - StackAlign), StackSize needs to
      // satisfy the following conditions to avoid using more instructions
      // to adjust the sp after adjusting the amount, such as
      // StackSize meets the condition (StackSize <= 2048 + RVCompressLen),
      // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp.
      // case2: Amount is RVCompressLen: use addi + addi to adjust sp.
      auto CanCompress = [&](uint64_t CompressLen) -> bool {
        if (StackSize <= 2047 + CompressLen ||
            (StackSize > 2048 * 2 - StackAlign &&
             StackSize <= 2047 * 2 + CompressLen) ||
            StackSize > 2048 * 3 - StackAlign)
          return true;

        return false;
      };
      // In the epilogue, addi sp, sp, 496 is used to recover the sp and it
      // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but
      // addi sp, sp, 512 can not be compressed. So try to use 496 first.
      const uint64_t ADDI16SPCompressLen = 496;
      if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen))
        return ADDI16SPCompressLen;
      if (CanCompress(RVCompressLen))
        return RVCompressLen;
    }
    return 2048 - StackAlign;
  }
  return 0;
}

bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // Preemptible Interrupts have two additional Callee-save Frame Indexes,
  // not tracked by `CSI`.
  if (RVFI->isSiFivePreemptibleInterrupt(MF)) {
    for (int I = 0; I < 2; ++I) {
      int FI = RVFI->getInterruptCSRFrameIndex(I);
      MinCSFrameIndex = std::min<unsigned>(MinCSFrameIndex, FI);
      MaxCSFrameIndex = std::max<unsigned>(MaxCSFrameIndex, FI);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty())
    return true;

  if (RVFI->useQCIInterrupt(MF)) {
    RVFI->setQCIInterruptStackSize(QCIInterruptPushAmount);
  }

  if (RVFI->isPushable(MF)) {
    // Determine how many GPRs we need to push and save it to RVFI.
    unsigned PushedRegNum = getNumPushPopRegs(CSI);

    // `QC.C.MIENTER(.NEST)` will save `ra` and `s0`, so we should only push if
    // we want to push more than 2 registers. Otherwise, we should push if we
    // want to push more than 0 registers.
    unsigned OnlyPushIfMoreThan = RVFI->useQCIInterrupt(MF) ? 2 : 0;
    if (PushedRegNum > OnlyPushIfMoreThan) {
      RVFI->setRVPushRegs(PushedRegNum);
      RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
    }
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  for (auto &CS : CSI) {
    MCRegister Reg = CS.getReg();
    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
    unsigned Size = RegInfo->getSpillSize(*RC);

    if (RVFI->useQCIInterrupt(MF)) {
      const auto *FFI = llvm::find_if(FixedCSRFIQCIInterruptMap, [&](auto P) {
        return P.first == CS.getReg();
      });
      if (FFI != std::end(FixedCSRFIQCIInterruptMap)) {
        int64_t Offset = FFI->second * (int64_t)Size;

        int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset);
        assert(FrameIdx < 0);
        CS.setFrameIdx(FrameIdx);
        continue;
      }
    }

    if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) {
      const auto *FII = llvm::find_if(
          FixedCSRFIMap, [&](MCPhysReg P) { return P == CS.getReg(); });
      unsigned RegNum = std::distance(std::begin(FixedCSRFIMap), FII);

      if (FII != std::end(FixedCSRFIMap)) {
        int64_t Offset;
        if (RVFI->getPushPopKind(MF) ==
            RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp)
          Offset = -int64_t(RVFI->getRVPushRegs() - RegNum) * Size;
        else
          Offset = -int64_t(RegNum + 1) * Size;

        if (RVFI->useQCIInterrupt(MF))
          Offset -= QCIInterruptPushAmount;

        int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset);
        assert(FrameIdx < 0);
        CS.setFrameIdx(FrameIdx);
        continue;
      }
    }

    // Not a fixed slot.
    Align Alignment = RegInfo->getSpillAlign(*RC);
    // We may not be able to satisfy the desired alignment specification of
    // the TargetRegisterClass if the stack alignment is smaller. Use the
    // min.
    Alignment = std::min(Alignment, getStackAlign());
    int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;
    CS.setFrameIdx(FrameIdx);
    if (RISCVRegisterInfo::isRVVRegClass(RC))
      MFI.setStackID(FrameIdx, TargetStackID::ScalableVector);
  }

  if (RVFI->useQCIInterrupt(MF)) {
    // Allocate a fixed object that covers the entire QCI stack allocation,
    // because there are gaps which are reserved for future use.
    MFI.CreateFixedSpillStackObject(
        QCIInterruptPushAmount, -static_cast<int64_t>(QCIInterruptPushAmount));
  }

  if (RVFI->isPushable(MF)) {
    int64_t QCIOffset = RVFI->useQCIInterrupt(MF) ? QCIInterruptPushAmount : 0;
    // Allocate a fixed object that covers the full push.
    if (int64_t PushSize = RVFI->getRVPushStackSize())
      MFI.CreateFixedSpillStackObject(PushSize, -PushSize - QCIOffset);
  } else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) {
    int64_t LibCallFrameSize =
        alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
    MFI.CreateFixedSpillStackObject(LibCallFrameSize, -LibCallFrameSize);
  }

  return true;
}

bool RISCVFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return true;

  MachineFunction *MF = MBB.getParent();
  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL;
  if (MI != MBB.end() && !MI->isDebugInstr())
    DL = MI->getDebugLoc();

  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  if (RVFI->useQCIInterrupt(*MF)) {
    // Emit QC.C.MIENTER(.NEST)
    BuildMI(
        MBB, MI, DL,
        TII.get(RVFI->getInterruptStackKind(*MF) ==
                        RISCVMachineFunctionInfo::InterruptStackKind::QCINest
                    ? RISCV::QC_C_MIENTER_NEST
                    : RISCV::QC_C_MIENTER))
        .setMIFlag(MachineInstr::FrameSetup);

    for (auto [Reg, _Offset] : FixedCSRFIQCIInterruptMap)
      MBB.addLiveIn(Reg);
  }

  if (RVFI->isPushable(*MF)) {
    // Emit CM.PUSH with base StackAdj & evaluate Push stack
    unsigned PushedRegNum = RVFI->getRVPushRegs();
    if (PushedRegNum > 0) {
      // Use encoded number to represent registers to spill.
      unsigned Opcode = getPushOpcode(
          RVFI->getPushPopKind(*MF), hasFP(*MF) && !RVFI->useQCIInterrupt(*MF));
      unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum);
      MachineInstrBuilder PushBuilder =
          BuildMI(MBB, MI, DL, TII.get(Opcode))
              .setMIFlag(MachineInstr::FrameSetup);
      PushBuilder.addImm(RegEnc);
      PushBuilder.addImm(0);

      for (unsigned i = 0; i < PushedRegNum; i++)
        PushBuilder.addUse(FixedCSRFIMap[i], RegState::Implicit);
    }
  } else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) {
    // Add spill libcall via non-callee-saved register t0.
    BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
        .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
        .setMIFlag(MachineInstr::FrameSetup);

    // Add registers spilled in libcall as liveins.
    for (auto &CS : CSI)
      MBB.addLiveIn(CS.getReg());
  }

  // Manually spill values not spilled by libcall & Push/Pop.
  const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
  const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);

  auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) {
    for (auto &CS : CSInfo) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg),
                              CS.getFrameIdx(), RC, TRI, Register(),
                              MachineInstr::FrameSetup);
    }
  };
  storeRegsToStackSlots(UnmanagedCSI);
  storeRegsToStackSlots(RVVCSI);

  return true;
}

static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
  return RISCV::VRRegClass.contains(BaseReg)     ? 1
         : RISCV::VRM2RegClass.contains(BaseReg) ? 2
         : RISCV::VRM4RegClass.contains(BaseReg) ? 4
                                                 : 8;
}

static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
                                     const Register &Reg) {
  MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
  // If it's not a grouped vector register, it doesn't have subregister, so
  // the base register is just itself.
  if (BaseReg == RISCV::NoRegister)
    BaseReg = Reg;
  return BaseReg;
}

void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();

  const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
  if (RVVCSI.empty())
    return;

  uint64_t FixedSize = getStackSizeWithRVVPadding(*MF);
  if (!HasFP) {
    uint64_t ScalarLocalVarSize =
        MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
        RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
    FixedSize -= ScalarLocalVarSize;
  }

  CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup);
  for (auto &CS : RVVCSI) {
    // Insert the spill to the stack frame.
    int FI = CS.getFrameIdx();
    MCRegister BaseReg = getRVVBaseRegister(TRI, CS.getReg());
    unsigned NumRegs = getCalleeSavedRVVNumRegs(CS.getReg());
    for (unsigned i = 0; i < NumRegs; ++i) {
      CFIBuilder.insertCFIInst(createDefCFAOffset(
          TRI, BaseReg + i, -FixedSize, MFI.getObjectOffset(FI) / 8 + i));
    }
  }
}

void RISCVFrameLowering::emitCalleeSavedRVVEpilogCFI(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();

  CFIInstBuilder CFIHelper(MBB, MI, MachineInstr::FrameDestroy);
  const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
  for (auto &CS : RVVCSI) {
    MCRegister BaseReg = getRVVBaseRegister(TRI, CS.getReg());
    unsigned NumRegs = getCalleeSavedRVVNumRegs(CS.getReg());
    for (unsigned i = 0; i < NumRegs; ++i)
      CFIHelper.buildRestore(BaseReg + i);
  }
}

bool RISCVFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return true;

  MachineFunction *MF = MBB.getParent();
  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL;
  if (MI != MBB.end() && !MI->isDebugInstr())
    DL = MI->getDebugLoc();

  // Manually restore values not restored by libcall & Push/Pop.
  // Reverse the restore order in epilog.  In addition, the return
  // address will be restored first in the epilogue. It increases
  // the opportunity to avoid the load-to-use data hazard between
  // loading RA and return by RA.  loadRegFromStackSlot can insert
  // multiple instructions.
  const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
  const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);

  auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
    for (auto &CS : CSInfo) {
      MCRegister Reg = CS.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
                               Register(), MachineInstr::FrameDestroy);
      assert(MI != MBB.begin() &&
             "loadRegFromStackSlot didn't insert any code!");
    }
  };
  loadRegFromStackSlot(RVVCSI);
  loadRegFromStackSlot(UnmanagedCSI);

  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  if (RVFI->useQCIInterrupt(*MF)) {
    // Don't emit anything here because restoration is handled by
    // QC.C.MILEAVERET which we already inserted to return.
    assert(MI->getOpcode() == RISCV::QC_C_MILEAVERET &&
           "Unexpected QCI Interrupt Return Instruction");
  }

  if (RVFI->isPushable(*MF)) {
    unsigned PushedRegNum = RVFI->getRVPushRegs();
    if (PushedRegNum > 0) {
      unsigned Opcode = getPopOpcode(RVFI->getPushPopKind(*MF));
      unsigned RegEnc = RISCVZC::encodeRegListNumRegs(PushedRegNum);
      MachineInstrBuilder PopBuilder =
          BuildMI(MBB, MI, DL, TII.get(Opcode))
              .setMIFlag(MachineInstr::FrameDestroy);
      // Use encoded number to represent registers to restore.
      PopBuilder.addImm(RegEnc);
      PopBuilder.addImm(0);

      for (unsigned i = 0; i < RVFI->getRVPushRegs(); i++)
        PopBuilder.addDef(FixedCSRFIMap[i], RegState::ImplicitDefine);
    }
  } else {
    const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
    if (RestoreLibCall) {
      // Add restore libcall via tail call.
      MachineBasicBlock::iterator NewMI =
          BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
              .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
              .setMIFlag(MachineInstr::FrameDestroy);

      // Remove trailing returns, since the terminator is now a tail call to the
      // restore function.
      if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
        NewMI->copyImplicitOps(*MF, *MI);
        MI->eraseFromParent();
      }
    }
  }
  return true;
}

bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // Keep the conventional code flow when not optimizing.
  if (MF.getFunction().hasOptNone())
    return false;

  return true;
}

bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const MachineFunction *MF = MBB.getParent();
  const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();

  // Make sure VTYPE and VL are not live-in since we will use vsetvli in the
  // prologue to get the VLEN, and that will clobber these registers.
  //
  // We may do also check the stack contains objects with scalable vector type,
  // but this will require iterating over all the stack objects, but this may
  // not worth since the situation is rare, we could do further check in future
  // if we find it is necessary.
  if (STI.preferVsetvliOverReadVLENB() &&
      (MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL)))
    return false;

  if (!RVFI->useSaveRestoreLibCalls(*MF))
    return true;

  // Inserting a call to a __riscv_save libcall requires the use of the register
  // t0 (X5) to hold the return address. Therefore if this register is already
  // used we can't insert the call.

  RegScavenger RS;
  RS.enterBasicBlock(*TmpMBB);
  return !RS.isRegUsed(RISCV::X5);
}

bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();

  // We do not want QC.C.MILEAVERET to be subject to shrink-wrapping - it must
  // come in the final block of its function as it both pops and returns.
  if (RVFI->useQCIInterrupt(*MF))
    return MBB.succ_empty();

  if (!RVFI->useSaveRestoreLibCalls(*MF))
    return true;

  // Using the __riscv_restore libcalls to restore CSRs requires a tail call.
  // This means if we still need to continue executing code within this function
  // the restore cannot take place in this basic block.

  if (MBB.succ_size() > 1)
    return false;

  MachineBasicBlock *SuccMBB =
      MBB.succ_empty() ? TmpMBB->getFallThrough() : *MBB.succ_begin();

  // Doing a tail call should be safe if there are no successors, because either
  // we have a returning block or the end of the block is unreachable, so the
  // restore will be eliminated regardless.
  if (!SuccMBB)
    return true;

  // The successor can only contain a return, since we would effectively be
  // replacing the successor with our own tail return at the end of our block.
  return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
}

bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::ScalableVector:
    return true;
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
  return TargetStackID::ScalableVector;
}

// Synthesize the probe loop.
static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
                                 Register TargetReg, bool IsRVV) {
  assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");

  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineFunction &MF = *MBB.getParent();

  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
  bool IsRV64 = Subtarget.is64Bit();
  Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
  uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);

  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
  MachineBasicBlock *LoopTestMBB =
      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, LoopTestMBB);
  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(MBBInsertPoint, ExitMBB);
  MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
  Register ScratchReg = RISCV::X7;

  // ScratchReg = ProbeSize
  TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);

  // LoopTest:
  //   SUB SP, SP, ProbeSize
  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
      .addReg(SPReg)
      .addReg(ScratchReg)
      .setMIFlags(Flags);

  //   s[d|w] zero, 0(sp)
  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
          TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
      .addReg(RISCV::X0)
      .addReg(SPReg)
      .addImm(0)
      .setMIFlags(Flags);

  if (IsRVV) {
    //  SUB TargetReg, TargetReg, ProbeSize
    BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB),
            TargetReg)
        .addReg(TargetReg)
        .addReg(ScratchReg)
        .setMIFlags(Flags);

    //  BGE TargetReg, ProbeSize, LoopTest
    BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BGE))
        .addReg(TargetReg)
        .addReg(ScratchReg)
        .addMBB(LoopTestMBB)
        .setMIFlags(Flags);

  } else {
    //  BNE SP, TargetReg, LoopTest
    BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
        .addReg(SPReg)
        .addReg(TargetReg)
        .addMBB(LoopTestMBB)
        .setMIFlags(Flags);
  }

  ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  LoopTestMBB->addSuccessor(ExitMBB);
  LoopTestMBB->addSuccessor(LoopTestMBB);
  MBB.addSuccessor(LoopTestMBB);
  // Update liveins.
  fullyRecomputeLiveIns({ExitMBB, LoopTestMBB});
}

void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  // Get the instructions that need to be replaced. We emit at most two of
  // these. Remember them in order to avoid complications coming from the need
  // to traverse the block while potentially creating more blocks.
  SmallVector<MachineInstr *, 4> ToReplace;
  for (MachineInstr &MI : MBB) {
    unsigned Opc = MI.getOpcode();
    if (Opc == RISCV::PROBED_STACKALLOC ||
        Opc == RISCV::PROBED_STACKALLOC_RVV) {
      ToReplace.push_back(&MI);
    }
  }

  for (MachineInstr *MI : ToReplace) {
    if (MI->getOpcode() == RISCV::PROBED_STACKALLOC ||
        MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) {
      MachineBasicBlock::iterator MBBI = MI->getIterator();
      DebugLoc DL = MBB.findDebugLoc(MBBI);
      Register TargetReg = MI->getOperand(0).getReg();
      emitStackProbeInline(MBBI, DL, TargetReg,
                           (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
      MBBI->eraseFromParent();
    }
  }
}
