//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "RISCVLegalizerInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace LegalityPredicates;
using namespace LegalizeMutations;

/// Build a predicate that accepts type index \p TypeIdx when its type is a
/// member of \p IntOrFPVecTys and the subtarget \p ST supports it: vector
/// instructions must be available, 64-bit elements additionally require
/// hasVInstructionsI64(), and types whose known-minimum element count is 1
/// require ELEN == 64.
static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  // Subtarget-dependent half of the check. TypeIdx is copied into the closure;
  // the subtarget is held by reference.
  LegalityPredicate SubtargetAllows = [TypeIdx,
                                       &ST](const LegalityQuery &Query) {
    if (!ST.hasVInstructions())
      return false;

    const LLT Ty = Query.Types[TypeIdx];
    if (Ty.getScalarSizeInBits() == 64 && !ST.hasVInstructionsI64())
      return false;

    const bool HasMinEltCountOne =
        Ty.getElementCount().getKnownMinValue() == 1;
    return !HasMinEltCountOne || ST.getELen() == 64;
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), SubtargetAllows);
}

/// Build a predicate that accepts type index \p TypeIdx when its type is a
/// member of \p BoolVecTys and the subtarget \p ST supports it: vector
/// instructions must be available, and types whose known-minimum element count
/// is 1 require ELEN == 64.
static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  // Subtarget-dependent half of the check. TypeIdx is copied into the closure;
  // the subtarget is held by reference.
  LegalityPredicate SubtargetAllows = [TypeIdx,
                                       &ST](const LegalityQuery &Query) {
    if (!ST.hasVInstructions())
      return false;

    const LLT Ty = Query.Types[TypeIdx];
    const bool HasMinEltCountOne =
        Ty.getElementCount().getKnownMinValue() == 1;
    return !HasMinEltCountOne || ST.getELen() == 64;
  };

  return all(typeInSet(TypeIdx, BoolVecTys), SubtargetAllows);
}

/// Build a predicate that accepts type index \p TypeIdx when its type is a
/// member of \p PtrVecTys and the subtarget \p ST supports it: vector
/// instructions must be available, types whose known-minimum element count is
/// 1 require ELEN == 64, and types whose known-minimum element count is 16 are
/// only accepted with 32-bit elements.
static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
                                           std::initializer_list<LLT> PtrVecTys,
                                           const RISCVSubtarget &ST) {
  // Subtarget-dependent half of the check. TypeIdx is copied into the closure;
  // the subtarget is held by reference.
  LegalityPredicate SubtargetAllows = [TypeIdx,
                                       &ST](const LegalityQuery &Query) {
    if (!ST.hasVInstructions())
      return false;

    const LLT Ty = Query.Types[TypeIdx];
    const auto MinEltCount = Ty.getElementCount().getKnownMinValue();
    if (MinEltCount == 1 && ST.getELen() != 64)
      return false;

    return MinEltCount != 16 || Ty.getScalarSizeInBits() == 32;
  };

  return all(typeInSet(TypeIdx, PtrVecTys), SubtargetAllows);
}

/// Populate the legalization rule sets for the generic opcodes handled by the
/// RISC-V GlobalISel legalizer.
///
/// The rules are selected from the features of \p ST (XLen, ELEN, and the
/// presence of extensions such as F/D/Zfh/Zfa, M/Zmmul, Zbb/Zbkb and V). Once
/// every rule set has been defined, the legacy tables are computed and the
/// rules are verified against the subtarget's instruction info.
///
/// \param ST Subtarget the rules are built for. It is captured by reference in
///           several predicates, so it must outlive this object.
RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
    : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
  // Scalar and pointer type shorthands.
  const LLT sDoubleXLen = LLT::scalar(2 * XLen);
  const LLT p0 = LLT::pointer(0, XLen);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);

  // Scalable vector type shorthands, named nxv<MinElts><EltTy>.
  const LLT nxv1s1 = LLT::scalable_vector(1, s1);
  const LLT nxv2s1 = LLT::scalable_vector(2, s1);
  const LLT nxv4s1 = LLT::scalable_vector(4, s1);
  const LLT nxv8s1 = LLT::scalable_vector(8, s1);
  const LLT nxv16s1 = LLT::scalable_vector(16, s1);
  const LLT nxv32s1 = LLT::scalable_vector(32, s1);
  const LLT nxv64s1 = LLT::scalable_vector(64, s1);

  const LLT nxv1s8 = LLT::scalable_vector(1, s8);
  const LLT nxv2s8 = LLT::scalable_vector(2, s8);
  const LLT nxv4s8 = LLT::scalable_vector(4, s8);
  const LLT nxv8s8 = LLT::scalable_vector(8, s8);
  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv32s8 = LLT::scalable_vector(32, s8);
  const LLT nxv64s8 = LLT::scalable_vector(64, s8);

  const LLT nxv1s16 = LLT::scalable_vector(1, s16);
  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv16s16 = LLT::scalable_vector(16, s16);
  const LLT nxv32s16 = LLT::scalable_vector(32, s16);

  const LLT nxv1s32 = LLT::scalable_vector(1, s32);
  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv8s32 = LLT::scalable_vector(8, s32);
  const LLT nxv16s32 = LLT::scalable_vector(16, s32);

  const LLT nxv1s64 = LLT::scalable_vector(1, s64);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
  const LLT nxv4s64 = LLT::scalable_vector(4, s64);
  const LLT nxv8s64 = LLT::scalable_vector(8, s64);

  const LLT nxv1p0 = LLT::scalable_vector(1, p0);
  const LLT nxv2p0 = LLT::scalable_vector(2, p0);
  const LLT nxv4p0 = LLT::scalable_vector(4, p0);
  const LLT nxv8p0 = LLT::scalable_vector(8, p0);
  const LLT nxv16p0 = LLT::scalable_vector(16, p0);

  using namespace TargetOpcode;

  // Candidate vector type sets. Whether a member is actually usable on this
  // subtarget is decided by the typeIsLegal*Vec predicates.
  auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};

  auto IntOrFPVecTys = {nxv1s8,   nxv2s8,  nxv4s8,  nxv8s8,  nxv16s8, nxv32s8,
                        nxv64s8,  nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
                        nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
                        nxv1s64,  nxv2s64, nxv4s64, nxv8s64};

  auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};

  // Integer arithmetic and logic. s32 add/sub is routed to custom
  // legalization on RV64.
  getActionDefinitionsBuilder({G_ADD, G_SUB})
      .legalFor({sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .customFor(ST.is64Bit(), {s32})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  // Carry/overflow producing arithmetic is always lowered.
  getActionDefinitionsBuilder(
      {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();

  getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();

  // TODO: Use Vector Single-Width Saturating Instructions for vector types.
  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .lower();

  // Shifts: type index 0 is the shifted value, type index 1 the shift amount.
  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .legalFor({{sXLen, sXLen}})
      .customFor(ST.is64Bit(), {{s32, s32}})
      .widenScalarToNextPow2(0)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  // Extensions. Extending from a legal mask (bool) vector takes the custom
  // path.
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalFor({{s32, s16}})
      .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
      .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
      .maxScalar(0, sXLen);

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .customFor({sXLen})
      .clampScalar(0, sXLen, sXLen)
      .lower();

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
    // The wide type is the result of a merge and the source of an unmerge.
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    // With D on RV32, s64 <-> 2 x s32 is kept legal.
    if (XLen == 32 && ST.hasStdExtD()) {
      MergeUnmergeActions.legalIf(
          all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
    }
    MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
        .widenScalarToNextPow2(BigTyIdx, XLen)
        .clampScalar(LitTyIdx, sXLen, sXLen)
        .clampScalar(BigTyIdx, sXLen, sXLen);
  }

  // Funnel shifts and rotates. Rotates are only kept when Zbb or Zbkb is
  // available; otherwise they are lowered.
  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();

  getActionDefinitionsBuilder({G_ROTR, G_ROTL})
      .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
      .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
                 {{s32, s32}})
      .lower();

  getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();

  // Bitcasts are legal between any two legal vector types (integer/FP or
  // mask) on either side.
  getActionDefinitionsBuilder(G_BITCAST).legalIf(
      all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(0, BoolVecTys, ST)),
          LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(1, BoolVecTys, ST))));

  auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
    BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
  else
    BSWAPActions.maxScalar(0, sXLen).lower();

  // Bit counting. The *_ZERO_UNDEF forms are always lowered in the end; without
  // Zbb they are first narrowed to at most sXLen.
  auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
  auto &CountZerosUndefActions =
      getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
  if (ST.hasStdExtZbb()) {
    CountZerosActions.legalFor({{sXLen, sXLen}})
        .customFor({{s32, s32}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
    CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
  }
  CountZerosUndefActions.lower();

  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
  if (ST.hasStdExtZbb()) {
    CTPOPActions.legalFor({{sXLen, sXLen}})
        .clampScalar(0, sXLen, sXLen)
        .scalarSameSizeAs(1, 0);
  } else {
    CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
  }

  // Constants: s32 is legal on RV32, s64 takes the custom path on RV64.
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0})
      .legalFor(!ST.is64Bit(), {s32})
      .customFor(ST.is64Bit(), {s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  // TODO: transform illegal vector types into legal vector type
  getActionDefinitionsBuilder(G_FREEZE)
      .legalFor({s16, s32, p0})
      .legalFor(ST.is64Bit(), {s64})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s16, sXLen);

  // TODO: transform illegal vector types into legal vector type
  // TODO: Merge with G_FREEZE?
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
      .legalFor({s32, sXLen, p0})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  // Integer compare: type index 0 is the result, type index 1 the operands.
  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{sXLen, sXLen}, {sXLen, p0}})
      .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .widenScalarOrEltToNextPow2OrMinSize(1, 8)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  // Select: s64 values are allowed on RV64 and on RV32 with D.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, sXLen}, {p0, sXLen}})
      .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                   typeIsLegalBoolVec(1, BoolVecTys, ST)))
      .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
      .clampScalar(1, sXLen, sXLen);

  // Loads and stores. The rule sets are built up incrementally below and
  // finished with the generic widen/clamp/lower fallbacks.
  auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
  auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
  auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});

  // Return the alignment needed for scalar memory ops. If unaligned scalar mem
  // is supported, we only require byte alignment. Otherwise, we need the memory
  // op to be natively aligned.
  auto getScalarMemAlign = [&ST](unsigned Size) {
    return ST.enableUnalignedScalarMem() ? 8 : Size;
  };

  // Each entry is {value type, pointer type, memory type, required alignment}.
  LoadActions.legalForTypesWithMemDesc(
      {{s16, p0, s8, getScalarMemAlign(8)},
       {s32, p0, s8, getScalarMemAlign(8)},
       {s16, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s32, getScalarMemAlign(32)},
       {p0, p0, sXLen, getScalarMemAlign(XLen)}});
  StoreActions.legalForTypesWithMemDesc(
      {{s16, p0, s8, getScalarMemAlign(8)},
       {s32, p0, s8, getScalarMemAlign(8)},
       {s16, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s32, getScalarMemAlign(32)},
       {p0, p0, sXLen, getScalarMemAlign(XLen)}});
  ExtLoadActions.legalForTypesWithMemDesc(
      {{sXLen, p0, s8, getScalarMemAlign(8)},
       {sXLen, p0, s16, getScalarMemAlign(16)}});
  if (XLen == 64) {
    LoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, getScalarMemAlign(8)},
         {s64, p0, s16, getScalarMemAlign(16)},
         {s64, p0, s32, getScalarMemAlign(32)},
         {s64, p0, s64, getScalarMemAlign(64)}});
    StoreActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, getScalarMemAlign(8)},
         {s64, p0, s16, getScalarMemAlign(16)},
         {s64, p0, s32, getScalarMemAlign(32)},
         {s64, p0, s64, getScalarMemAlign(64)}});
    ExtLoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s32, getScalarMemAlign(32)}});
  } else if (ST.hasStdExtD()) {
    // RV32 with D: full-width s64 loads/stores are legal.
    LoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s64, getScalarMemAlign(64)}});
    StoreActions.legalForTypesWithMemDesc(
        {{s64, p0, s64, getScalarMemAlign(64)}});
  }

  // Vector loads/stores.
  if (ST.hasVInstructions()) {
    LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
                                          {nxv4s8, p0, nxv4s8, 8},
                                          {nxv8s8, p0, nxv8s8, 8},
                                          {nxv16s8, p0, nxv16s8, 8},
                                          {nxv32s8, p0, nxv32s8, 8},
                                          {nxv64s8, p0, nxv64s8, 8},
                                          {nxv2s16, p0, nxv2s16, 16},
                                          {nxv4s16, p0, nxv4s16, 16},
                                          {nxv8s16, p0, nxv8s16, 16},
                                          {nxv16s16, p0, nxv16s16, 16},
                                          {nxv32s16, p0, nxv32s16, 16},
                                          {nxv2s32, p0, nxv2s32, 32},
                                          {nxv4s32, p0, nxv4s32, 32},
                                          {nxv8s32, p0, nxv8s32, 32},
                                          {nxv16s32, p0, nxv16s32, 32}});
    StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
                                           {nxv4s8, p0, nxv4s8, 8},
                                           {nxv8s8, p0, nxv8s8, 8},
                                           {nxv16s8, p0, nxv16s8, 8},
                                           {nxv32s8, p0, nxv32s8, 8},
                                           {nxv64s8, p0, nxv64s8, 8},
                                           {nxv2s16, p0, nxv2s16, 16},
                                           {nxv4s16, p0, nxv4s16, 16},
                                           {nxv8s16, p0, nxv8s16, 16},
                                           {nxv16s16, p0, nxv16s16, 16},
                                           {nxv32s16, p0, nxv32s16, 16},
                                           {nxv2s32, p0, nxv2s32, 32},
                                           {nxv4s32, p0, nxv4s32, 32},
                                           {nxv8s32, p0, nxv8s32, 32},
                                           {nxv16s32, p0, nxv16s32, 32}});

    // The nxv1 types are only added when ELEN is 64.
    if (ST.getELen() == 64) {
      LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
                                            {nxv1s16, p0, nxv1s16, 16},
                                            {nxv1s32, p0, nxv1s32, 32}});
      StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
                                             {nxv1s16, p0, nxv1s16, 16},
                                             {nxv1s32, p0, nxv1s32, 32}});
    }

    // 64-bit element vectors need 64-bit integer vector support.
    if (ST.hasVInstructionsI64()) {
      LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
                                            {nxv2s64, p0, nxv2s64, 64},
                                            {nxv4s64, p0, nxv4s64, 64},
                                            {nxv8s64, p0, nxv8s64, 64}});
      StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
                                             {nxv2s64, p0, nxv2s64, 64},
                                             {nxv4s64, p0, nxv4s64, 64},
                                             {nxv8s64, p0, nxv8s64, 64}});
    }

    // we will take the custom lowering logic if we have scalable vector types
    // with non-standard alignments
    LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
    StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));

    // Pointers require that XLen sized elements are legal.
    if (XLen <= ST.getELen()) {
      LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
      StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
    }
  }

  // Generic fallbacks for anything not matched above.
  LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
      .lowerIfMemSizeNotByteSizePow2()
      .clampScalar(0, s16, sXLen)
      .lower();
  StoreActions
      .clampScalar(0, s16, sXLen)
      .lowerIfMemSizeNotByteSizePow2()
      .lower();

  ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();

  // Pointer arithmetic and pointer/integer conversions.
  getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{sXLen, p0}})
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .legalFor({{p0, sXLen}})
      .clampScalar(1, sXLen, sXLen);

  // Control flow.
  getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);

  getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s32, sXLen})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
      .legalFor({p0});

  // Multiplication: legal with Zmmul, otherwise a libcall.
  if (ST.hasStdExtZmmul()) {
    getActionDefinitionsBuilder(G_MUL)
        .legalFor({sXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sXLen);

    // clang-format off
    getActionDefinitionsBuilder({G_SMULH, G_UMULH})
        .legalFor({sXLen})
        .lower();
    // clang-format on

    getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
  } else {
    getActionDefinitionsBuilder(G_MUL)
        .libcallFor({sXLen, sDoubleXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sDoubleXLen);

    getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});

    getActionDefinitionsBuilder({G_SMULO, G_UMULO})
        .minScalar(0, sXLen)
        // Widen sXLen to sDoubleXLen so we can use a single libcall to get
        // the low bits for the mul result and high bits to do the overflow
        // check.
        .widenScalarIf(typeIs(0, sXLen),
                       LegalizeMutations::changeTo(0, sDoubleXLen))
        .lower();
  }

  // Division and remainder: legal at sXLen with M, libcalls otherwise (and
  // always a libcall at sDoubleXLen).
  if (ST.hasStdExtM()) {
    getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
        .legalFor({sXLen})
        .customFor({s32})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, s32, sDoubleXLen)
        .widenScalarToNextPow2(0);
    getActionDefinitionsBuilder(G_SREM)
        .legalFor({sXLen})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  } else {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .libcallFor({sXLen, sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  }

  // TODO: Use libcall for sDoubleXLen.
  getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();

  // Abs and min/max only get non-lowered handling when Zbb is present.
  getActionDefinitionsBuilder(G_ABS)
      .customFor(ST.hasStdExtZbb(), {sXLen})
      .minScalar(ST.hasStdExtZbb(), 0, sXLen)
      .lower();

  getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
      .legalFor(ST.hasStdExtZbb(), {sXLen})
      .minScalar(ST.hasStdExtZbb(), 0, sXLen)
      .lower();

  getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
      .lower();

  // FP Operations

  // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
  getActionDefinitionsBuilder(
      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
      .legalFor(ST.hasStdExtF(), {s32})
      .legalFor(ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfh(), {s16})
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128});

  getActionDefinitionsBuilder({G_FNEG, G_FABS})
      .legalFor(ST.hasStdExtF(), {s32})
      .legalFor(ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfh(), {s16})
      .lowerFor({s32, s64, s128});

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .legalFor(ST.hasStdExtF(), {{s32, s32}})
      .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
      .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
      .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
      .lower();

  // FIXME: Use Zfhmin.
  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(ST.hasStdExtD(), {{s32, s64}})
      .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
      .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
      .libcallFor({{s32, s64}})
      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(ST.hasStdExtD(), {{s64, s32}})
      .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
      .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
      .libcallFor({{s64, s32}})
      .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
      .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
      .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
      .clampScalar(0, sXLen, sXLen)
      .libcallFor({{sXLen, s32}, {sXLen, s64}})
      .libcallFor(ST.is64Bit(), {{sXLen, s128}});

  // TODO: Support vector version of G_IS_FPCLASS.
  getActionDefinitionsBuilder(G_IS_FPCLASS)
      .customFor(ST.hasStdExtF(), {{s1, s32}})
      .customFor(ST.hasStdExtD(), {{s1, s64}})
      .customFor(ST.hasStdExtZfh(), {{s1, s16}})
      .lowerFor({{s1, s32}, {s1, s64}});

  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor(ST.hasStdExtF(), {s32})
      .legalFor(ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfh(), {s16})
      .lowerFor({s32, s64, s128});

  // FP -> integer conversions: type index 0 is the integer result.
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
      .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
      .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
      .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
      .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
      .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
      .widenScalarToNextPow2(0)
      .minScalar(0, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
      .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});

  // Integer -> FP conversions: type index 1 is the integer source.
  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
      .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
      .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
      .widenScalarToNextPow2(1)
      // Promote to XLen if the operation is legal.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
                   (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
                   ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
                    (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
                    (ST.hasStdExtZfh() &&
                     Query.Types[0].getSizeInBits() == 16));
          },
          LegalizeMutations::changeTo(1, sXLen))
      // Otherwise only promote to s32 since we have si libcalls.
      .minScalar(1, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
      .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});

  // FIXME: We can do custom inline expansion like SelectionDAG.
  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
                               G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_INTRINSIC_ROUNDEVEN})
      .legalFor(ST.hasStdExtZfa(), {s32})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128});

  getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
      .legalFor(ST.hasStdExtZfa(), {s32})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});

  // Math functions that are always libcalls.
  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
                               G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
                               G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
                               G_FTANH})
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128});
  // Both rules take {FP type, integer exponent type} pairs. The s128 entry
  // must be a nested pair as well: a flat {s128, s32} list would select the
  // single-type overload and match on type index 0 alone.
  getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
      .libcallFor({{s32, s32}, {s64, s32}})
      .libcallFor(ST.is64Bit(), {{s128, s32}});

  // Variadic argument support.
  getActionDefinitionsBuilder(G_VASTART).customFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
      // other than sXLen.
      .clampScalar(0, sXLen, sXLen)
      .lowerForCartesianProduct({sXLen, p0}, {p0});

  getActionDefinitionsBuilder(G_VSCALE)
      .clampScalar(0, sXLen, sXLen)
      .customFor({sXLen});

  // Splats: legal for integer/FP vectors with an sXLen scalar, custom for mask
  // vectors with an s1 scalar.
  auto &SplatActions =
      getActionDefinitionsBuilder(G_SPLAT_VECTOR)
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIs(1, sXLen)))
          .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
  // Handle case of s64 element vectors on RV32. If the subtarget does not have
  // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
  // does have f64, then we don't know whether the type is an f64 or an i64,
  // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
  // depending on how the instructions it consumes are legalized. They are not
  // legalized yet since legalization is in reverse postorder, so we cannot
  // make the decision at this moment.
  if (XLen == 32) {
    if (ST.hasVInstructionsF64() && ST.hasStdExtD())
      SplatActions.legalIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
    else if (ST.hasVInstructionsI64())
      SplatActions.customIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
  }

  SplatActions.clampScalar(1, sXLen, sXLen);

  // Accepts s1-element results where both the result and the source have a
  // known-minimum element count of at least 8.
  LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
    LLT DstTy = Query.Types[0];
    LLT SrcTy = Query.Types[1];
    return DstTy.getElementType() == LLT::scalar(1) &&
           DstTy.getElementCount().getKnownMinValue() >= 8 &&
           SrcTy.getElementCount().getKnownMinValue() >= 8;
  };
  getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
      // We don't have the ability to slide mask vectors down indexed by their
      // i1 elements; the smallest we can do is i8. Often we are able to bitcast
      // to equivalent i8 vectors.
      .bitcastIf(
          all(typeIsLegalBoolVec(0, BoolVecTys, ST),
              typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
          [=](const LegalityQuery &Query) {
            // One 8-bit element per eight s1 elements of the result type.
            LLT CastTy = LLT::vector(
                Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
            return std::pair(0, CastTy);
          })
      .customIf(LegalityPredicates::any(
          all(typeIsLegalBoolVec(0, BoolVecTys, ST),
              typeIsLegalBoolVec(1, BoolVecTys, ST)),
          all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
              typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));

  getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
      .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                    typeIsLegalBoolVec(1, BoolVecTys, ST)))
      .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                    typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));

  // Finalize: build the legacy tables and verify the rule sets.
  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}

/// Custom legalization hook for intrinsics.
///
/// Only llvm.va_copy is handled: it is expanded into a pointer-sized load from
/// the source va_list followed by a store of that value into the destination
/// va_list, and \p MI is erased.
///
/// \returns true if \p MI was rewritten, false for any other intrinsic.
bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                           MachineInstr &MI) const {
  if (cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::vacopy)
    return false;

  // vacopy arguments must be legal because of the intrinsic signature.
  // No need to check here.
  MachineIRBuilder &Builder = Helper.MIRBuilder;
  MachineFunction &MF = *MI.getMF();

  // Operand 1 is the destination list, operand 2 the source list.
  const Register DstList = MI.getOperand(1).getReg();
  const LLT ListPtrTy = Builder.getMRI()->getType(DstList);

  // Both memory accesses use the ABI alignment of the pointer type.
  const Align ListAlign = Builder.getDataLayout().getABITypeAlign(
      getTypeForLLT(ListPtrTy, MF.getFunction().getContext()));

  // Read the current value out of the source va_list.
  MachineMemOperand *SrcMMO =
      MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                              ListPtrTy, ListAlign);
  auto Copied = Builder.buildLoad(ListPtrTy, MI.getOperand(2), *SrcMMO);

  // Write it into the destination va_list.
  MachineMemOperand *DstMMO =
      MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                              ListPtrTy, ListAlign);
  Builder.buildStore(Copied, DstList, *DstMMO);

  MI.eraseFromParent();
  return true;
}

/// Expand G_VASTART: store the address of the function's VarArgsFrameIndex
/// slot to the location given by operand 0, reusing the instruction's single
/// memory operand for the store, then erase \p MI.
///
/// \returns true always.
bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
                                         MachineIRBuilder &MIRBuilder) const {
  assert(MI.getOpcode() == TargetOpcode::G_VASTART);

  MachineFunction &MF = *MI.getMF();
  const int VarArgsFI =
      MF.getInfo<RISCVMachineFunctionInfo>()->getVarArgsFrameIndex();

  // Materialize the frame-index address using the type of the va_list pointer.
  const Register ListPtr = MI.getOperand(0).getReg();
  const LLT ListPtrTy = MIRBuilder.getMRI()->getType(ListPtr);
  auto VarArgsAddr = MIRBuilder.buildFrameIndex(ListPtrTy, VarArgsFI);

  assert(MI.hasOneMemOperand());
  MIRBuilder.buildStore(VarArgsAddr, ListPtr, **MI.memoperands_begin());

  MI.eraseFromParent();
  return true;
}

// Expand G_BRJT into an explicit load of the jump table entry followed by a
// G_BRINDIRECT. Returns false when the entry size is not a power of two or
// the jump table entry kind is not one of the kinds handled below.
bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
                                      MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  const DataLayout &DL = MF.getDataLayout();
  const MachineJumpTableInfo *JTInfo = MF.getJumpTableInfo();
  unsigned EntryBytes = JTInfo->getEntrySize(DL);

  const Register TableBase = MI.getOperand(0).getReg();
  const Register Index = MI.getOperand(2).getReg();
  const LLT PtrTy = MRI.getType(TableBase);
  const LLT IndexTy = MRI.getType(Index);

  // The index is scaled with a shift, which only works for power-of-two
  // entry sizes.
  if (!isPowerOf2_32(EntryBytes))
    return false;

  // EntryAddr = TableBase + (Index << log2(EntryBytes))
  auto Log2EntryBytes = MIRBuilder.buildConstant(IndexTy, Log2_32(EntryBytes));
  auto ByteOffset = MIRBuilder.buildShl(IndexTy, Index, Log2EntryBytes);
  auto EntryAddr =
      MIRBuilder.buildPtrAdd(PtrTy, TableBase, ByteOffset.getReg(0));

  MachineMemOperand *EntryMMO = MF.getMachineMemOperand(
      MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
      EntryBytes, Align(JTInfo->getEntryAlignment(DL)));

  Register BranchTarget;
  switch (JTInfo->getEntryKind()) {
  case MachineJumpTableInfo::EK_BlockAddress:
    // The entry is loaded directly as a pointer-typed value.
    BranchTarget = MIRBuilder.buildLoad(PtrTy, EntryAddr, *EntryMMO).getReg(0);
    break;
  case MachineJumpTableInfo::EK_Custom32: {
    // The entry is loaded with sign extension into the index type and then
    // converted to a pointer.
    auto Entry = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
                                           EntryAddr, *EntryMMO);
    BranchTarget = MIRBuilder.buildIntToPtr(PtrTy, Entry).getReg(0);
    break;
  }
  case MachineJumpTableInfo::EK_LabelDifference32: {
    // For PIC, the sequence is:
    //   BRIND(load(Jumptable + index) + RelocBase)
    // RelocBase can be JumpTable, GOT or some sort of global base. Here the
    // jump table pointer itself is used as the base.
    unsigned LoadOpc = TargetOpcode::G_LOAD;
    if (STI.is64Bit())
      LoadOpc = TargetOpcode::G_SEXTLOAD;
    auto Delta =
        MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, EntryAddr, *EntryMMO);
    BranchTarget = MIRBuilder.buildPtrAdd(PtrTy, TableBase, Delta).getReg(0);
    break;
  }
  default:
    return false;
  }

  MIRBuilder.buildBrIndirect(BranchTarget);

  MI.eraseFromParent();
  return true;
}

bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
                                                bool ShouldOptForSize) const {
  assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
  int64_t Imm = APImm.getSExtValue();
  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return false;

  // We only need to cost the immediate, if constant pool lowering is enabled.
  if (!STI.useConstantPoolForLargeInts())
    return false;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  if (Seq.size() <= STI.getMaxBuildIntsCost())
    return false;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (ShouldOptForSize)
    return true;
  //
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
  // that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
  return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}

// Lower G_VSCALE (vscale times the constant in operand 1) to arithmetic on a
// G_READ_VLENB value.
bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
                                        MachineIRBuilder &MIB) const {
  // Scalable vector types for lmul=1 are defined with a 64 bit known minimum
  // size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so vscale is computed
  // as VLENB / 8.
  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
  // Support for VLEN==32 is incomplete.
  if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    return false;

  const LLT XLenTy(STI.getXLenVT());
  const Register Result = MI.getOperand(0).getReg();
  const uint64_t Multiplier = MI.getOperand(1).getCImm()->getZExtValue();

  const auto ReadVLENB = [&] {
    return MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
  };

  // VLENB is assumed to be a multiple of 8. The best shift is picked by hand
  // because SimplifyDemandedBits isn't always able to simplify it.
  if (isPowerOf2_64(Multiplier)) {
    const uint64_t Log2 = Log2_64(Multiplier);
    if (Log2 == 3) {
      // vscale * 8 is VLENB itself.
      MIB.buildInstr(RISCV::G_READ_VLENB, {Result}, {});
    } else {
      auto VLENB = ReadVLENB();
      if (Log2 < 3) {
        auto ShiftAmt = MIB.buildConstant(XLenTy, 3 - Log2);
        MIB.buildLShr(Result, VLENB, ShiftAmt);
      } else {
        auto ShiftAmt = MIB.buildConstant(XLenTy, Log2 - 3);
        MIB.buildShl(Result, VLENB, ShiftAmt);
      }
    }
  } else {
    auto VLENB = ReadVLENB();
    if ((Multiplier % 8) == 0) {
      // A multiple of 8 can be scaled down up front, which avoids shifting
      // the VLENB value.
      auto Scaled = MIB.buildConstant(XLenTy, Multiplier / 8);
      MIB.buildMul(Result, VLENB, Scaled);
    } else {
      // General case: compute vscale first, then multiply.
      auto Three = MIB.buildConstant(XLenTy, 3);
      auto VScale = MIB.buildLShr(XLenTy, VLENB, Three);
      auto Factor = MIB.buildConstant(XLenTy, Multiplier);
      MIB.buildMul(Result, VScale, Factor);
    }
  }

  MI.eraseFromParent();
  return true;
}

// Extensions from mask vectors are custom-lowered to a select between two
// splats: the "true" lanes get -1 for sign-extension and 1 for zero- or
// any-extension, the "false" lanes get 0:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Any-extension therefore produces the same code as zero-extension.
bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
                                     MachineIRBuilder &MIB) const {
  const unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_ZEXT || Opcode == TargetOpcode::G_SEXT ||
         Opcode == TargetOpcode::G_ANYEXT);

  const Register Result = MI.getOperand(0).getReg();
  const Register MaskVec = MI.getOperand(1).getReg();
  const LLT ResultTy = MIB.getMRI()->getType(Result);
  const LLT EltTy = ResultTy.getElementType();

  int64_t TrueLaneVal = 1;
  if (Opcode == TargetOpcode::G_SEXT)
    TrueLaneVal = -1;

  // The zero splat is emitted first, then the "true" splat, then the select.
  auto ZeroElt = MIB.buildConstant(EltTy, 0);
  auto ZeroSplat = MIB.buildSplatVector(ResultTy, ZeroElt);
  auto TrueElt = MIB.buildConstant(EltTy, TrueLaneVal);
  auto TrueSplat = MIB.buildSplatVector(ResultTy, TrueElt);
  MIB.buildSelect(Result, MaskVec, TrueSplat, ZeroSplat);

  MI.eraseFromParent();
  return true;
}

// Custom legalization for vector G_LOAD / G_STORE.
//
// Returns false when the data operand is not a vector or when the instruction
// does not carry exactly one memory operand. If the target lowering allows the
// access for the memory operand's alignment, the instruction is kept as is.
// Otherwise operand 0 is bitcast to a scalable vector of s8 elements covering
// the same number of bits, and the result of that bitcast decides the return
// value.
bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
                                           LegalizerHelper &Helper,
                                           MachineIRBuilder &MIB) const {
  assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
         "Machine instructions must be Load/Store.");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineFunction *MF = MI.getMF();
  const DataLayout &DL = MIB.getDataLayout();
  LLVMContext &Ctx = MF->getFunction().getContext();

  // Operand 0 is the data register of the access (the loaded result or the
  // stored value).
  Register DataReg = MI.getOperand(0).getReg();
  LLT DataTy = MRI.getType(DataReg);
  if (!DataTy.isVector())
    return false;

  if (!MI.hasOneMemOperand())
    return false;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  const auto *TLI = STI.getTargetLowering();
  EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));

  // Nothing to do if the access is permitted with its current alignment.
  if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
    return true;

  unsigned EltSizeBits = DataTy.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");

  // Calculate the new vector type with i8 elements
  unsigned NumElements =
      DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
  LLT NewDataTy = LLT::scalable_vector(NumElements, 8);

  // Report the helper's outcome rather than unconditionally claiming success,
  // so a failed bitcast is not mistaken for a legalized instruction.
  return Helper.bitcast(MI, 0, NewDataTy) == LegalizerHelper::Legalized;
}

/// Compute the mask type that goes with the vector type \p VecTy: a vector
/// of s1 elements with the same (possibly scalable) element count.
static LLT getMaskTypeFor(LLT VecTy) {
  assert(VecTy.isVector());
  return LLT::vector(VecTy.getElementCount(), LLT::scalar(1));
}

/// Emit a G_VMSET_VL producing an all-ones mask for vectors of type \p VecTy,
/// using \p VL as the vector length operand. \p MRI is not used here.
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
                                            MachineIRBuilder &MIB,
                                            MachineRegisterInfo &MRI) {
  return MIB.buildInstr(RISCV::G_VMSET_VL, {getMaskTypeFor(VecTy)}, {VL});
}

/// Build the two common "VL" operands for the scalable vector type \p VecTy.
/// The returned pair is {all-ones mask, vector length}, where the vector
/// length is an XLen-sized constant -1.
static std::pair<MachineInstrBuilder, MachineInstrBuilder>
buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(VecTy.isScalableVector() && "Expecting scalable container type");
  const LLT XLenTy(MIB.getMF().getSubtarget<RISCVSubtarget>().getXLenVT());
  // The VL constant is emitted before the mask that consumes it.
  auto AllLanesVL = MIB.buildConstant(XLenTy, -1);
  auto AllOnes = buildAllOnesMask(VecTy, AllLanesVL, MIB, MRI);
  return {AllOnes, AllLanesVL};
}

/// Emit a G_SPLAT_VECTOR_SPLIT_I64_VL into \p Dst from the two halves \p Lo
/// and \p Hi, with \p Passthru and \p VL forwarded as operands. \p MRI is not
/// used here.
static MachineInstrBuilder
buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
                         Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
                         MachineRegisterInfo &MRI) {
  // TODO: When the Hi bits of the splat are undefined it is fine to splat
  // just Lo, even if it might be sign extended. No case has been introduced
  // yet where an s64 is built with undef upper bits.

  // Fall back to a stack store and stride x0 vector load.
  // TODO: G_SPLAT_VECTOR_SPLIT_I64 still needs to be lowered. SDAG does this
  // in preprocessDAG.
  auto SplitSplat = MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
                                   {Passthru, Lo, Hi, VL});
  return SplitSplat;
}

/// Split the s64 value \p Scalar into two s32 pieces with G_UNMERGE_VALUES
/// and splat them through buildSplatPartsS64WithVL; the first unmerge result
/// is passed as Lo and the second as Hi.
static MachineInstrBuilder
buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
                         const SrcOp &Scalar, const SrcOp &VL,
                         MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
  auto Halves = MIB.buildUnmerge(LLT::scalar(32), Scalar);
  const Register LoHalf = Halves.getReg(0);
  const Register HiHalf = Halves.getReg(1);
  return buildSplatPartsS64WithVL(Dst, Passthru, LoHalf, HiHalf, VL, MIB, MRI);
}

// Custom lowering of G_SPLAT_VECTOR.
//
// Splats of s1 values are lowered to a G_ICMP. Every mask vector type has a
// legal, equivalently-sized i8 vector type that serves as the go-between.
// Splats whose value is a known all-ones or all-zeros constant become
// VMSET_VL or VMCLR_VL instead. Vectors of s64 elements on rv32 take a
// separate path that splits the scalar in two.
bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
                                             MachineIRBuilder &MIB) const {
  assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  const Register Result = MI.getOperand(0).getReg();
  const Register Scalar = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT XLenTy(STI.getXLenVT());

  // s64 element vectors on rv32: the scalar does not fit in one register.
  const bool IsRV32 = XLenTy.getSizeInBits() == 32;
  if (IsRV32 && ResultTy.getElementType().getSizeInBits() == 64) {
    auto VL = buildDefaultVLOps(ResultTy, MIB, MRI).second;
    auto Passthru = MIB.buildUndef(ResultTy);
    buildSplatSplitS64WithVL(Result, Passthru, Scalar, VL, MIB, MRI);
    MI.eraseFromParent();
    return true;
  }

  // Replace MI with a single mask-producing instruction that only takes VL.
  const auto ReplaceWithMaskOp = [&](unsigned MaskOpc) {
    auto VL = buildDefaultVLOps(ResultTy, MIB, MRI).second;
    MIB.buildInstr(MaskOpc, {Result}, {VL});
    MI.eraseFromParent();
    return true;
  };

  // Constant all-ones and all-zeros splats get dedicated instructions.
  MachineInstr &ScalarDef = *MRI.getVRegDef(Scalar);
  if (isAllOnesOrAllOnesSplat(ScalarDef, MRI))
    return ReplaceWithMaskOp(RISCV::G_VMSET_VL);
  if (isNullOrNullSplat(ScalarDef, MRI))
    return ReplaceWithMaskOp(RISCV::G_VMCLR_VL);

  // Non-constant mask splat (unknown whether it is all zeros or all ones):
  // promote to an s8 splat of the low bit and compare it against zero.
  const LLT S8 = LLT::scalar(8);
  const LLT S8VecTy = ResultTy.changeElementType(S8);
  auto Widened = MIB.buildZExt(S8, Scalar);
  auto One = MIB.buildConstant(S8, 1);
  auto LowBit = MIB.buildAnd(S8, Widened, One);
  auto LowBitSplat = MIB.buildSplatVector(S8VecTy, LowBit);
  auto Zero = MIB.buildConstant(S8, 0);
  auto ZeroSplat = MIB.buildSplatVector(S8VecTy, Zero);
  MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Result, LowBitSplat, ZeroSplat);
  MI.eraseFromParent();
  return true;
}

/// Return the scalable vector type with the element type of \p VecTy whose
/// known minimum size is RISCV::RVVBitsPerBlock bits.
static LLT getLMUL1Ty(LLT VecTy) {
  const LLT EltTy = VecTy.getElementType();
  const auto EltBits = EltTy.getSizeInBits();
  assert(EltBits <= 64 && "Unexpected vector LLT");
  return LLT::scalable_vector(RISCV::RVVBitsPerBlock / EltBits, EltTy);
}

// Custom lowering of G_EXTRACT_SUBVECTOR.
//
// The instruction is left unchanged (returning true) when the index is 0 or
// when decomposeSubvectorInsertExtractToSubRegs leaves no remaining index.
// Mask (s1 element) extracts go through an s8 vector and a compare against
// zero. Everything else is lowered to an optional extract of an LMUL=1 sized
// register, a G_VSLIDEDOWN_VL by the remaining index, and a final extract at
// index 0.
bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
                                                  MachineIRBuilder &MIB) const {
  GExtractSubvector &ES = cast<GExtractSubvector>(MI);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = ES.getReg(0);
  Register Src = ES.getSrcVec();
  uint64_t Idx = ES.getIndexImm();

  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (Idx == 0)
    return true;

  // LitTy is the type of the extracted subvector, BigTy the type of the
  // vector it is extracted from.
  LLT LitTy = MRI.getType(Dst);
  LLT BigTy = MRI.getType(Src);

  if (LitTy.getElementType() == LLT::scalar(1)) {
    // We can't slide this mask vector down indexed by its i1 elements.
    // Just choose the slow path: zero-extend the source to i8 elements,
    // extract the subvector from that, then compare against zero to get back
    // to an i1 result.
    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
    LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
    auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
    auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
    auto SplatZero = MIB.buildSplatVector(
        ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
    MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
    MI.eraseFromParent();
    return true;
  }

  // extract_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  MVT LitTyMVT = getMVTForLLT(LitTy);
  auto Decompose =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
  // Decompose is {subregister index, remaining element index}.
  unsigned RemIdx = Decompose.second;

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return true;

  // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
  // was > M1 then the index would need to be a multiple of VLMAX, and so would
  // divide exactly.
  assert(
      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
      RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVVType::LMUL_1);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  LLT InterLitTy = BigTy;
  Register Vec = Src;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(Decompose.first != RISCV::NoSubRegister);
    InterLitTy = getLMUL1Ty(BigTy);
    // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
    // specified on the source Register (the equivalent) since generic virtual
    // register does not allow subregister index.
    Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  const LLT XLenTy(STI.getXLenVT());
  auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
  auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
  uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
  // Operands: undef passthru, source vector, slide amount, mask, VL, policy.
  auto Slidedown = MIB.buildInstr(
      RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
      {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  MIB.buildExtractSubvector(Dst, Slidedown, 0);

  MI.eraseFromParent();
  return true;
}

// Custom lowering of G_INSERT_SUBVECTOR.
//
// The instruction is left unchanged (returning true) when the index is 0,
// when the big vector is defined by G_IMPLICIT_DEF, or when the insert lines
// up with whole vector registers. Mask (s1 element) inserts are handed to the
// LegalizerHelper (bitcast to s8 vectors when both element counts allow it,
// otherwise widening to s8 elements) and succeed only if the helper reports
// Legalized. Everything else is lowered to an optional extract of an LMUL=1
// sized register, a G_VMV_V_V_VL or G_VSLIDEUP_VL, and an optional insert
// back into the big vector.
bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
                                                 LegalizerHelper &Helper,
                                                 MachineIRBuilder &MIB) const {
  GInsertSubvector &IS = cast<GInsertSubvector>(MI);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = IS.getReg(0);
  Register BigVec = IS.getBigVec();
  Register LitVec = IS.getSubVec();
  uint64_t Idx = IS.getIndexImm();

  LLT BigTy = MRI.getType(BigVec);
  LLT LitTy = MRI.getType(LitVec);

  if (Idx == 0 ||
      MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    return true;

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
  // vectors and truncate down after the insert.
  if (LitTy.getElementType() == LLT::scalar(1)) {
    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
    // LegalizeResult is an unscoped enum; compare explicitly so that a
    // failure result is not implicitly converted to `true`.
    if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
      return Helper.bitcast(
                 IS, 0,
                 LLT::vector(BigTy.getElementCount().divideCoefficientBy(8),
                             8)) == LegalizerHelper::Legalized;

    // We can't slide this mask vector up indexed by its i1 elements.
    // This poses a problem when we wish to insert a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
    return Helper.widenScalar(IS, 0, ExtBigTy) == LegalizerHelper::Legalized;
  }

  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);

  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
  assert(isPowerOf2_64(
      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
  bool ExactlyVecRegSized =
      STI.expandVScale(LitTy.getSizeInBits())
          .isKnownMultipleOf(STI.expandVScale(VecRegSize));

  // If the Idx has been completely eliminated and this subvector's size is a
  // vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  if (RemIdx == 0 && ExactlyVecRegSized)
    return true;

  // If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.

  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  const LLT XLenTy(STI.getXLenVT());
  LLT InterLitTy = BigTy;
  Register AlignedExtract = BigVec;
  unsigned AlignedIdx = Idx - RemIdx;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    InterLitTy = getLMUL1Ty(BigTy);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a G_EXTRACT on a subreg.
    AlignedExtract =
        MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
  }

  // Place the small vector at element 0 of an otherwise undef InterLitTy
  // vector so it can be used as the source of the move / slide below.
  auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
                                         LitVec, 0);

  auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
  auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  MachineInstrBuilder Inserted;
  bool NeedInsertSubvec =
      TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
  // When no re-insertion is needed the move / slide writes Dst directly.
  Register InsertedDst =
      NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
  if (RemIdx == 0) {
    Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
                              {AlignedExtract, Insert, VL});
  } else {
    auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
    // Construct the vector length corresponding to RemIdx + length(LitTy).
    VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
    // Use tail agnostic policy if we're inserting over InterLitTy's tail.
    ElementCount EndIndex =
        ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
    uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (STI.expandVScale(EndIndex) ==
        STI.expandVScale(InterLitTy.getElementCount()))
      Policy = RISCVVType::TAIL_AGNOSTIC;

    Inserted =
        MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
                       {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (NeedInsertSubvec)
    MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);

  MI.eraseFromParent();
  return true;
}

/// Map a generic opcode to the RISC-V specific generic opcode used for it by
/// the custom legalization in this file. Any opcode without a mapping is
/// unreachable.
static unsigned getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  // Shifts.
  case TargetOpcode::G_SHL:
    return RISCV::G_SLLW;
  case TargetOpcode::G_LSHR:
    return RISCV::G_SRLW;
  case TargetOpcode::G_ASHR:
    return RISCV::G_SRAW;

  // Rotates.
  case TargetOpcode::G_ROTL:
    return RISCV::G_ROLW;
  case TargetOpcode::G_ROTR:
    return RISCV::G_RORW;

  // Division and remainder.
  case TargetOpcode::G_SDIV:
    return RISCV::G_DIVW;
  case TargetOpcode::G_UDIV:
    return RISCV::G_DIVUW;
  case TargetOpcode::G_UREM:
    return RISCV::G_REMUW;

  // Bit counting.
  case TargetOpcode::G_CTLZ:
    return RISCV::G_CLZW;
  case TargetOpcode::G_CTTZ:
    return RISCV::G_CTZW;

  // Floating point to integer conversions.
  case TargetOpcode::G_FPTOSI:
    return RISCV::G_FCVT_W_RV64;
  case TargetOpcode::G_FPTOUI:
    return RISCV::G_FCVT_WU_RV64;

  default:
    llvm_unreachable("Unexpected opcode");
  }
}

// Entry point for instructions marked for custom legalization. Dispatches on
// the opcode of MI and returns true if the instruction was legalized (or is
// fine as it stands) and false if it could not be handled.
bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    // LegalizeResult is an unscoped enum, so compare explicitly instead of
    // relying on an implicit conversion that would turn a failure into true.
    return Helper.lowerAbsToMaxNeg(MI) == LegalizerHelper::Legalized;
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    // Constants that do not belong in the constant pool are kept as they are.
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI) == LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_ADD: {
    // Perform the operation at sXLen and follow it with a G_SEXT_INREG from
    // 32 bits and a G_TRUNC back to the original destination.
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);

    Register DstALU = MRI.createGenericVirtualRegister(sXLen);

    MachineOperand &MO = MI.getOperand(0);
    // The new instructions consume the result of MI, so they have to be
    // inserted after it.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
    auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);

    MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
    MO.setReg(DstALU);

    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SEXT_INREG: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int64_t SizeInBits = MI.getOperand(2).getImm();
    // Source size of 32 is sext.w.
    if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
      return true;

    // With Zbb, 8 and 16 bit sources are kept as they are.
    if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
      // We don't need a custom node for shift by constant. Just widen the
      // source and the shift amount.
      unsigned ExtOpc = TargetOpcode::G_ANYEXT;
      if (MI.getOpcode() == TargetOpcode::G_ASHR)
        ExtOpc = TargetOpcode::G_SEXT;
      else if (MI.getOpcode() == TargetOpcode::G_LSHR)
        ExtOpc = TargetOpcode::G_ZEXT;

      Helper.Observer.changingInstr(MI);
      Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
      Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
      Helper.widenScalarDst(MI, sXLen);
      Helper.Observer.changedInstr(MI);
      return true;
    }

    // Variable shift amount: widen everything to sXLen and switch to the
    // opcode chosen by getRISCVWOpcode.
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
    // Widen both sources and the result to sXLen, then switch to the opcode
    // chosen by getRISCVWOpcode.
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ: {
    // Same scheme as above for the single-source bit counting operations.
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // Only the result is widened; the replacement opcode takes an additional
    // rounding mode immediate, which is set to RTZ.
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Turn LLVM IR's floating point classes to that in RISC-V,
    // by simply rotating the 10-bit immediate right by two bits.
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    // The result is whether any of the requested class bits is set in the
    // G_FCLASS value.
    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_BRJT:
    return legalizeBRJT(MI, MIRBuilder);
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return legalizeExtractSubvector(MI, MIRBuilder);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return legalizeInsertSubvector(MI, Helper, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, Helper, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}
