//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// This file implements the targeting of the Machinelegalizer class for RISC-V. /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// #include "RISCVLegalizerInfo.h" #include "MCTargetDesc/RISCVMatInt.h" #include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" using namespace llvm; using namespace LegalityPredicates; using namespace LegalizeMutations; static LegalityPredicate typeIsLegalIntOrFPVec(unsigned TypeIdx, std::initializer_list IntOrFPVecTys, const RISCVSubtarget &ST) { LegalityPredicate P = [=, &ST](const LegalityQuery &Query) { return ST.hasVInstructions() && (Query.Types[TypeIdx].getScalarSizeInBits() != 64 || ST.hasVInstructionsI64()) && (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 || ST.getELen() == 64); }; return all(typeInSet(TypeIdx, IntOrFPVecTys), P); } static LegalityPredicate typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list BoolVecTys, const RISCVSubtarget &ST) { LegalityPredicate P = [=, &ST](const LegalityQuery &Query) { return ST.hasVInstructions() && (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 || ST.getELen() == 64); }; return all(typeInSet(TypeIdx, BoolVecTys), P); } static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx, std::initializer_list PtrVecTys, const RISCVSubtarget &ST) { LegalityPredicate P = [=, &ST](const LegalityQuery &Query) { return ST.hasVInstructions() && (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 || ST.getELen() == 64) && (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 || Query.Types[TypeIdx].getScalarSizeInBits() == 32); }; return all(typeInSet(TypeIdx, PtrVecTys), P); } RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) { const LLT sDoubleXLen = LLT::scalar(2 * XLen); const LLT p0 = LLT::pointer(0, XLen); const LLT s1 = LLT::scalar(1); const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); const LLT s128 = LLT::scalar(128); const LLT nxv1s1 = LLT::scalable_vector(1, s1); const LLT nxv2s1 = LLT::scalable_vector(2, s1); const LLT nxv4s1 = LLT::scalable_vector(4, s1); const LLT nxv8s1 = LLT::scalable_vector(8, s1); const LLT nxv16s1 = LLT::scalable_vector(16, s1); const LLT nxv32s1 = LLT::scalable_vector(32, s1); const LLT nxv64s1 = LLT::scalable_vector(64, s1); const LLT nxv1s8 = LLT::scalable_vector(1, s8); const LLT nxv2s8 = LLT::scalable_vector(2, s8); const LLT nxv4s8 = LLT::scalable_vector(4, s8); const LLT nxv8s8 = LLT::scalable_vector(8, s8); const LLT nxv16s8 = LLT::scalable_vector(16, s8); const LLT nxv32s8 = LLT::scalable_vector(32, s8); const LLT nxv64s8 = LLT::scalable_vector(64, s8); const LLT nxv1s16 = LLT::scalable_vector(1, s16); const LLT nxv2s16 = LLT::scalable_vector(2, s16); const LLT nxv4s16 = LLT::scalable_vector(4, s16); const LLT nxv8s16 = LLT::scalable_vector(8, s16); const LLT nxv16s16 = LLT::scalable_vector(16, s16); const LLT nxv32s16 = LLT::scalable_vector(32, s16); const LLT nxv1s32 = LLT::scalable_vector(1, s32); const LLT nxv2s32 = LLT::scalable_vector(2, s32); const LLT nxv4s32 = LLT::scalable_vector(4, s32); const LLT nxv8s32 = LLT::scalable_vector(8, s32); const LLT nxv16s32 = LLT::scalable_vector(16, s32); const LLT nxv1s64 = LLT::scalable_vector(1, s64); const LLT nxv2s64 = LLT::scalable_vector(2, s64); const LLT nxv4s64 = LLT::scalable_vector(4, s64); const LLT nxv8s64 = LLT::scalable_vector(8, s64); const LLT nxv1p0 = LLT::scalable_vector(1, p0); const LLT nxv2p0 = LLT::scalable_vector(2, p0); const LLT nxv4p0 = LLT::scalable_vector(4, p0); const LLT nxv8p0 = LLT::scalable_vector(8, p0); const LLT nxv16p0 = LLT::scalable_vector(16, p0); using namespace TargetOpcode; auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1}; auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8, nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16, nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32, nxv1s64, nxv2s64, nxv4s64, nxv8s64}; auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0}; getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({sXLen}) .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)) .customFor(ST.is64Bit(), {s32}) .widenScalarToNextPow2(0) .clampScalar(0, sXLen, sXLen); getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) .legalFor({sXLen}) .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)) .widenScalarToNextPow2(0) .clampScalar(0, sXLen, sXLen); getActionDefinitionsBuilder( {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower(); getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower(); // TODO: Use Vector Single-Width Saturating Instructions for vector types. getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}) .lower(); getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) .legalFor({{sXLen, sXLen}}) .customFor(ST.is64Bit(), {{s32, s32}}) .widenScalarToNextPow2(0) .clampScalar(1, sXLen, sXLen) .clampScalar(0, sXLen, sXLen); getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) .legalFor({{s32, s16}}) .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}}) .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))) .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST)) .maxScalar(0, sXLen); getActionDefinitionsBuilder(G_SEXT_INREG) .customFor({sXLen}) .clampScalar(0, sXLen, sXLen) .lower(); // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op); unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; if (XLen == 32 && ST.hasStdExtD()) { MergeUnmergeActions.legalIf( all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32))); } MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen) .widenScalarToNextPow2(BigTyIdx, XLen) .clampScalar(LitTyIdx, sXLen, sXLen) .clampScalar(BigTyIdx, sXLen, sXLen); } getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); getActionDefinitionsBuilder({G_ROTR, G_ROTL}) .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}}) .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()), {{s32, s32}}) .lower(); getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower(); getActionDefinitionsBuilder(G_BITCAST).legalIf( all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalBoolVec(0, BoolVecTys, ST)), LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST), typeIsLegalBoolVec(1, BoolVecTys, ST)))); auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP); if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen); else BSWAPActions.maxScalar(0, sXLen).lower(); auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ}); auto &CountZerosUndefActions = getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF}); if (ST.hasStdExtZbb()) { CountZerosActions.legalFor({{sXLen, sXLen}}) .customFor({{s32, s32}}) .clampScalar(0, s32, sXLen) .widenScalarToNextPow2(0) .scalarSameSizeAs(1, 0); } else { CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0); } CountZerosUndefActions.lower(); auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP); if (ST.hasStdExtZbb()) { CTPOPActions.legalFor({{sXLen, sXLen}}) .clampScalar(0, sXLen, sXLen) .scalarSameSizeAs(1, 0); } else { CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); } getActionDefinitionsBuilder(G_CONSTANT) .legalFor({p0}) .legalFor(!ST.is64Bit(), {s32}) .customFor(ST.is64Bit(), {s64}) .widenScalarToNextPow2(0) .clampScalar(0, sXLen, sXLen); // TODO: transform illegal vector types into legal vector type getActionDefinitionsBuilder(G_FREEZE) .legalFor({s16, s32, p0}) .legalFor(ST.is64Bit(), {s64}) .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST)) .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)) .widenScalarToNextPow2(0) .clampScalar(0, s16, sXLen); // TODO: transform illegal vector types into legal vector type // TODO: Merge with G_FREEZE? getActionDefinitionsBuilder( {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER}) .legalFor({s32, sXLen, p0}) .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST)) .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)) .widenScalarToNextPow2(0) .clampScalar(0, s32, sXLen); getActionDefinitionsBuilder(G_ICMP) .legalFor({{sXLen, sXLen}, {sXLen, p0}}) .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))) .widenScalarOrEltToNextPow2OrMinSize(1, 8) .clampScalar(1, sXLen, sXLen) .clampScalar(0, sXLen, sXLen); getActionDefinitionsBuilder(G_SELECT) .legalFor({{s32, sXLen}, {p0, sXLen}}) .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalBoolVec(1, BoolVecTys, ST))) .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}}) .widenScalarToNextPow2(0) .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32) .clampScalar(1, sXLen, sXLen); auto &LoadActions = getActionDefinitionsBuilder(G_LOAD); auto &StoreActions = getActionDefinitionsBuilder(G_STORE); auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}); // Return the alignment needed for scalar memory ops. If unaligned scalar mem // is supported, we only require byte alignment. Otherwise, we need the memory // op to be natively aligned. auto getScalarMemAlign = [&ST](unsigned Size) { return ST.enableUnalignedScalarMem() ? 8 : Size; }; LoadActions.legalForTypesWithMemDesc( {{s16, p0, s8, getScalarMemAlign(8)}, {s32, p0, s8, getScalarMemAlign(8)}, {s16, p0, s16, getScalarMemAlign(16)}, {s32, p0, s16, getScalarMemAlign(16)}, {s32, p0, s32, getScalarMemAlign(32)}, {p0, p0, sXLen, getScalarMemAlign(XLen)}}); StoreActions.legalForTypesWithMemDesc( {{s16, p0, s8, getScalarMemAlign(8)}, {s32, p0, s8, getScalarMemAlign(8)}, {s16, p0, s16, getScalarMemAlign(16)}, {s32, p0, s16, getScalarMemAlign(16)}, {s32, p0, s32, getScalarMemAlign(32)}, {p0, p0, sXLen, getScalarMemAlign(XLen)}}); ExtLoadActions.legalForTypesWithMemDesc( {{sXLen, p0, s8, getScalarMemAlign(8)}, {sXLen, p0, s16, getScalarMemAlign(16)}}); if (XLen == 64) { LoadActions.legalForTypesWithMemDesc( {{s64, p0, s8, getScalarMemAlign(8)}, {s64, p0, s16, getScalarMemAlign(16)}, {s64, p0, s32, getScalarMemAlign(32)}, {s64, p0, s64, getScalarMemAlign(64)}}); StoreActions.legalForTypesWithMemDesc( {{s64, p0, s8, getScalarMemAlign(8)}, {s64, p0, s16, getScalarMemAlign(16)}, {s64, p0, s32, getScalarMemAlign(32)}, {s64, p0, s64, getScalarMemAlign(64)}}); ExtLoadActions.legalForTypesWithMemDesc( {{s64, p0, s32, getScalarMemAlign(32)}}); } else if (ST.hasStdExtD()) { LoadActions.legalForTypesWithMemDesc( {{s64, p0, s64, getScalarMemAlign(64)}}); StoreActions.legalForTypesWithMemDesc( {{s64, p0, s64, getScalarMemAlign(64)}}); } // Vector loads/stores. if (ST.hasVInstructions()) { LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8}, {nxv4s8, p0, nxv4s8, 8}, {nxv8s8, p0, nxv8s8, 8}, {nxv16s8, p0, nxv16s8, 8}, {nxv32s8, p0, nxv32s8, 8}, {nxv64s8, p0, nxv64s8, 8}, {nxv2s16, p0, nxv2s16, 16}, {nxv4s16, p0, nxv4s16, 16}, {nxv8s16, p0, nxv8s16, 16}, {nxv16s16, p0, nxv16s16, 16}, {nxv32s16, p0, nxv32s16, 16}, {nxv2s32, p0, nxv2s32, 32}, {nxv4s32, p0, nxv4s32, 32}, {nxv8s32, p0, nxv8s32, 32}, {nxv16s32, p0, nxv16s32, 32}}); StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8}, {nxv4s8, p0, nxv4s8, 8}, {nxv8s8, p0, nxv8s8, 8}, {nxv16s8, p0, nxv16s8, 8}, {nxv32s8, p0, nxv32s8, 8}, {nxv64s8, p0, nxv64s8, 8}, {nxv2s16, p0, nxv2s16, 16}, {nxv4s16, p0, nxv4s16, 16}, {nxv8s16, p0, nxv8s16, 16}, {nxv16s16, p0, nxv16s16, 16}, {nxv32s16, p0, nxv32s16, 16}, {nxv2s32, p0, nxv2s32, 32}, {nxv4s32, p0, nxv4s32, 32}, {nxv8s32, p0, nxv8s32, 32}, {nxv16s32, p0, nxv16s32, 32}}); if (ST.getELen() == 64) { LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8}, {nxv1s16, p0, nxv1s16, 16}, {nxv1s32, p0, nxv1s32, 32}}); StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8}, {nxv1s16, p0, nxv1s16, 16}, {nxv1s32, p0, nxv1s32, 32}}); } if (ST.hasVInstructionsI64()) { LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64}, {nxv2s64, p0, nxv2s64, 64}, {nxv4s64, p0, nxv4s64, 64}, {nxv8s64, p0, nxv8s64, 64}}); StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64}, {nxv2s64, p0, nxv2s64, 64}, {nxv4s64, p0, nxv4s64, 64}, {nxv8s64, p0, nxv8s64, 64}}); } // we will take the custom lowering logic if we have scalable vector types // with non-standard alignments LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)); StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)); // Pointers require that XLen sized elements are legal. if (XLen <= ST.getELen()) { LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST)); StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST)); } } LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8) .lowerIfMemSizeNotByteSizePow2() .clampScalar(0, s16, sXLen) .lower(); StoreActions .clampScalar(0, s16, sXLen) .lowerIfMemSizeNotByteSizePow2() .lower(); ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower(); getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}}); getActionDefinitionsBuilder(G_PTRTOINT) .legalFor({{sXLen, p0}}) .clampScalar(0, sXLen, sXLen); getActionDefinitionsBuilder(G_INTTOPTR) .legalFor({{p0, sXLen}}) .clampScalar(1, sXLen, sXLen); getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen); getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}}); getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); getActionDefinitionsBuilder(G_PHI) .legalFor({p0, s32, sXLen}) .widenScalarToNextPow2(0) .clampScalar(0, s32, sXLen); getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL}) .legalFor({p0}); if (ST.hasStdExtZmmul()) { getActionDefinitionsBuilder(G_MUL) .legalFor({sXLen}) .widenScalarToNextPow2(0) .clampScalar(0, sXLen, sXLen); // clang-format off getActionDefinitionsBuilder({G_SMULH, G_UMULH}) .legalFor({sXLen}) .lower(); // clang-format on getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower(); } else { getActionDefinitionsBuilder(G_MUL) .libcallFor({sXLen, sDoubleXLen}) .widenScalarToNextPow2(0) .clampScalar(0, sXLen, sDoubleXLen); getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen}); getActionDefinitionsBuilder({G_SMULO, G_UMULO}) .minScalar(0, sXLen) // Widen sXLen to sDoubleXLen so we can use a single libcall to get // the low bits for the mul result and high bits to do the overflow // check. .widenScalarIf(typeIs(0, sXLen), LegalizeMutations::changeTo(0, sDoubleXLen)) .lower(); } if (ST.hasStdExtM()) { getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM}) .legalFor({sXLen}) .customFor({s32}) .libcallFor({sDoubleXLen}) .clampScalar(0, s32, sDoubleXLen) .widenScalarToNextPow2(0); getActionDefinitionsBuilder(G_SREM) .legalFor({sXLen}) .libcallFor({sDoubleXLen}) .clampScalar(0, sXLen, sDoubleXLen) .widenScalarToNextPow2(0); } else { getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM}) .libcallFor({sXLen, sDoubleXLen}) .clampScalar(0, sXLen, sDoubleXLen) .widenScalarToNextPow2(0); } // TODO: Use libcall for sDoubleXLen. getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower(); getActionDefinitionsBuilder(G_ABS) .customFor(ST.hasStdExtZbb(), {sXLen}) .minScalar(ST.hasStdExtZbb(), 0, sXLen) .lower(); getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN}) .legalFor(ST.hasStdExtZbb(), {sXLen}) .minScalar(ST.hasStdExtZbb(), 0, sXLen) .lower(); getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower(); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE}) .lower(); // FP Operations // FIXME: Support s128 for rv32 when libcall handling is able to use sret. getActionDefinitionsBuilder( {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM}) .legalFor(ST.hasStdExtF(), {s32}) .legalFor(ST.hasStdExtD(), {s64}) .legalFor(ST.hasStdExtZfh(), {s16}) .libcallFor({s32, s64}) .libcallFor(ST.is64Bit(), {s128}); getActionDefinitionsBuilder({G_FNEG, G_FABS}) .legalFor(ST.hasStdExtF(), {s32}) .legalFor(ST.hasStdExtD(), {s64}) .legalFor(ST.hasStdExtZfh(), {s16}) .lowerFor({s32, s64, s128}); getActionDefinitionsBuilder(G_FREM) .libcallFor({s32, s64}) .libcallFor(ST.is64Bit(), {s128}) .minScalar(0, s32) .scalarize(0); getActionDefinitionsBuilder(G_FCOPYSIGN) .legalFor(ST.hasStdExtF(), {{s32, s32}}) .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}}) .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}}) .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}}) .lower(); // FIXME: Use Zfhmin. getActionDefinitionsBuilder(G_FPTRUNC) .legalFor(ST.hasStdExtD(), {{s32, s64}}) .legalFor(ST.hasStdExtZfh(), {{s16, s32}}) .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}}) .libcallFor({{s32, s64}}) .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}); getActionDefinitionsBuilder(G_FPEXT) .legalFor(ST.hasStdExtD(), {{s64, s32}}) .legalFor(ST.hasStdExtZfh(), {{s32, s16}}) .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}}) .libcallFor({{s64, s32}}) .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}); getActionDefinitionsBuilder(G_FCMP) .legalFor(ST.hasStdExtF(), {{sXLen, s32}}) .legalFor(ST.hasStdExtD(), {{sXLen, s64}}) .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}}) .clampScalar(0, sXLen, sXLen) .libcallFor({{sXLen, s32}, {sXLen, s64}}) .libcallFor(ST.is64Bit(), {{sXLen, s128}}); // TODO: Support vector version of G_IS_FPCLASS. getActionDefinitionsBuilder(G_IS_FPCLASS) .customFor(ST.hasStdExtF(), {{s1, s32}}) .customFor(ST.hasStdExtD(), {{s1, s64}}) .customFor(ST.hasStdExtZfh(), {{s1, s16}}) .lowerFor({{s1, s32}, {s1, s64}}); getActionDefinitionsBuilder(G_FCONSTANT) .legalFor(ST.hasStdExtF(), {s32}) .legalFor(ST.hasStdExtD(), {s64}) .legalFor(ST.hasStdExtZfh(), {s16}) .lowerFor({s32, s64, s128}); getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) .legalFor(ST.hasStdExtF(), {{sXLen, s32}}) .legalFor(ST.hasStdExtD(), {{sXLen, s64}}) .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}}) .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}}) .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}}) .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}}) .widenScalarToNextPow2(0) .minScalar(0, s32) .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}}) .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32. .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalFor(ST.hasStdExtF(), {{s32, sXLen}}) .legalFor(ST.hasStdExtD(), {{s64, sXLen}}) .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}}) .widenScalarToNextPow2(1) // Promote to XLen if the operation is legal. .widenScalarIf( [=, &ST](const LegalityQuery &Query) { return Query.Types[0].isScalar() && Query.Types[1].isScalar() && (Query.Types[1].getSizeInBits() < ST.getXLen()) && ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) || (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) || (ST.hasStdExtZfh() && Query.Types[0].getSizeInBits() == 16)); }, LegalizeMutations::changeTo(1, sXLen)) // Otherwise only promote to s32 since we have si libcalls. .minScalar(1, s32) .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}}) .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32. .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}}); // FIXME: We can do custom inline expansion like SelectionDAG. getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN}) .legalFor(ST.hasStdExtZfa(), {s32}) .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64}) .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16}) .libcallFor({s32, s64}) .libcallFor(ST.is64Bit(), {s128}); getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM}) .legalFor(ST.hasStdExtZfa(), {s32}) .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64}) .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16}); getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH}) .libcallFor({s32, s64}) .libcallFor(ST.is64Bit(), {s128}); getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP}) .libcallFor({{s32, s32}, {s64, s32}}) .libcallFor(ST.is64Bit(), {s128, s32}); getActionDefinitionsBuilder(G_VASTART).customFor({p0}); // va_list must be a pointer, but most sized types are pretty easy to handle // as the destination. getActionDefinitionsBuilder(G_VAARG) // TODO: Implement narrowScalar and widenScalar for G_VAARG for types // other than sXLen. .clampScalar(0, sXLen, sXLen) .lowerForCartesianProduct({sXLen, p0}, {p0}); getActionDefinitionsBuilder(G_VSCALE) .clampScalar(0, sXLen, sXLen) .customFor({sXLen}); auto &SplatActions = getActionDefinitionsBuilder(G_SPLAT_VECTOR) .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIs(1, sXLen))) .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1))); // Handle case of s64 element vectors on RV32. If the subtarget does not have // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget // does have f64, then we don't know whether the type is an f64 or an i64, // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it, // depending on how the instructions it consumes are legalized. They are not // legalized yet since legalization is in reverse postorder, so we cannot // make the decision at this moment. if (XLen == 32) { if (ST.hasVInstructionsF64() && ST.hasStdExtD()) SplatActions.legalIf(all( typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64))); else if (ST.hasVInstructionsI64()) SplatActions.customIf(all( typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64))); } SplatActions.clampScalar(1, sXLen, sXLen); LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) { LLT DstTy = Query.Types[0]; LLT SrcTy = Query.Types[1]; return DstTy.getElementType() == LLT::scalar(1) && DstTy.getElementCount().getKnownMinValue() >= 8 && SrcTy.getElementCount().getKnownMinValue() >= 8; }; getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR) // We don't have the ability to slide mask vectors down indexed by their // i1 elements; the smallest we can do is i8. Often we are able to bitcast // to equivalent i8 vectors. .bitcastIf( all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred), [=](const LegalityQuery &Query) { LLT CastTy = LLT::vector( Query.Types[0].getElementCount().divideCoefficientBy(8), 8); return std::pair(0, CastTy); }) .customIf(LegalityPredicates::any( all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIsLegalBoolVec(1, BoolVecTys, ST)), all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))); getActionDefinitionsBuilder(G_INSERT_SUBVECTOR) .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIsLegalBoolVec(1, BoolVecTys, ST))) .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))); getLegacyLegalizerInfo().computeTables(); verify(*ST.getInstrInfo()); } bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { Intrinsic::ID IntrinsicID = cast(MI).getIntrinsicID(); switch (IntrinsicID) { default: return false; case Intrinsic::vacopy: { // vacopy arguments must be legal because of the intrinsic signature. // No need to check here. MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); MachineFunction &MF = *MI.getMF(); const DataLayout &DL = MIRBuilder.getDataLayout(); LLVMContext &Ctx = MF.getFunction().getContext(); Register DstLst = MI.getOperand(1).getReg(); LLT PtrTy = MRI.getType(DstLst); // Load the source va_list Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx)); MachineMemOperand *LoadMMO = MF.getMachineMemOperand( MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment); auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO); // Store the result in the destination va_list MachineMemOperand *StoreMMO = MF.getMachineMemOperand( MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment); MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO); MI.eraseFromParent(); return true; } } } bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const { // Stores the address of the VarArgsFrameIndex slot into the memory location assert(MI.getOpcode() == TargetOpcode::G_VASTART); MachineFunction *MF = MI.getParent()->getParent(); RISCVMachineFunctionInfo *FuncInfo = MF->getInfo(); int FI = FuncInfo->getVarArgsFrameIndex(); LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg()); auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI); assert(MI.hasOneMemOperand()); MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(), *MI.memoperands()[0]); MI.eraseFromParent(); return true; } bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const { MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); auto &MF = *MI.getParent()->getParent(); const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout()); Register PtrReg = MI.getOperand(0).getReg(); LLT PtrTy = MRI.getType(PtrReg); Register IndexReg = MI.getOperand(2).getReg(); LLT IndexTy = MRI.getType(IndexReg); if (!isPowerOf2_32(EntrySize)) return false; auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize)); IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0); auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad, EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout()))); Register TargetReg; switch (MJTI->getEntryKind()) { default: return false; case MachineJumpTableInfo::EK_LabelDifference32: { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. unsigned LoadOpc = STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD; auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO); TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0); break; } case MachineJumpTableInfo::EK_Custom32: { auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy, Addr, *MMO); TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0); break; } case MachineJumpTableInfo::EK_BlockAddress: TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0); break; } MIRBuilder.buildBrIndirect(TargetReg); MI.eraseFromParent(); return true; } bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm, bool ShouldOptForSize) const { assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64); int64_t Imm = APImm.getSExtValue(); // All simm32 constants should be handled by isel. // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making // this check redundant, but small immediates are common so this check // should have better compile time. if (isInt<32>(Imm)) return false; // We only need to cost the immediate, if constant pool lowering is enabled. if (!STI.useConstantPoolForLargeInts()) return false; RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI); if (Seq.size() <= STI.getMaxBuildIntsCost()) return false; // Optimizations below are disabled for opt size. If we're optimizing for // size, use a constant pool. if (ShouldOptForSize) return true; // // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do // that if it will avoid a constant pool. // It will require an extra temporary register though. // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where // low and high 32 bits are the same and bit 31 and 63 are set. unsigned ShiftAmt, AddOpc; RISCVMatInt::InstSeq SeqLo = RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc); return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost()); } bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const { const LLT XLenTy(STI.getXLenVT()); Register Dst = MI.getOperand(0).getReg(); // We define our scalable vector types for lmul=1 to use a 64 bit known // minimum size. e.g. . VLENB is in bytes so we calculate // vscale as VLENB / 8. static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock) // Support for VLEN==32 is incomplete. return false; // We assume VLENB is a multiple of 8. We manually choose the best shift // here because SimplifyDemandedBits isn't always able to simplify it. uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue(); if (isPowerOf2_64(Val)) { uint64_t Log2 = Log2_64(Val); if (Log2 < 3) { auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {}); MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2)); } else if (Log2 > 3) { auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {}); MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3)); } else { MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {}); } } else if ((Val % 8) == 0) { // If the multiplier is a multiple of 8, scale it down to avoid needing // to shift the VLENB value. auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {}); MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8)); } else { auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {}); auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3)); MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val)); } MI.eraseFromParent(); return true; } // Custom-lower extensions from mask vectors by using a vselect either with 1 // for zero/any-extension or -1 for sign-extension: // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) // Note that any-extension is lowered identically to zero-extension. bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI, MachineIRBuilder &MIB) const { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_ANYEXT); MachineRegisterInfo &MRI = *MIB.getMRI(); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(Dst); int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1; LLT DstEltTy = DstTy.getElementType(); auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0)); auto SplatTrue = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal)); MIB.buildSelect(Dst, Src, SplatTrue, SplatZero); MI.eraseFromParent(); return true; } bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper, MachineIRBuilder &MIB) const { assert((isa(MI) || isa(MI)) && "Machine instructions must be Load/Store."); MachineRegisterInfo &MRI = *MIB.getMRI(); MachineFunction *MF = MI.getMF(); const DataLayout &DL = MIB.getDataLayout(); LLVMContext &Ctx = MF->getFunction().getContext(); Register DstReg = MI.getOperand(0).getReg(); LLT DataTy = MRI.getType(DstReg); if (!DataTy.isVector()) return false; if (!MI.hasOneMemOperand()) return false; MachineMemOperand *MMO = *MI.memoperands_begin(); const auto *TLI = STI.getTargetLowering(); EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx)); if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO)) return true; unsigned EltSizeBits = DataTy.getScalarSizeInBits(); assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && "Unexpected unaligned RVV load type"); // Calculate the new vector type with i8 elements unsigned NumElements = DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8); LLT NewDataTy = LLT::scalable_vector(NumElements, 8); Helper.bitcast(MI, 0, NewDataTy); return true; } /// Return the type of the mask type suitable for masking the provided /// vector type. This is simply an i1 element type vector of the same /// (possibly scalable) length. static LLT getMaskTypeFor(LLT VecTy) { assert(VecTy.isVector()); ElementCount EC = VecTy.getElementCount(); return LLT::vector(EC, LLT::scalar(1)); } /// Creates an all ones mask suitable for masking a vector of type VecTy with /// vector length VL. static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) { LLT MaskTy = getMaskTypeFor(VecTy); return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL}); } /// Gets the two common "VL" operands: an all-ones mask and the vector length. /// VecTy is a scalable vector type. static std::pair buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) { assert(VecTy.isScalableVector() && "Expecting scalable container type"); const RISCVSubtarget &STI = MIB.getMF().getSubtarget(); LLT XLenTy(STI.getXLenVT()); auto VL = MIB.buildConstant(XLenTy, -1); auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI); return {Mask, VL}; } static MachineInstrBuilder buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo, Register Hi, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) { // TODO: If the Hi bits of the splat are undefined, then it's fine to just // splat Lo even if it might be sign extended. I don't think we have // introduced a case where we're build a s64 where the upper bits are undef // yet. // Fall back to a stack store and stride x0 vector load. // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in // preprocessDAG in SDAG. return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst}, {Passthru, Lo, Hi, VL}); } static MachineInstrBuilder buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru, const SrcOp &Scalar, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) { assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!"); auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar); return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0), Unmerge.getReg(1), VL, MIB, MRI); } // Lower splats of s1 types to G_ICMP. For each mask vector type, we have a // legal equivalently-sized i8 type, so we can use that as a go-between. // Splats of s1 types that have constant value can be legalized as VMSET_VL or // VMCLR_VL. bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const { assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR); MachineRegisterInfo &MRI = *MIB.getMRI(); Register Dst = MI.getOperand(0).getReg(); Register SplatVal = MI.getOperand(1).getReg(); LLT VecTy = MRI.getType(Dst); LLT XLenTy(STI.getXLenVT()); // Handle case of s64 element vectors on rv32 if (XLenTy.getSizeInBits() == 32 && VecTy.getElementType().getSizeInBits() == 64) { auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI); buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB, MRI); MI.eraseFromParent(); return true; } // All-zeros or all-ones splats are handled specially. MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal); if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) { auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second; MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL}); MI.eraseFromParent(); return true; } if (isNullOrNullSplat(SplatValMI, MRI)) { auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second; MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL}); MI.eraseFromParent(); return true; } // Handle non-constant mask splat (i.e. not sure if it's all zeros or all // ones) by promoting it to an s8 splat. LLT InterEltTy = LLT::scalar(8); LLT InterTy = VecTy.changeElementType(InterEltTy); auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal); auto And = MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1)); auto LHS = MIB.buildSplatVector(InterTy, And); auto ZeroSplat = MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0)); MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat); MI.eraseFromParent(); return true; } static LLT getLMUL1Ty(LLT VecTy) { assert(VecTy.getElementType().getSizeInBits() <= 64 && "Unexpected vector LLT"); return LLT::scalable_vector(RISCV::RVVBitsPerBlock / VecTy.getElementType().getSizeInBits(), VecTy.getElementType()); } bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const { GExtractSubvector &ES = cast(MI); MachineRegisterInfo &MRI = *MIB.getMRI(); Register Dst = ES.getReg(0); Register Src = ES.getSrcVec(); uint64_t Idx = ES.getIndexImm(); // With an index of 0 this is a cast-like subvector, which can be performed // with subregister operations. if (Idx == 0) return true; LLT LitTy = MRI.getType(Dst); LLT BigTy = MRI.getType(Src); if (LitTy.getElementType() == LLT::scalar(1)) { // We can't slide this mask vector up indexed by its i1 elements. // This poses a problem when we wish to insert a scalable vector which // can't be re-expressed as a larger type. Just choose the slow path and // extend to a larger type, then truncate back down. LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8)); LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8)); auto BigZExt = MIB.buildZExt(ExtBigTy, Src); auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx); auto SplatZero = MIB.buildSplatVector( ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0)); MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero); MI.eraseFromParent(); return true; } // extract_subvector scales the index by vscale if the subvector is scalable, // and decomposeSubvectorInsertExtractToSubRegs takes this into account. const RISCVRegisterInfo *TRI = STI.getRegisterInfo(); MVT LitTyMVT = getMVTForLLT(LitTy); auto Decompose = RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( getMVTForLLT(BigTy), LitTyMVT, Idx, TRI); unsigned RemIdx = Decompose.second; // If the Idx has been completely eliminated then this is a subvector extract // which naturally aligns to a vector register. These can easily be handled // using subregister manipulation. if (RemIdx == 0) return true; // Else LitTy is M1 or smaller and may need to be slid down: if LitTy // was > M1 then the index would need to be a multiple of VLMAX, and so would // divide exactly. assert( RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second || RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVVType::LMUL_1); // If the vector type is an LMUL-group type, extract a subvector equal to the // nearest full vector register type. LLT InterLitTy = BigTy; Register Vec = Src; if (TypeSize::isKnownGT(BigTy.getSizeInBits(), getLMUL1Ty(BigTy).getSizeInBits())) { // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and // we should have successfully decomposed the extract into a subregister. assert(Decompose.first != RISCV::NoSubRegister); InterLitTy = getLMUL1Ty(BigTy); // SDAG builds a TargetExtractSubreg. We cannot create a a Copy with SubReg // specified on the source Register (the equivalent) since generic virtual // register does not allow subregister index. Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0); } // Slide this vector register down by the desired number of elements in order // to place the desired subvector starting at element 0. const LLT XLenTy(STI.getXLenVT()); auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx); auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI); uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; auto Slidedown = MIB.buildInstr( RISCV::G_VSLIDEDOWN_VL, {InterLitTy}, {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy}); // Now the vector is in the right position, extract our final subvector. This // should resolve to a COPY. MIB.buildExtractSubvector(Dst, Slidedown, 0); MI.eraseFromParent(); return true; } bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI, LegalizerHelper &Helper, MachineIRBuilder &MIB) const { GInsertSubvector &IS = cast(MI); MachineRegisterInfo &MRI = *MIB.getMRI(); Register Dst = IS.getReg(0); Register BigVec = IS.getBigVec(); Register LitVec = IS.getSubVec(); uint64_t Idx = IS.getIndexImm(); LLT BigTy = MRI.getType(BigVec); LLT LitTy = MRI.getType(LitVec); if (Idx == 0 || MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) return true; // We don't have the ability to slide mask vectors up indexed by their i1 // elements; the smallest we can do is i8. Often we are able to bitcast to // equivalent i8 vectors. Otherwise, we can must zeroextend to equivalent i8 // vectors and truncate down after the insert. if (LitTy.getElementType() == LLT::scalar(1)) { auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue(); auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue(); if (BigTyMinElts >= 8 && LitTyMinElts >= 8) return Helper.bitcast( IS, 0, LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8)); // We can't slide this mask vector up indexed by its i1 elements. // This poses a problem when we wish to insert a scalable vector which // can't be re-expressed as a larger type. Just choose the slow path and // extend to a larger type, then truncate back down. LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8)); return Helper.widenScalar(IS, 0, ExtBigTy); } const RISCVRegisterInfo *TRI = STI.getRegisterInfo(); unsigned SubRegIdx, RemIdx; std::tie(SubRegIdx, RemIdx) = RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI); TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock); assert(isPowerOf2_64( STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue())); bool ExactlyVecRegSized = STI.expandVScale(LitTy.getSizeInBits()) .isKnownMultipleOf(STI.expandVScale(VecRegSize)); // If the Idx has been completely eliminated and this subvector's size is a // vector register or a multiple thereof, or the surrounding elements are // undef, then this is a subvector insert which naturally aligns to a vector // register. These can easily be handled using subregister manipulation. if (RemIdx == 0 && ExactlyVecRegSized) return true; // If the subvector is smaller than a vector register, then the insertion // must preserve the undisturbed elements of the register. We do this by // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type // (which resolves to a subregister copy), performing a VSLIDEUP to place the // subvector within the vector register, and an INSERT_SUBVECTOR of that // LMUL=1 type back into the larger vector (resolving to another subregister // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type // to avoid allocating a large register group to hold our subvector. // VSLIDEUP works by leaving elements 0getParent(); switch (MI.getOpcode()) { default: // No idea what to do. return false; case TargetOpcode::G_ABS: return Helper.lowerAbsToMaxNeg(MI); // TODO: G_FCONSTANT case TargetOpcode::G_CONSTANT: { const Function &F = MF.getFunction(); // TODO: if PSI and BFI are present, add " || // llvm::shouldOptForSize(*CurMBB, PSI, BFI)". bool ShouldOptForSize = F.hasOptSize(); const ConstantInt *ConstVal = MI.getOperand(1).getCImm(); if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize)) return true; return Helper.lowerConstant(MI); } case TargetOpcode::G_SUB: case TargetOpcode::G_ADD: { Helper.Observer.changingInstr(MI); Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT); Register DstALU = MRI.createGenericVirtualRegister(sXLen); MachineOperand &MO = MI.getOperand(0); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32); MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext}); MO.setReg(DstALU); Helper.Observer.changedInstr(MI); return true; } case TargetOpcode::G_SEXT_INREG: { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); int64_t SizeInBits = MI.getOperand(2).getImm(); // Source size of 32 is sext.w. if (DstTy.getSizeInBits() == 64 && SizeInBits == 32) return true; if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16)) return true; return Helper.lower(MI, 0, /* Unused hint type */ LLT()) == LegalizerHelper::Legalized; } case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: case TargetOpcode::G_SHL: { if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) { // We don't need a custom node for shift by constant. Just widen the // source and the shift amount. unsigned ExtOpc = TargetOpcode::G_ANYEXT; if (MI.getOpcode() == TargetOpcode::G_ASHR) ExtOpc = TargetOpcode::G_SEXT; else if (MI.getOpcode() == TargetOpcode::G_LSHR) ExtOpc = TargetOpcode::G_ZEXT; Helper.Observer.changingInstr(MI); Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc); Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT); Helper.widenScalarDst(MI, sXLen); Helper.Observer.changedInstr(MI); return true; } Helper.Observer.changingInstr(MI); Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT); Helper.widenScalarDst(MI, sXLen); MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); Helper.Observer.changedInstr(MI); return true; } case TargetOpcode::G_SDIV: case TargetOpcode::G_UDIV: case TargetOpcode::G_UREM: case TargetOpcode::G_ROTL: case TargetOpcode::G_ROTR: { Helper.Observer.changingInstr(MI); Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT); Helper.widenScalarDst(MI, sXLen); MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); Helper.Observer.changedInstr(MI); return true; } case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: { Helper.Observer.changingInstr(MI); Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); Helper.widenScalarDst(MI, sXLen); MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); Helper.Observer.changedInstr(MI); return true; } case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: { Helper.Observer.changingInstr(MI); Helper.widenScalarDst(MI, sXLen); MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode()))); MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ)); Helper.Observer.changedInstr(MI); return true; } case TargetOpcode::G_IS_FPCLASS: { Register GISFPCLASS = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); const MachineOperand &ImmOp = MI.getOperand(2); MachineIRBuilder MIB(MI); // Turn LLVM IR's floating point classes to that in RISC-V, // by simply rotating the 10-bit immediate right by two bits. APInt GFpClassImm(10, static_cast(ImmOp.getImm())); auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen)); auto ConstZero = MIB.buildConstant(sXLen, 0); auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src}); auto And = MIB.buildAnd(sXLen, GFClass, FClassMask); MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero); MI.eraseFromParent(); return true; } case TargetOpcode::G_BRJT: return legalizeBRJT(MI, MIRBuilder); case TargetOpcode::G_VASTART: return legalizeVAStart(MI, MIRBuilder); case TargetOpcode::G_VSCALE: return legalizeVScale(MI, MIRBuilder); case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXT: case TargetOpcode::G_ANYEXT: return legalizeExt(MI, MIRBuilder); case TargetOpcode::G_SPLAT_VECTOR: return legalizeSplatVector(MI, MIRBuilder); case TargetOpcode::G_EXTRACT_SUBVECTOR: return legalizeExtractSubvector(MI, MIRBuilder); case TargetOpcode::G_INSERT_SUBVECTOR: return legalizeInsertSubvector(MI, Helper, MIRBuilder); case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: return legalizeLoadStore(MI, Helper, MIRBuilder); } llvm_unreachable("expected switch to return"); }