//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
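// As an illustration (simplified pseudo-MIR, not taken from a real test), a
// vector pseudo carrying AVL and SEW operands:
//
//   PseudoVADD_VV_M1 %dst, %a, %b, %avl, 5 /* log2(e32) */
//
// is rewritten by this pass along the lines of:
//
//   dead $x0 = PseudoVSETVLI %avl, <e32, m1, ta, ma>,
//              implicit-def $vl, implicit-def $vtype
//   PseudoVADD_VV_M1 %dst, %a, %b, $noreg, 5, implicit $vl, implicit $vtype
//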
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

/// Get the EEW for a load or store instruction.  Return None if MI is not
/// a load or store which ignores SEW.
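/// For example, PseudoVLE32_V_M1 always loads 32-bit elements, so its EEW is
/// 32 regardless of the SEW currently configured in VTYPE.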
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return None;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    return 8;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    return 16;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    return 32;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    return 64;
  }
}

/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
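/// For example, vmand.mm and the mask load/store pseudos are encoded with a
/// Log2SEW operand of 0.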
static bool isMaskRegOp(const MachineInstr &MI) {
  if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
    const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
    // A Log2SEW of 0 is an operation on mask registers only.
    return Log2SEW == 0;
  }
  return false;
}

static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

  // Convert LMul to a fixed point value with 3 fractional bits.
  LMul = Fractional ? (8 / LMul) : (LMul * 8);

  assert(SEW >= 8 && "Unexpected SEW value");
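  // For example, SEW=16 with LMUL=1/4 gives a fixed point LMul of 8/4 = 2,
  // so the ratio is (16 * 8) / 2 = 64.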
  return (SEW * 8) / LMul;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  bool VL = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
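/// For example, e8/m1 and e16/m2 differ in SEW and LMUL, but both have a
/// SEW/LMUL ratio of 8, so they compare equal when only SEWLMULRatio is
/// demanded.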
static bool areCompatibleVTYPEs(uint64_t VType1, uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                  RISCVVType::getVLMUL(VType1));
    auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                  RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.VL = true;
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.VL = true;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two, which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return ::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
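  // For example, e8/mf2 and e16/m1 both have a SEW/LMUL ratio of 16, so
  // VLMAX = VLEN/16 for each, and the two configurations map any given AVL
  // to the same VL.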
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    const DemandedFields Used = getDemanded(MI);
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
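/// For example, 'vsetvli x0, x0, e32, m2, ta, ma' rewrites VTYPE while
/// leaving the current VL in place (this form is only legal when the new
/// SEW/LMUL ratio leaves VLMAX unchanged).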
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  // If the instruction has a policy argument, use it. If there is no policy
  // argument, default to tail agnostic unless the destination is tied to a
  // source (and that source is not undef), since then the user has some
  // control over the policy values.
  bool TailAgnostic = true;
  bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
  // FIXME: Could we look at the instructions above or below to choose a
  // matching mask policy and thereby reduce the number of vsetvlis? The
  // default mask policy is agnostic if the instruction uses the mask policy,
  // and undisturbed otherwise. Because most mask operations are mask
  // undisturbed, we could possibly elide the vsetvli between a masked and an
  // unmasked instruction sequence.
  bool MaskAgnostic = UsesMaskPolicy;
  unsigned UseOpIdx;
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    uint64_t Policy = Op.getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    // In some cases a mismatched passthru/maskedoff operand and policy value
    // do not make sense (e.g. a tied operand that is IMPLICIT_DEF with a
    // non-TAMA policy, or a tied operand that is not IMPLICIT_DEF with a TAMA
    // policy), but the user set the policy value explicitly, so the compiler
    // does not second-guess it.
    TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
    MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
  } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    if (UsesMaskPolicy)
      MaskAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      if (UsesMaskPolicy)
        MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (CurInfo.isCompatible(MI, Require))
    return false;

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  // VL=0 is uninteresting (as it should have been deleted already), so it is
  // compatible if we can prove both are non-zero.  Additionally, if writing
  // to an implicit_def operand, we don't need to preserve any other bits and
  // are thus compatible with any larger etype, and can disregard policy bits.
  if (isScalarMoveInstr(MI) &&
      CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI.  The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove both are non-zero.  This removes a vsetvli entirely
  // if the types match, or allows use of the cheaper AVL-preserving variant
  // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl output of the fault-first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
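// Illustrative shape of the pattern (simplified, names invented):
//
//   bb.1: %vl1 = PseudoVSETVLI %a, <vtype>, ...
//   bb.2: %vl2 = PseudoVSETVLI %b, <vtype>, ...
//   bb.3: %avl = PHI [ %vl1, bb.1 ], [ %vl2, bb.2 ]
//         ... vector op with AVL %avl and the same <vtype> ...
//
// Here VL/VTYPE already match on every path into bb.3, so no vsetvli is
// needed before the vector op.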
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct.  Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;
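  // E.g. on a subtarget with a guaranteed minimum VLEN of 128, AVL=8 with
  // e16/m1 needs 8 * 16 = 128 bits, which fits in a single register, so
  // every implementation configures VL == 8.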

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
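/// For example, a single-block loop whose body would otherwise begin with an
/// implicit 'vsetivli zero, 4, e32, m1' transition on every iteration can
/// instead have that transition materialized once at the end of the
/// preheader, provided no predecessor state conflicts with it.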
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both the data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for the correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI's VTYPE to match MI's
// without changing any of the fields which have been used.
// TODO: Restructure code to allow code reuse between this and isCompatible
// above.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // TODO: Extend this to handle cases where VL does change, but VL
  // has not been used.  (e.g. over a vmv.x.s)
  if (!isVLPreservingConfig(MI))
    // Note: `vsetvli x0, x0, vtype' is the canonical instruction
    // for this case.  If you find yourself wanting to add other forms
    // to this "unused VTYPE" case, we're probably missing a
    // canonicalization earlier.
    return false;

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *PrevMI = nullptr;
  DemandedFields Used;
  SmallVector<MachineInstr *> ToDelete;
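  // As an illustration of the two rewrites below: if two configs are
  // separated only by a store (which demands the SEW/LMUL ratio but no
  // policy bits),
  //   PrevMI: vsetvli %0, %a, e32, m1, tu, mu
  //   ...     vse32.v ...
  //   MI:     vsetvli x0, x0, e32, m1, ta, ma
  // the policy-only difference is unobserved, so PrevMI's VTYPE immediate
  // can be rewritten to MI's and MI deleted.  If *nothing* between two
  // configs reads VL or VTYPE at all, the earlier config is simply dead.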
  for (MachineInstr &MI : MBB) {
    // Note: Must be *before* vsetvli handling to account for config cases
    // which only change some subfields.
    doUnion(Used, getDemanded(MI));

    if (!isVectorConfigInstr(MI))
      continue;

    if (PrevMI) {
      if (!Used.VL && !Used.usedVTYPE()) {
        ToDelete.push_back(PrevMI);
        // Fallthrough.
      } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
        ToDelete.push_back(&MI);
        // Leave PrevMI unchanged.
        continue;
      }
    }
    PrevMI = &MI;
    Used = getDemanded(MI);
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.VL = true;
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
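      // E.g. a vle32ff.v pseudo whose vl result has uses gets a PseudoReadVL
      // of $vl placed right after it, and the load's own vl def operand is
      // retargeted to $x0.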
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow.  We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure.  This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after each VLEFF/VLSEGFF and replace uses of the
  // load's vl output with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}