| //===-- GCNPreRAOptimizations.cpp -----------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// This pass combines split register tuple initialization into a single pseudo: |
| /// |
| /// undef %0.sub1:sreg_64 = S_MOV_B32 1 |
| /// %0.sub0:sreg_64 = S_MOV_B32 2 |
| /// => |
| /// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002 |
| /// |
| /// This is to allow rematerialization of a value instead of spilling. It is |
| /// supposed to be done after register coalescer to allow it to do its job and |
| /// before actual register allocation to allow rematerialization. |
| /// |
| /// Right now the pass only handles 64 bit SGPRs with immediate initializers, |
| /// although the same shall be possible with other register classes and |
| /// instructions if necessary. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "GCNSubtarget.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "llvm/CodeGen/LiveIntervals.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/InitializePasses.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" |
| |
| namespace { |
| |
| class GCNPreRAOptimizations : public MachineFunctionPass { |
| private: |
| const SIInstrInfo *TII; |
| MachineRegisterInfo *MRI; |
| LiveIntervals *LIS; |
| |
| bool processReg(Register Reg); |
| |
| public: |
| static char ID; |
| |
| GCNPreRAOptimizations() : MachineFunctionPass(ID) { |
| initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| StringRef getPassName() const override { |
| return "AMDGPU Pre-RA optimizations"; |
| } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.addRequired<LiveIntervals>(); |
| AU.setPreservesAll(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| }; |
| |
| } // End anonymous namespace. |
| |
| INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, |
| "AMDGPU Pre-RA optimizations", false, false) |
| INITIALIZE_PASS_DEPENDENCY(LiveIntervals) |
| INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", |
| false, false) |
| |
| char GCNPreRAOptimizations::ID = 0; |
| |
| char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; |
| |
| FunctionPass *llvm::createGCNPreRAOptimizationsPass() { |
| return new GCNPreRAOptimizations(); |
| } |
| |
| bool GCNPreRAOptimizations::processReg(Register Reg) { |
| MachineInstr *Def0 = nullptr; |
| MachineInstr *Def1 = nullptr; |
| uint64_t Init = 0; |
| |
| for (MachineInstr &I : MRI->def_instructions(Reg)) { |
| if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg || |
| !I.getOperand(1).isImm() || I.getNumOperands() != 2) |
| return false; |
| |
| switch (I.getOperand(0).getSubReg()) { |
| default: |
| return false; |
| case AMDGPU::sub0: |
| if (Def0) |
| return false; |
| Def0 = &I; |
| Init |= I.getOperand(1).getImm() & 0xffffffff; |
| break; |
| case AMDGPU::sub1: |
| if (Def1) |
| return false; |
| Def1 = &I; |
| Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32; |
| break; |
| } |
| } |
| |
| if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) |
| return false; |
| |
| LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 |
| << " =>\n"); |
| |
| if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), |
| LIS->getInstructionIndex(*Def0))) |
| std::swap(Def0, Def1); |
| |
| LIS->RemoveMachineInstrFromMaps(*Def0); |
| LIS->RemoveMachineInstrFromMaps(*Def1); |
| auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), |
| TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) |
| .addImm(Init); |
| |
| Def0->eraseFromParent(); |
| Def1->eraseFromParent(); |
| LIS->InsertMachineInstrInMaps(*NewI); |
| LIS->removeInterval(Reg); |
| LIS->createAndComputeVirtRegInterval(Reg); |
| |
| LLVM_DEBUG(dbgs() << " " << *NewI); |
| |
| return true; |
| } |
| |
| bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { |
| if (skipFunction(MF.getFunction())) |
| return false; |
| |
| const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| TII = ST.getInstrInfo(); |
| MRI = &MF.getRegInfo(); |
| LIS = &getAnalysis<LiveIntervals>(); |
| const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
| |
| bool Changed = false; |
| |
| for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { |
| Register Reg = Register::index2VirtReg(I); |
| if (!LIS->hasInterval(Reg)) |
| continue; |
| const TargetRegisterClass *RC = MRI->getRegClass(Reg); |
| if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) |
| continue; |
| Changed |= processReg(Reg); |
| } |
| |
| return Changed; |
| } |