| //===- AMDGPUAttributor.cpp -----------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file This pass uses the Attributor framework to deduce AMDGPU attributes. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "GCNSubtarget.h" |
| #include "llvm/CodeGen/TargetPassConfig.h" |
| #include "llvm/IR/IntrinsicsAMDGPU.h" |
| #include "llvm/IR/IntrinsicsR600.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Transforms/IPO/Attributor.h" |
| |
| #define DEBUG_TYPE "amdgpu-attributor" |
| |
| using namespace llvm; |
| |
| static constexpr StringLiteral ImplicitAttrNames[] = { |
|     // X ids are unnecessarily propagated to kernels. |
| "amdgpu-work-item-id-x", "amdgpu-work-item-id-y", |
| "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", |
| "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", |
| "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", |
| "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"}; |
| |
| // We do not need to note the x workitem or workgroup id because they are always |
| // initialized. |
| // |
| // TODO: We should not add the attributes if the known compile time workgroup |
| // size is 1 for y/z. |
| static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, |
| bool &IsQueuePtr) { |
| switch (ID) { |
| case Intrinsic::amdgcn_workitem_id_x: |
| NonKernelOnly = true; |
| return "amdgpu-work-item-id-x"; |
| case Intrinsic::amdgcn_workgroup_id_x: |
| NonKernelOnly = true; |
| return "amdgpu-work-group-id-x"; |
| case Intrinsic::amdgcn_workitem_id_y: |
| case Intrinsic::r600_read_tidig_y: |
| return "amdgpu-work-item-id-y"; |
| case Intrinsic::amdgcn_workitem_id_z: |
| case Intrinsic::r600_read_tidig_z: |
| return "amdgpu-work-item-id-z"; |
| case Intrinsic::amdgcn_workgroup_id_y: |
| case Intrinsic::r600_read_tgid_y: |
| return "amdgpu-work-group-id-y"; |
| case Intrinsic::amdgcn_workgroup_id_z: |
| case Intrinsic::r600_read_tgid_z: |
| return "amdgpu-work-group-id-z"; |
| case Intrinsic::amdgcn_dispatch_ptr: |
| return "amdgpu-dispatch-ptr"; |
| case Intrinsic::amdgcn_dispatch_id: |
| return "amdgpu-dispatch-id"; |
| case Intrinsic::amdgcn_kernarg_segment_ptr: |
| return "amdgpu-kernarg-segment-ptr"; |
| case Intrinsic::amdgcn_implicitarg_ptr: |
| return "amdgpu-implicitarg-ptr"; |
| case Intrinsic::amdgcn_queue_ptr: |
| case Intrinsic::amdgcn_is_shared: |
| case Intrinsic::amdgcn_is_private: |
| // TODO: Does not require queue ptr on gfx9+ |
| case Intrinsic::trap: |
| case Intrinsic::debugtrap: |
| IsQueuePtr = true; |
| return "amdgpu-queue-ptr"; |
| default: |
| return ""; |
| } |
| } |
| |
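| /// Returns true if an addrspacecast from \p SrcAS may require the queue |
| /// pointer: casts out of the private or local address space need the aperture |
| /// base addresses, which are read from the queue pointer when the subtarget |
| /// has no aperture registers. |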
| static bool castRequiresQueuePtr(unsigned SrcAS) { |
| return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; |
| } |
| |
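| /// Returns true if \p C is a global value in the LDS (local) or region |
| /// address space. |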
| static bool isDSAddress(const Constant *C) { |
| const GlobalValue *GV = dyn_cast<GlobalValue>(C); |
| if (!GV) |
| return false; |
| unsigned AS = GV->getAddressSpace(); |
| return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; |
| } |
| |
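| /// Information cache extended with AMDGPU-specific queries (subtarget |
| /// features and constant analysis) used during attribute deduction. |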
| class AMDGPUInformationCache : public InformationCache { |
| public: |
| AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, |
| BumpPtrAllocator &Allocator, |
| SetVector<Function *> *CGSCC, TargetMachine &TM) |
| : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} |
| TargetMachine &TM; |
| |
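|   /// Bitmask describing how a constant is accessed: whether it references a |
|   /// local/region (DS) global and whether it contains an address space cast |
|   /// that requires the queue pointer. |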
| enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; |
| |
|   /// Check if the subtarget of \p F has aperture registers. |
| bool hasApertureRegs(Function &F) { |
| const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); |
| return ST.hasApertureRegs(); |
| } |
| |
| private: |
| /// Check if the ConstantExpr \p CE requires queue ptr attribute. |
| static bool visitConstExpr(const ConstantExpr *CE) { |
| if (CE->getOpcode() == Instruction::AddrSpaceCast) { |
| unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); |
| return castRequiresQueuePtr(SrcAS); |
| } |
| return false; |
| } |
| |
| /// Get the constant access bitmap for \p C. |
| uint8_t getConstantAccess(const Constant *C) { |
| auto It = ConstantStatus.find(C); |
| if (It != ConstantStatus.end()) |
| return It->second; |
| |
| uint8_t Result = 0; |
| if (isDSAddress(C)) |
| Result = DS_GLOBAL; |
| |
| if (const auto *CE = dyn_cast<ConstantExpr>(C)) |
| if (visitConstExpr(CE)) |
| Result |= ADDR_SPACE_CAST; |
| |
| for (const Use &U : C->operands()) { |
| const auto *OpC = dyn_cast<Constant>(U); |
| if (!OpC) |
| continue; |
| |
| Result |= getConstantAccess(OpC); |
| } |
| return Result; |
| } |
| |
| public: |
| /// Returns true if \p Fn needs a queue ptr attribute because of \p C. |
| bool needsQueuePtr(const Constant *C, Function &Fn) { |
| bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); |
| bool HasAperture = hasApertureRegs(Fn); |
| |
| // No need to explore the constants. |
| if (!IsNonEntryFunc && HasAperture) |
| return false; |
| |
| uint8_t Access = getConstantAccess(C); |
| |
| // We need to trap on DS globals in non-entry functions. |
| if (IsNonEntryFunc && (Access & DS_GLOBAL)) |
| return true; |
| |
| return !HasAperture && (Access & ADDR_SPACE_CAST); |
| } |
| |
| private: |
| /// Used to determine if the Constant needs a queue ptr attribute. |
| DenseMap<const Constant *, uint8_t> ConstantStatus; |
| }; |
| |
| struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> { |
| using Base = StateWrapper<BooleanState, AbstractAttribute>; |
| AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
| |
| /// Create an abstract attribute view for the position \p IRP. |
| static AAAMDAttributes &createForPosition(const IRPosition &IRP, |
| Attributor &A); |
| |
| /// See AbstractAttribute::getName(). |
| const std::string getName() const override { return "AAAMDAttributes"; } |
| |
| /// See AbstractAttribute::getIdAddr(). |
| const char *getIdAddr() const override { return &ID; } |
| |
| /// This function should return true if the type of the \p AA is |
| /// AAAMDAttributes. |
| static bool classof(const AbstractAttribute *AA) { |
| return (AA->getIdAddr() == &ID); |
| } |
| |
| virtual const DenseSet<StringRef> &getAttributes() const = 0; |
| |
| /// Unique ID (due to the unique address) |
| static const char ID; |
| }; |
| const char AAAMDAttributes::ID = 0; |
| |
| struct AAAMDWorkGroupSize |
| : public StateWrapper<BooleanState, AbstractAttribute> { |
| using Base = StateWrapper<BooleanState, AbstractAttribute>; |
| AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
| |
| /// Create an abstract attribute view for the position \p IRP. |
| static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP, |
| Attributor &A); |
| |
| /// See AbstractAttribute::getName(). |
| const std::string getName() const override { return "AAAMDWorkGroupSize"; } |
| |
| /// See AbstractAttribute::getIdAddr(). |
| const char *getIdAddr() const override { return &ID; } |
| |
| /// This function should return true if the type of the \p AA is |
|   /// AAAMDWorkGroupSize. |
| static bool classof(const AbstractAttribute *AA) { |
| return (AA->getIdAddr() == &ID); |
| } |
| |
| /// Unique ID (due to the unique address) |
| static const char ID; |
| }; |
| const char AAAMDWorkGroupSize::ID = 0; |
| |
| struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize { |
| AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) |
| : AAAMDWorkGroupSize(IRP, A) {} |
| |
| void initialize(Attributor &A) override { |
| Function *F = getAssociatedFunction(); |
| CallingConv::ID CC = F->getCallingConv(); |
| |
| if (CC != CallingConv::AMDGPU_KERNEL) |
| return; |
| |
| bool InitialValue = false; |
| if (F->hasFnAttribute("uniform-work-group-size")) |
| InitialValue = F->getFnAttribute("uniform-work-group-size") |
| .getValueAsString() |
| .equals("true"); |
| |
| if (InitialValue) |
| indicateOptimisticFixpoint(); |
| else |
| indicatePessimisticFixpoint(); |
| } |
| |
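|   /// Clamp the assumed "uniform-work-group-size" state against the state of |
|   /// every caller; give up if not all call sites are known. |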
| ChangeStatus updateImpl(Attributor &A) override { |
| ChangeStatus Change = ChangeStatus::UNCHANGED; |
| |
| auto CheckCallSite = [&](AbstractCallSite CS) { |
| Function *Caller = CS.getInstruction()->getFunction(); |
| LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName() |
| << "->" << getAssociatedFunction()->getName() << "\n"); |
| |
| const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>( |
| *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); |
| |
| Change = Change | clampStateAndIndicateChange(this->getState(), |
| CallerInfo.getState()); |
| |
| return true; |
| }; |
| |
| bool AllCallSitesKnown = true; |
| if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) |
| indicatePessimisticFixpoint(); |
| |
| return Change; |
| } |
| |
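|   /// Write the deduced "uniform-work-group-size" value back to the IR. |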
| ChangeStatus manifest(Attributor &A) override { |
| SmallVector<Attribute, 8> AttrList; |
| LLVMContext &Ctx = getAssociatedFunction()->getContext(); |
| |
| AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size", |
| getAssumed() ? "true" : "false")); |
| return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, |
| /* ForceReplace */ true); |
| } |
| |
| bool isValidState() const override { |
| // This state is always valid, even when the state is false. |
| return true; |
| } |
| |
| const std::string getAsStr() const override { |
| return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; |
| } |
| |
| /// See AbstractAttribute::trackStatistics() |
| void trackStatistics() const override {} |
| }; |
| |
| AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP, |
| Attributor &A) { |
| if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) |
| return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A); |
| llvm_unreachable("AAAMDWorkGroupSize is only valid for function position"); |
| } |
| |
| struct AAAMDAttributesFunction : public AAAMDAttributes { |
| AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) |
| : AAAMDAttributes(IRP, A) {} |
| |
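|   /// Seed the attribute set from attributes already present on the function. |
|   /// Intrinsics and graphics calling conventions are given up on immediately, |
|   /// and functions whose address is taken conservatively receive all implicit |
|   /// argument attributes. |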
| void initialize(Attributor &A) override { |
| Function *F = getAssociatedFunction(); |
| CallingConv::ID CC = F->getCallingConv(); |
| bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx); |
| |
|     // Don't add attributes to intrinsics. |
| if (F->isIntrinsic()) { |
| indicatePessimisticFixpoint(); |
| return; |
| } |
| |
|     // Ignore functions with graphics calling conventions; these are currently |
| // not allowed to have kernel arguments. |
| if (AMDGPU::isGraphics(F->getCallingConv())) { |
| indicatePessimisticFixpoint(); |
| return; |
| } |
| |
| for (StringRef Attr : ImplicitAttrNames) { |
| if (F->hasFnAttribute(Attr)) |
| Attributes.insert(Attr); |
| } |
| |
| // TODO: We shouldn't need this in the future. |
| if (CallingConvSupportsAllImplicits && |
| F->hasAddressTaken(nullptr, true, true, true)) { |
| for (StringRef AttrName : ImplicitAttrNames) { |
| Attributes.insert(AttrName); |
| } |
| } |
| } |
| |
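|   /// Deduce the required implicit argument attributes by walking the call |
|   /// edges (intrinsics and propagated callee attributes) and scanning the |
|   /// body for allocas, address space casts, and constants that need the |
|   /// queue pointer. |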
| ChangeStatus updateImpl(Attributor &A) override { |
| Function *F = getAssociatedFunction(); |
| ChangeStatus Change = ChangeStatus::UNCHANGED; |
| bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); |
| CallingConv::ID CC = F->getCallingConv(); |
| bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx); |
| auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); |
| |
| auto AddAttribute = [&](StringRef AttrName) { |
| if (Attributes.insert(AttrName).second) |
| Change = ChangeStatus::CHANGED; |
| }; |
| |
| // Check for Intrinsics and propagate attributes. |
| const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( |
| *this, this->getIRPosition(), DepClassTy::REQUIRED); |
| |
| // We have to assume that we can reach a function with these attributes. |
|     // We do not consider inline assembly an unknown callee. |
| if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) { |
| for (StringRef AttrName : ImplicitAttrNames) { |
| AddAttribute(AttrName); |
| } |
| } |
| |
| bool NeedsQueuePtr = false; |
| bool HasCall = false; |
| for (Function *Callee : AAEdges.getOptimisticEdges()) { |
| Intrinsic::ID IID = Callee->getIntrinsicID(); |
| if (IID != Intrinsic::not_intrinsic) { |
| if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) { |
| AddAttribute("amdgpu-kernarg-segment-ptr"); |
| continue; |
| } |
| |
| bool NonKernelOnly = false; |
| StringRef AttrName = |
| intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr); |
| |
| if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly)) |
| AddAttribute(AttrName); |
| |
| continue; |
| } |
| |
| HasCall = true; |
| const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>( |
| *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); |
| const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes(); |
| // Propagate implicit attributes from called function. |
| for (StringRef AttrName : ImplicitAttrNames) |
| if (CalleeAttributes.count(AttrName)) |
| AddAttribute(AttrName); |
| } |
| |
| HasCall |= AAEdges.hasUnknownCallee(); |
| if (!IsNonEntryFunc && HasCall) |
| AddAttribute("amdgpu-calls"); |
| |
| // Check the function body. |
| auto CheckAlloca = [&](Instruction &I) { |
| AddAttribute("amdgpu-stack-objects"); |
| return false; |
| }; |
| |
| bool UsedAssumedInformation = false; |
| A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca}, |
| UsedAssumedInformation); |
| |
| // If we found that we need amdgpu-queue-ptr, nothing else to do. |
| if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) { |
| AddAttribute("amdgpu-queue-ptr"); |
| return Change; |
| } |
| |
| auto CheckAddrSpaceCasts = [&](Instruction &I) { |
| unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace(); |
| if (castRequiresQueuePtr(SrcAS)) { |
| NeedsQueuePtr = true; |
| return false; |
| } |
| return true; |
| }; |
| |
| bool HasApertureRegs = InfoCache.hasApertureRegs(*F); |
| |
|     // `checkForAllInstructions` is much cheaper than iterating over all |
|     // instructions manually, so try it first. |
| |
|     // amdgpu-queue-ptr is not needed if aperture registers are present. |
| if (!HasApertureRegs) |
| A.checkForAllInstructions(CheckAddrSpaceCasts, *this, |
| {Instruction::AddrSpaceCast}, |
| UsedAssumedInformation); |
| |
| // If we found that we need amdgpu-queue-ptr, nothing else to do. |
| if (NeedsQueuePtr) { |
| AddAttribute("amdgpu-queue-ptr"); |
| return Change; |
| } |
| |
| if (!IsNonEntryFunc && HasApertureRegs) |
| return Change; |
| |
| for (BasicBlock &BB : *F) { |
| for (Instruction &I : BB) { |
| for (const Use &U : I.operands()) { |
| if (const auto *C = dyn_cast<Constant>(U)) { |
| if (InfoCache.needsQueuePtr(C, *F)) { |
| AddAttribute("amdgpu-queue-ptr"); |
| return Change; |
| } |
| } |
| } |
| } |
| } |
| |
| return Change; |
| } |
| |
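|   /// Attach every deduced attribute name to the function. |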
| ChangeStatus manifest(Attributor &A) override { |
| SmallVector<Attribute, 8> AttrList; |
| LLVMContext &Ctx = getAssociatedFunction()->getContext(); |
| |
| for (StringRef AttrName : Attributes) |
| AttrList.push_back(Attribute::get(Ctx, AttrName)); |
| |
| return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, |
| /* ForceReplace */ true); |
| } |
| |
| const std::string getAsStr() const override { |
| return "AMDInfo[" + std::to_string(Attributes.size()) + "]"; |
| } |
| |
| const DenseSet<StringRef> &getAttributes() const override { |
| return Attributes; |
| } |
| |
| /// See AbstractAttribute::trackStatistics() |
| void trackStatistics() const override {} |
| |
| private: |
| DenseSet<StringRef> Attributes; |
| }; |
| |
| AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, |
| Attributor &A) { |
| if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) |
| return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); |
| llvm_unreachable("AAAMDAttributes is only valid for function position"); |
| } |
| |
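| /// Legacy module pass that runs the Attributor with the AMDGPU-specific |
| /// abstract attributes defined above. |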
| class AMDGPUAttributor : public ModulePass { |
| public: |
| AMDGPUAttributor() : ModulePass(ID) {} |
| |
|   /// doInitialization - Cache the TargetMachine from the TargetPassConfig; |
|   /// the pass cannot run without one. |
| bool doInitialization(Module &) override { |
| auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); |
| if (!TPC) |
| report_fatal_error("TargetMachine is required"); |
| |
| TM = &TPC->getTM<TargetMachine>(); |
| return false; |
| } |
| |
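|   /// Create an AAAMDAttributes and an AAAMDWorkGroupSize attribute for every |
|   /// function in the module, then run the Attributor to a fixpoint and |
|   /// manifest the results. |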
| bool runOnModule(Module &M) override { |
| SetVector<Function *> Functions; |
| AnalysisGetter AG; |
| for (Function &F : M) |
| Functions.insert(&F); |
| |
| CallGraphUpdater CGUpdater; |
| BumpPtrAllocator Allocator; |
| AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); |
| Attributor A(Functions, InfoCache, CGUpdater); |
| |
| for (Function &F : M) { |
| A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); |
| A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F)); |
| } |
| |
| ChangeStatus Change = A.run(); |
| return Change == ChangeStatus::CHANGED; |
| } |
| |
| StringRef getPassName() const override { return "AMDGPU Attributor"; } |
| TargetMachine *TM; |
| static char ID; |
| }; |
| |
| char AMDGPUAttributor::ID = 0; |
| |
| Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); } |
| INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false) |