blob: 8280751e1dbdd2164529d61b0c7573c29643a8a7 [file] [log] [blame]
//===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <functional>
#include <initializer_list>
#include <utility>
namespace llvm {
// Forward declarations. The declarations below only use these types through
// pointers and references (or as template arguments), so the full
// definitions are not needed in this header.
class MachineRegisterInfo;
class MachineInstr;
class GCNSubtarget;
class MachineFunction;
template <typename T> class GenericUniformityInfo;
template <typename T> class GenericSSAContext;
// Uniformity info specialized for machine functions; passed by const
// reference to the rule-matching functions declared below.
using MachineSSAContext = GenericSSAContext<MachineFunction>;
using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
namespace AMDGPU {
// IDs used to build predicate for RegBankLegalizeRule. Predicate can have one
// or more IDs and each represents a check for 'uniform or divergent' + LLT or
// just LLT on register operand.
// Most often checking one operand is enough to decide which RegBankLLTMapping
// to apply (see Fast Rules), IDs are useful when two or more operands need to
// be checked.
enum UniformityLLTOpPredicateID {
  // First enumerator (value 0). NOTE(review): presumably the "no check"
  // placeholder for an operand -- confirm against PredicateMapping::match.
  _,

  // Scalars: plain LLT check first, then the uniform (Uni*) and divergent
  // (Div*) variants. Note that there is no DivS16 entry.
  S1, S16, S32, S64,
  UniS1, UniS16, UniS32, UniS64,
  DivS1, DivS32, DivS64,

  // Pointers: plain, uniform and divergent variants.
  P1, P3, P4, P5,
  UniP1, UniP3, UniP4, UniP5,
  DivP1, DivP3, DivP4, DivP5,

  // Vectors: only the plain LLT check is available.
  V2S16, V2S32, V3S32, V4S32,

  // B types: plain, uniform and divergent variants.
  B32, B64, B96, B128, B256, B512,
  UniB32, UniB64, UniB96, UniB128, UniB256, UniB512,
  DivB32, DivB64, DivB96, DivB128, DivB256, DivB512,
};
// How to apply register bank on register operand.
// In most cases, this serves as a LLT and register bank assert.
// Can change operands and insert copies, extends, truncs, and read-any-lanes.
// Anything more complicated requires LoweringMethod.
enum RegBankLLTMappingApplyID {
  // Value 0. Used to fill the fast-rule slots of SetOfRulesForOpcode that
  // have not been given a mapping.
  InvalidMapping,

  // Operand kinds that are not tied to an sgpr/vgpr type below.
  None, IntrId, Imm, Vcc,

  // sgpr scalars, pointers, vectors and B-types
  Sgpr16, Sgpr32, Sgpr64,
  SgprP1, SgprP3, SgprP4, SgprP5,
  SgprV4S32,
  SgprB32, SgprB64, SgprB96, SgprB128, SgprB256, SgprB512,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr32, Vgpr64,
  VgprP1, VgprP3, VgprP4, VgprP5,
  VgprB32, VgprB64, VgprB96, VgprB128, VgprB256, VgprB512,
  VgprV4S32,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS32, UniInVgprV4S32,
  UniInVgprB32, UniInVgprB64, UniInVgprB96,
  UniInVgprB128, UniInVgprB256, UniInVgprB512,
  Sgpr32Trunc,

  // Src only modifiers: waterfalls, extends
  Sgpr32AExt, Sgpr32AExtBoolInReg, Sgpr32SExt,
};
// Instruction needs to be replaced with a sequence of instructions. Lowering
// was not done by the legalizer since the instruction is available in either
// sgpr or vgpr. For example S64 AND is available on sgpr, for that reason S64
// AND is legal in the context of the Legalizer, which only checks LLT. But S64
// AND is not available on vgpr. Lower it to two S32 vgpr ANDs.
enum LoweringMethodID {
  // Value 0; the default LoweringMethod argument of the RegBankLLTMapping
  // constructor.
  DoNotLower,

  // Lowering methods; their implementations live outside this header.
  UniExtToSel, VgprToVccCopy, SplitTo32, Ext32To64, UniCstExt,

  // Load lowerings.
  SplitLoad, WidenLoad,
};
// Type group handled by the "Fast Rules" of a SetOfRulesForOpcode. Every group
// other than NoFastRules names four types, matching the four Uni/Div slots
// that SetOfRulesForOpcode keeps.
enum FastRulesTypes {
  // Default value: the rule set has no fast rules.
  NoFastRules,
  // S16, S32, S64, V2S16
  Standard,
  // B32, B64, B96, B128
  StandardB,
  // S32, V2S32, V3S32, V4S32
  Vector,
};
// Describes how register banks are applied to the operands of one instruction:
// a list of apply IDs for the destination operands, a list for the source
// operands, and the lowering method to use for the instruction.
struct RegBankLLTMapping {
// Apply IDs for the destination operands (presumably one per operand, in
// operand order -- confirm against the code that consumes the mapping).
SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
// Apply IDs for the source operands (same assumption as above).
SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
// Lowering to perform; DoNotLower unless the constructor is told otherwise.
LoweringMethodID LoweringMethod;
// Builds a mapping from the two ID lists. LoweringMethod defaults to
// DoNotLower. The definition lives outside this header.
RegBankLLTMapping(
std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
LoweringMethodID LoweringMethod = DoNotLower);
};
// Predicate of a RegBankLegalizeRule: a list of UniformityLLTOpPredicateIDs
// plus an optional callback that inspects the instruction.
struct PredicateMapping {
// IDs describing the 'uniform or divergent' + LLT (or just LLT) checks on
// register operands.
SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
// Optional additional test on the instruction; empty when the constructor's
// default (nullptr) is used.
std::function<bool(const MachineInstr &)> TestFunc;
// Builds a predicate from the ID list and an optional test callback. The
// definition lives outside this header.
PredicateMapping(
std::initializer_list<UniformityLLTOpPredicateID> OpList,
std::function<bool(const MachineInstr &)> TestFunc = nullptr);
// Presumably returns true when MI satisfies this predicate, using MUI for the
// uniformity checks and MRI for the type checks -- the definition is not
// visible here.
bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
const MachineRegisterInfo &MRI) const;
};
// One rule: a predicate paired with the operand mapping that belongs to it.
// Rules are stored in SetOfRulesForOpcode via addRule().
struct RegBankLegalizeRule {
// Condition attached to this rule.
PredicateMapping Predicate;
// Register bank mapping associated with the predicate.
RegBankLLTMapping OperandMapping;
};
// All rules registered for one key opcode. Rules come in two flavours: "Slow
// Rules", a list of RegBankLegalizeRule, and "Fast Rules", which are direct
// slots for the uniform and divergent case of a small group of types selected
// by FastTypes.
class SetOfRulesForOpcode {
  // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
  SmallVector<RegBankLegalizeRule, 4> Rules;

  // "Fast Rules"
  // Instead of testing each 'Rules[i].Predicate' we do direct access to
  // RegBankLLTMapping using getFastPredicateSlot. For example if:
  // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
  // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
  FastRulesTypes FastTypes = NoFastRules;

  // RegBankLLTMapping has no default constructor, so every slot is explicitly
  // initialized to an invalid mapping. The helper macro is undefined right
  // after its last use so that it does not leak into every file that includes
  // this header.
#define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
  RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
  RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
#undef InvMapping

public:
  // Creates a rule set; without an argument FastTypes keeps its in-class
  // default (NoFastRules) unless the out-of-line definition says otherwise.
  SetOfRulesForOpcode();
  SetOfRulesForOpcode(FastRulesTypes FastTypes);

  // Returns the mapping to apply to MI. The lookup itself is defined outside
  // this header.
  const RegBankLLTMapping &
  findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                   const MachineUniformityInfo &MUI) const;

  // Registers a "Slow Rule".
  void addRule(RegBankLegalizeRule Rule);

  // Register a "Fast Rule" for type Ty in the divergent / uniform case.
  void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                            RegBankLLTMapping RuleApplyIDs);
  void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                          RegBankLLTMapping RuleApplyIDs);

private:
  // Maps Ty to an index into Uni/Div. NOTE(review): the value returned for
  // types without a slot is not visible in this header.
  int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
};
// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
// little more efficient.
class RegBankLegalizeRules {
  // Objects belonging to the current run. They are stored as pointers so that
  // refreshRefs() can re-point already generated rules at new objects.
  const GCNSubtarget *ST;
  MachineRegisterInfo *MRI;

  // Separate maps for G-opcodes and intrinsics since they are in different
  // enums. Multiple opcodes can share same set of rules.
  // RulesAlias = map<Opcode, KeyOpcode>
  // Rules = map<KeyOpcode, SetOfRulesForOpcode>
  SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
  SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
  SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
  SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;

  // Builder returned by addRulesForGOpcs / addRulesForIOpcs. Its constructor
  // registers the opcodes, and the chained Div/Uni/Any calls then fill the
  // rule set that was created for them.
  class RuleSetInitializer {
    // Points at the SetOfRulesForOpcode stored inside the Rules map.
    // NOTE(review): presumably only valid until that map is modified again --
    // use the initializer right away and do not keep it around.
    SetOfRulesForOpcode *RuleSet;

  public:
    // Used for clang-format line breaks and to force writing all rules for
    // opcode in same place.
    //
    // The first opcode in OpcList becomes the key opcode: every opcode in the
    // list is aliased to it in RulesAlias, and one rule set is created for it
    // in Rules. Registering an opcode or key twice trips an assert.
    template <class AliasMap, class RulesMap>
    RuleSetInitializer(std::initializer_list<unsigned> OpcList,
                       AliasMap &RulesAlias, RulesMap &Rules,
                       FastRulesTypes FastTypes = NoFastRules) {
      // Dereferencing begin() of an empty list is undefined behavior.
      assert(OpcList.size() != 0 && "Expected at least one opcode");
      unsigned KeyOpcode = *OpcList.begin();
      for (unsigned Opc : OpcList) {
        [[maybe_unused]] auto [_, NewInput] =
            RulesAlias.try_emplace(Opc, KeyOpcode);
        assert(NewInput && "Can't redefine existing Rules");
      }

      auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
      assert(NewInput && "Can't redefine existing Rules");

      RuleSet = &DenseMapIter->second;
    }

    // Neither copyable nor movable; ownership of the rule set stays with the
    // map.
    RuleSetInitializer(const RuleSetInitializer &) = delete;
    RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
    RuleSetInitializer(RuleSetInitializer &&) = delete;
    RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
    ~RuleSetInitializer() = default;

    // Adds a fast rule for divergent Ty. Nothing is added when STPred is
    // false. Returns *this for chaining.
    RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
                            RegBankLLTMapping RuleApplyIDs,
                            bool STPred = true) {
      if (STPred)
        RuleSet->addFastRuleDivergent(Ty, std::move(RuleApplyIDs));
      return *this;
    }

    // Adds a fast rule for uniform Ty. Nothing is added when STPred is false.
    // Returns *this for chaining.
    RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
                            RegBankLLTMapping RuleApplyIDs,
                            bool STPred = true) {
      if (STPred)
        RuleSet->addFastRuleUniform(Ty, std::move(RuleApplyIDs));
      return *this;
    }

    // Adds a slow rule. Nothing is added when STPred is false. Returns *this
    // for chaining.
    RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) {
      if (STPred)
        RuleSet->addRule(std::move(Init));
      return *this;
    }
  };

  // Start the rule definitions for a list of G-opcodes / intrinsic opcodes
  // that share one set of rules.
  RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                      FastRulesTypes FastTypes = NoFastRules);

  RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                      FastRulesTypes FastTypes = NoFastRules);

public:
  // Initialize rules for all opcodes.
  RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);

  // In case we don't want to regenerate same rules, we can use already
  // generated rules but need to refresh references to objects that are
  // created for this run.
  void refreshRefs(const GCNSubtarget &NewST, MachineRegisterInfo &NewMRI) {
    ST = &NewST;
    MRI = &NewMRI;
  }

  // Returns the rule set registered for MI's opcode; the lookup is defined
  // outside this header.
  const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const;
};
} // end namespace AMDGPU
} // end namespace llvm
#endif