| //===-- Target.cpp ----------------------------------------------*- C++ -*-===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 | #include "../Target.h" | 
 |  | 
 | #include "../Error.h" | 
 | #include "../ParallelSnippetGenerator.h" | 
 | #include "../SerialSnippetGenerator.h" | 
 | #include "../SnippetGenerator.h" | 
 | #include "MCTargetDesc/X86BaseInfo.h" | 
 | #include "MCTargetDesc/X86MCTargetDesc.h" | 
 | #include "X86.h" | 
 | #include "X86RegisterInfo.h" | 
 | #include "X86Subtarget.h" | 
 | #include "llvm/ADT/Sequence.h" | 
 | #include "llvm/MC/MCInstBuilder.h" | 
 | #include "llvm/Support/FormatVariadic.h" | 
 |  | 
 | namespace llvm { | 
 | namespace exegesis { | 
 |  | 
 | // Returns a non-null reason if we cannot handle the memory references in this | 
 | // instruction. | 
 | static const char *isInvalidMemoryInstr(const Instruction &Instr) { | 
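  // The encoding format (TSFlags & X86II::FormMask) determines whether and
  // how the instruction encodes a memory reference, so most of the check is
  // a switch over it.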
 |   switch (Instr.Description.TSFlags & X86II::FormMask) { | 
 |   default: | 
 |     llvm_unreachable("Unknown FormMask value"); | 
 |   // These have no memory access. | 
 |   case X86II::Pseudo: | 
 |   case X86II::RawFrm: | 
 |   case X86II::AddCCFrm: | 
 |   case X86II::PrefixByte: | 
 |   case X86II::MRMDestReg: | 
 |   case X86II::MRMSrcReg: | 
 |   case X86II::MRMSrcReg4VOp3: | 
 |   case X86II::MRMSrcRegOp4: | 
 |   case X86II::MRMSrcRegCC: | 
 |   case X86II::MRMXrCC: | 
 |   case X86II::MRMr0: | 
 |   case X86II::MRMXr: | 
 |   case X86II::MRM0r: | 
 |   case X86II::MRM1r: | 
 |   case X86II::MRM2r: | 
 |   case X86II::MRM3r: | 
 |   case X86II::MRM4r: | 
 |   case X86II::MRM5r: | 
 |   case X86II::MRM6r: | 
 |   case X86II::MRM7r: | 
 |   case X86II::MRM0X: | 
 |   case X86II::MRM1X: | 
 |   case X86II::MRM2X: | 
 |   case X86II::MRM3X: | 
 |   case X86II::MRM4X: | 
 |   case X86II::MRM5X: | 
 |   case X86II::MRM6X: | 
 |   case X86II::MRM7X: | 
 |   case X86II::MRM_C0: | 
 |   case X86II::MRM_C1: | 
 |   case X86II::MRM_C2: | 
 |   case X86II::MRM_C3: | 
 |   case X86II::MRM_C4: | 
 |   case X86II::MRM_C5: | 
 |   case X86II::MRM_C6: | 
 |   case X86II::MRM_C7: | 
 |   case X86II::MRM_C8: | 
 |   case X86II::MRM_C9: | 
 |   case X86II::MRM_CA: | 
 |   case X86II::MRM_CB: | 
 |   case X86II::MRM_CC: | 
 |   case X86II::MRM_CD: | 
 |   case X86II::MRM_CE: | 
 |   case X86II::MRM_CF: | 
 |   case X86II::MRM_D0: | 
 |   case X86II::MRM_D1: | 
 |   case X86II::MRM_D2: | 
 |   case X86II::MRM_D3: | 
 |   case X86II::MRM_D4: | 
 |   case X86II::MRM_D5: | 
 |   case X86II::MRM_D6: | 
 |   case X86II::MRM_D7: | 
 |   case X86II::MRM_D8: | 
 |   case X86II::MRM_D9: | 
 |   case X86II::MRM_DA: | 
 |   case X86II::MRM_DB: | 
 |   case X86II::MRM_DC: | 
 |   case X86II::MRM_DD: | 
 |   case X86II::MRM_DE: | 
 |   case X86II::MRM_DF: | 
 |   case X86II::MRM_E0: | 
 |   case X86II::MRM_E1: | 
 |   case X86II::MRM_E2: | 
 |   case X86II::MRM_E3: | 
 |   case X86II::MRM_E4: | 
 |   case X86II::MRM_E5: | 
 |   case X86II::MRM_E6: | 
 |   case X86II::MRM_E7: | 
 |   case X86II::MRM_E8: | 
 |   case X86II::MRM_E9: | 
 |   case X86II::MRM_EA: | 
 |   case X86II::MRM_EB: | 
 |   case X86II::MRM_EC: | 
 |   case X86II::MRM_ED: | 
 |   case X86II::MRM_EE: | 
 |   case X86II::MRM_EF: | 
 |   case X86II::MRM_F0: | 
 |   case X86II::MRM_F1: | 
 |   case X86II::MRM_F2: | 
 |   case X86II::MRM_F3: | 
 |   case X86II::MRM_F4: | 
 |   case X86II::MRM_F5: | 
 |   case X86II::MRM_F6: | 
 |   case X86II::MRM_F7: | 
 |   case X86II::MRM_F8: | 
 |   case X86II::MRM_F9: | 
 |   case X86II::MRM_FA: | 
 |   case X86II::MRM_FB: | 
 |   case X86II::MRM_FC: | 
 |   case X86II::MRM_FD: | 
 |   case X86II::MRM_FE: | 
 |   case X86II::MRM_FF: | 
 |   case X86II::RawFrmImm8: | 
 |     return nullptr; | 
 |   case X86II::AddRegFrm: | 
 |     return (Instr.Description.Opcode == X86::POP16r || | 
 |             Instr.Description.Opcode == X86::POP32r || | 
 |             Instr.Description.Opcode == X86::PUSH16r || | 
 |             Instr.Description.Opcode == X86::PUSH32r) | 
 |                ? "unsupported opcode: unsupported memory access" | 
 |                : nullptr; | 
 |   // These access memory and are handled. | 
 |   case X86II::MRMDestMem: | 
 |   case X86II::MRMSrcMem: | 
 |   case X86II::MRMSrcMem4VOp3: | 
 |   case X86II::MRMSrcMemOp4: | 
 |   case X86II::MRMSrcMemCC: | 
 |   case X86II::MRMXmCC: | 
 |   case X86II::MRMXm: | 
 |   case X86II::MRM0m: | 
 |   case X86II::MRM1m: | 
 |   case X86II::MRM2m: | 
 |   case X86II::MRM3m: | 
 |   case X86II::MRM4m: | 
 |   case X86II::MRM5m: | 
 |   case X86II::MRM6m: | 
 |   case X86II::MRM7m: | 
 |     return nullptr; | 
 |   // These access memory and are not handled yet. | 
 |   case X86II::RawFrmImm16: | 
 |   case X86II::RawFrmMemOffs: | 
 |   case X86II::RawFrmSrc: | 
 |   case X86II::RawFrmDst: | 
 |   case X86II::RawFrmDstSrc: | 
 |     return "unsupported opcode: non uniform memory access"; | 
 |   } | 
 | } | 
 |  | 
// If the opcode is invalid, returns a string literal describing the reason;
// nullptr indicates a valid opcode.
 | static const char *isInvalidOpcode(const Instruction &Instr) { | 
 |   const auto OpcodeName = Instr.Name; | 
 |   if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo) | 
 |     return "unsupported opcode: pseudo instruction"; | 
 |   if (OpcodeName.startswith("POP") || OpcodeName.startswith("PUSH") || | 
 |       OpcodeName.startswith("ADJCALLSTACK") || OpcodeName.startswith("LEAVE")) | 
 |     return "unsupported opcode: Push/Pop/AdjCallStack/Leave"; | 
 |   if (const auto reason = isInvalidMemoryInstr(Instr)) | 
 |     return reason; | 
 |   // We do not handle instructions with OPERAND_PCREL. | 
 |   for (const Operand &Op : Instr.Operands) | 
 |     if (Op.isExplicit() && | 
 |         Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL) | 
 |       return "unsupported opcode: PC relative operand"; | 
 |   // We do not handle second-form X87 instructions. We only handle first-form | 
 |   // ones (_Fp), see comment in X86InstrFPStack.td. | 
 |   for (const Operand &Op : Instr.Operands) | 
 |     if (Op.isReg() && Op.isExplicit() && | 
 |         Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID) | 
 |       return "unsupported second-form X87 instruction"; | 
 |   return nullptr; | 
 | } | 
 |  | 
 | static unsigned getX86FPFlags(const Instruction &Instr) { | 
 |   return Instr.Description.TSFlags & X86II::FPTypeMask; | 
 | } | 
 |  | 
 | // Helper to fill a memory operand with a value. | 
 | static void setMemOp(InstructionTemplate &IT, int OpIdx, | 
 |                      const MCOperand &OpVal) { | 
 |   const auto Op = IT.getInstr().Operands[OpIdx]; | 
 |   assert(Op.isExplicit() && "invalid memory pattern"); | 
 |   IT.getValueFor(Op) = OpVal; | 
 | } | 
 |  | 
// Common (latency, uops) code for LEA templates. `RestrictDestRegs` is given
// the addressing base and index registers and restricts the set of candidate
// LEA destination registers accordingly.
 | static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon( | 
 |     const Instruction &Instr, const BitVector &ForbiddenRegisters, | 
 |     const LLVMState &State, const SnippetGenerator::Options &Opts, | 
 |     std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)> | 
 |         RestrictDestRegs) { | 
 |   assert(Instr.Operands.size() == 6 && "invalid LEA"); | 
 |   assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 && | 
 |          "invalid LEA"); | 
 |  | 
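  // LEA's destination is operand 0; operands 1 to 5 form the standard X86
  // memory reference (BaseReg, ScaleAmt, IndexReg, Disp, SegmentReg), which is
  // why the index register lives at operand 3.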
 |   constexpr const int kDestOp = 0; | 
 |   constexpr const int kBaseOp = 1; | 
 |   constexpr const int kIndexOp = 3; | 
 |   auto PossibleDestRegs = | 
 |       Instr.Operands[kDestOp].getRegisterAliasing().sourceBits(); | 
 |   remove(PossibleDestRegs, ForbiddenRegisters); | 
 |   auto PossibleBaseRegs = | 
 |       Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits(); | 
 |   remove(PossibleBaseRegs, ForbiddenRegisters); | 
 |   auto PossibleIndexRegs = | 
 |       Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits(); | 
 |   remove(PossibleIndexRegs, ForbiddenRegisters); | 
 |  | 
 |   const auto &RegInfo = State.getRegInfo(); | 
 |   std::vector<CodeTemplate> Result; | 
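  // Enumerate (base, index, scale, displacement) combinations. LogScale in
  // [0, 3] covers the only encodable scale factors (1, 2, 4 and 8), and only
  // two representative displacements are tried to bound the number of
  // generated templates.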
 |   for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) { | 
 |     for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) { | 
 |       for (int LogScale = 0; LogScale <= 3; ++LogScale) { | 
 |         // FIXME: Add an option for controlling how we explore immediates. | 
 |         for (const int Disp : {0, 42}) { | 
 |           InstructionTemplate IT(&Instr); | 
 |           const int64_t Scale = 1ull << LogScale; | 
 |           setMemOp(IT, 1, MCOperand::createReg(BaseReg)); | 
 |           setMemOp(IT, 2, MCOperand::createImm(Scale)); | 
 |           setMemOp(IT, 3, MCOperand::createReg(IndexReg)); | 
 |           setMemOp(IT, 4, MCOperand::createImm(Disp)); | 
 |           // SegmentReg must be 0 for LEA. | 
 |           setMemOp(IT, 5, MCOperand::createReg(0)); | 
 |  | 
 |           // Output reg candidates are selected by the caller. | 
 |           auto PossibleDestRegsNow = PossibleDestRegs; | 
 |           RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow); | 
 |           assert(PossibleDestRegsNow.set_bits().begin() != | 
 |                      PossibleDestRegsNow.set_bits().end() && | 
 |                  "no remaining registers"); | 
 |           setMemOp( | 
 |               IT, 0, | 
 |               MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin())); | 
 |  | 
 |           CodeTemplate CT; | 
 |           CT.Instructions.push_back(std::move(IT)); | 
 |           CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg), | 
 |                               RegInfo.getName(IndexReg), Scale, Disp) | 
 |                           .str(); | 
 |           Result.push_back(std::move(CT)); | 
 |           if (Result.size() >= Opts.MaxConfigsPerOpcode) | 
 |             return std::move(Result); | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   return std::move(Result); | 
 | } | 
 |  | 
 | namespace { | 
 | class X86SerialSnippetGenerator : public SerialSnippetGenerator { | 
 | public: | 
 |   using SerialSnippetGenerator::SerialSnippetGenerator; | 
 |  | 
 |   Expected<std::vector<CodeTemplate>> | 
 |   generateCodeTemplates(InstructionTemplate Variant, | 
 |                         const BitVector &ForbiddenRegisters) const override; | 
 | }; | 
 | } // namespace | 
 |  | 
 | Expected<std::vector<CodeTemplate>> | 
 | X86SerialSnippetGenerator::generateCodeTemplates( | 
 |     InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { | 
 |   const Instruction &Instr = Variant.getInstr(); | 
 |  | 
 |   if (const auto reason = isInvalidOpcode(Instr)) | 
 |     return make_error<Failure>(reason); | 
 |  | 
 |   // LEA gets special attention. | 
 |   const auto Opcode = Instr.Description.getOpcode(); | 
 |   if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { | 
 |     return generateLEATemplatesCommon( | 
 |         Instr, ForbiddenRegisters, State, Opts, | 
 |         [this](unsigned BaseReg, unsigned IndexReg, | 
 |                BitVector &CandidateDestRegs) { | 
 |           // We just select a destination register that aliases the base | 
 |           // register. | 
 |           CandidateDestRegs &= | 
 |               State.getRATC().getRegister(BaseReg).aliasedBits(); | 
 |         }); | 
 |   } | 
 |  | 
 |   if (Instr.hasMemoryOperands()) | 
 |     return make_error<Failure>( | 
 |         "unsupported memory operand in latency measurements"); | 
 |  | 
 |   switch (getX86FPFlags(Instr)) { | 
 |   case X86II::NotFP: | 
 |     return SerialSnippetGenerator::generateCodeTemplates(Variant, | 
 |                                                          ForbiddenRegisters); | 
 |   case X86II::ZeroArgFP: | 
 |   case X86II::OneArgFP: | 
 |   case X86II::SpecialFP: | 
 |   case X86II::CompareFP: | 
 |   case X86II::CondMovFP: | 
 |     return make_error<Failure>("Unsupported x87 Instruction"); | 
 |   case X86II::OneArgFPRW: | 
 |   case X86II::TwoArgFP: | 
 |     // These are instructions like | 
 |     //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) | 
 |     //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP) | 
 |     // They are intrinsically serial and do not modify the state of the stack. | 
 |     return generateSelfAliasingCodeTemplates(Variant); | 
 |   default: | 
 |     llvm_unreachable("Unknown FP Type!"); | 
 |   } | 
 | } | 
 |  | 
 | namespace { | 
 | class X86ParallelSnippetGenerator : public ParallelSnippetGenerator { | 
 | public: | 
 |   using ParallelSnippetGenerator::ParallelSnippetGenerator; | 
 |  | 
 |   Expected<std::vector<CodeTemplate>> | 
 |   generateCodeTemplates(InstructionTemplate Variant, | 
 |                         const BitVector &ForbiddenRegisters) const override; | 
 | }; | 
 |  | 
 | } // namespace | 
 |  | 
 | Expected<std::vector<CodeTemplate>> | 
 | X86ParallelSnippetGenerator::generateCodeTemplates( | 
 |     InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { | 
 |   const Instruction &Instr = Variant.getInstr(); | 
 |  | 
 |   if (const auto reason = isInvalidOpcode(Instr)) | 
 |     return make_error<Failure>(reason); | 
 |  | 
 |   // LEA gets special attention. | 
 |   const auto Opcode = Instr.Description.getOpcode(); | 
 |   if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { | 
 |     return generateLEATemplatesCommon( | 
 |         Instr, ForbiddenRegisters, State, Opts, | 
 |         [this](unsigned BaseReg, unsigned IndexReg, | 
 |                BitVector &CandidateDestRegs) { | 
 |           // Any destination register that is not used for addressing is fine. | 
 |           remove(CandidateDestRegs, | 
 |                  State.getRATC().getRegister(BaseReg).aliasedBits()); | 
 |           remove(CandidateDestRegs, | 
 |                  State.getRATC().getRegister(IndexReg).aliasedBits()); | 
 |         }); | 
 |   } | 
 |  | 
 |   switch (getX86FPFlags(Instr)) { | 
 |   case X86II::NotFP: | 
 |     return ParallelSnippetGenerator::generateCodeTemplates(Variant, | 
 |                                                            ForbiddenRegisters); | 
 |   case X86II::ZeroArgFP: | 
 |   case X86II::OneArgFP: | 
 |   case X86II::SpecialFP: | 
 |     return make_error<Failure>("Unsupported x87 Instruction"); | 
 |   case X86II::OneArgFPRW: | 
 |   case X86II::TwoArgFP: | 
 |     // These are instructions like | 
 |     //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) | 
 |     //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP) | 
 |     // They are intrinsically serial and do not modify the state of the stack. | 
 |     // We generate the same code for latency and uops. | 
 |     return generateSelfAliasingCodeTemplates(Variant); | 
 |   case X86II::CompareFP: | 
 |   case X86II::CondMovFP: | 
    // We can compute uops for any FP instruction that does not grow or shrink
    // the x87 stack (i.e. it either does not touch the stack or pushes as much
    // as it pops).
 |     return generateUnconstrainedCodeTemplates( | 
 |         Variant, "instruction does not grow/shrink the FP stack"); | 
 |   default: | 
 |     llvm_unreachable("Unknown FP Type!"); | 
 |   } | 
 | } | 
 |  | 
 | static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) { | 
 |   switch (RegBitWidth) { | 
 |   case 8: | 
 |     return X86::MOV8ri; | 
 |   case 16: | 
 |     return X86::MOV16ri; | 
 |   case 32: | 
 |     return X86::MOV32ri; | 
 |   case 64: | 
 |     return X86::MOV64ri; | 
 |   } | 
 |   llvm_unreachable("Invalid Value Width"); | 
 | } | 
 |  | 
// Generates an instruction that loads an immediate value into a register.
 | static MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth, | 
 |                             const APInt &Value) { | 
 |   if (Value.getBitWidth() > RegBitWidth) | 
 |     llvm_unreachable("Value must fit in the Register"); | 
 |   return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth)) | 
 |       .addReg(Reg) | 
 |       .addImm(Value.getZExtValue()); | 
 | } | 
 |  | 
 | // Allocates scratch memory on the stack. | 
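// SUB64ri8 takes a sign-extended 8-bit immediate, which is enough for the
// small allocations (at most 64 bytes) made in this file.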
 | static MCInst allocateStackSpace(unsigned Bytes) { | 
 |   return MCInstBuilder(X86::SUB64ri8) | 
 |       .addReg(X86::RSP) | 
 |       .addReg(X86::RSP) | 
 |       .addImm(Bytes); | 
 | } | 
 |  | 
 | // Fills scratch memory at offset `OffsetBytes` with value `Imm`. | 
 | static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes, | 
 |                              uint64_t Imm) { | 
 |   return MCInstBuilder(MovOpcode) | 
      // Address = RSP
 |       .addReg(X86::RSP)    // BaseReg | 
 |       .addImm(1)           // ScaleAmt | 
 |       .addReg(0)           // IndexReg | 
 |       .addImm(OffsetBytes) // Disp | 
 |       .addReg(0)           // Segment | 
 |       // Immediate. | 
 |       .addImm(Imm); | 
 | } | 
 |  | 
 | // Loads scratch memory into register `Reg` using opcode `RMOpcode`. | 
 | static MCInst loadToReg(unsigned Reg, unsigned RMOpcode) { | 
 |   return MCInstBuilder(RMOpcode) | 
 |       .addReg(Reg) | 
      // Address = RSP
 |       .addReg(X86::RSP) // BaseReg | 
 |       .addImm(1)        // ScaleAmt | 
 |       .addReg(0)        // IndexReg | 
 |       .addImm(0)        // Disp | 
 |       .addReg(0);       // Segment | 
 | } | 
 |  | 
 | // Releases scratch memory. | 
 | static MCInst releaseStackSpace(unsigned Bytes) { | 
 |   return MCInstBuilder(X86::ADD64ri8) | 
 |       .addReg(X86::RSP) | 
 |       .addReg(X86::RSP) | 
 |       .addImm(Bytes); | 
 | } | 
 |  | 
// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
 | namespace { | 
 | struct ConstantInliner { | 
 |   explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {} | 
 |  | 
 |   std::vector<MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth, | 
 |                                       unsigned Opcode); | 
 |  | 
 |   std::vector<MCInst> loadX87STAndFinalize(unsigned Reg); | 
 |  | 
 |   std::vector<MCInst> loadX87FPAndFinalize(unsigned Reg); | 
 |  | 
 |   std::vector<MCInst> popFlagAndFinalize(); | 
 |  | 
 |   std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode, | 
 |                                                  unsigned Value); | 
 |  | 
 | private: | 
 |   ConstantInliner &add(const MCInst &Inst) { | 
 |     Instructions.push_back(Inst); | 
 |     return *this; | 
 |   } | 
 |  | 
 |   void initStack(unsigned Bytes); | 
 |  | 
 |   static constexpr const unsigned kF80Bytes = 10; // 80 bits. | 
 |  | 
 |   APInt Constant_; | 
 |   std::vector<MCInst> Instructions; | 
 | }; | 
 | } // namespace | 
 |  | 
 | std::vector<MCInst> ConstantInliner::loadAndFinalize(unsigned Reg, | 
 |                                                      unsigned RegBitWidth, | 
 |                                                      unsigned Opcode) { | 
 |   assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits"); | 
 |   initStack(RegBitWidth / 8); | 
 |   add(loadToReg(Reg, Opcode)); | 
 |   add(releaseStackSpace(RegBitWidth / 8)); | 
 |   return std::move(Instructions); | 
 | } | 
 |  | 
 | std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) { | 
 |   initStack(kF80Bytes); | 
 |   add(MCInstBuilder(X86::LD_F80m) | 
          // Address = RSP
 |           .addReg(X86::RSP) // BaseReg | 
 |           .addImm(1)        // ScaleAmt | 
 |           .addReg(0)        // IndexReg | 
 |           .addImm(0)        // Disp | 
 |           .addReg(0));      // Segment | 
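  // LD_F80m pushes the 80-bit value onto the top of the x87 stack (ST(0)); if
  // another stack register was requested, copy ST(0) into it.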
 |   if (Reg != X86::ST0) | 
 |     add(MCInstBuilder(X86::ST_Frr).addReg(Reg)); | 
 |   add(releaseStackSpace(kF80Bytes)); | 
 |   return std::move(Instructions); | 
 | } | 
 |  | 
 | std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) { | 
 |   initStack(kF80Bytes); | 
 |   add(MCInstBuilder(X86::LD_Fp80m) | 
 |           .addReg(Reg) | 
          // Address = RSP
 |           .addReg(X86::RSP) // BaseReg | 
 |           .addImm(1)        // ScaleAmt | 
 |           .addReg(0)        // IndexReg | 
 |           .addImm(0)        // Disp | 
 |           .addReg(0));      // Segment | 
 |   add(releaseStackSpace(kF80Bytes)); | 
 |   return std::move(Instructions); | 
 | } | 
 |  | 
 | std::vector<MCInst> ConstantInliner::popFlagAndFinalize() { | 
 |   initStack(8); | 
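  // POPF64 pops the 8 bytes just written on the stack into RFLAGS.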
 |   add(MCInstBuilder(X86::POPF64)); | 
 |   return std::move(Instructions); | 
 | } | 
 |  | 
 | std::vector<MCInst> | 
 | ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) { | 
 |   add(allocateStackSpace(4)); | 
  // Write `Value` to the scratch memory (e.g. 0x1f80 masks all FP exceptions
  // when loading MXCSR).
  add(fillStackSpace(X86::MOV32mi, 0, Value));
 |   add(MCInstBuilder(Opcode) | 
          // Address = RSP
 |           .addReg(X86::RSP) // BaseReg | 
 |           .addImm(1)        // ScaleAmt | 
 |           .addReg(0)        // IndexReg | 
 |           .addImm(0)        // Disp | 
 |           .addReg(0));      // Segment | 
 |   add(releaseStackSpace(4)); | 
 |   return std::move(Instructions); | 
 | } | 
 |  | 
 | void ConstantInliner::initStack(unsigned Bytes) { | 
 |   assert(Constant_.getBitWidth() <= Bytes * 8 && | 
 |          "Value does not have the correct size"); | 
 |   const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8 | 
 |                                  ? Constant_.sext(Bytes * 8) | 
 |                                  : Constant_; | 
 |   add(allocateStackSpace(Bytes)); | 
 |   size_t ByteOffset = 0; | 
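  // Write the constant into the allocated space in 4-byte chunks, then handle
  // any remaining 2-byte and 1-byte tail.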
 |   for (; Bytes - ByteOffset >= 4; ByteOffset += 4) | 
 |     add(fillStackSpace( | 
 |         X86::MOV32mi, ByteOffset, | 
 |         WideConstant.extractBits(32, ByteOffset * 8).getZExtValue())); | 
 |   if (Bytes - ByteOffset >= 2) { | 
 |     add(fillStackSpace( | 
 |         X86::MOV16mi, ByteOffset, | 
 |         WideConstant.extractBits(16, ByteOffset * 8).getZExtValue())); | 
 |     ByteOffset += 2; | 
 |   } | 
 |   if (Bytes - ByteOffset >= 1) | 
 |     add(fillStackSpace( | 
 |         X86::MOV8mi, ByteOffset, | 
 |         WideConstant.extractBits(8, ByteOffset * 8).getZExtValue())); | 
 | } | 
 |  | 
 | #include "X86GenExegesis.inc" | 
 |  | 
 | namespace { | 
 | class ExegesisX86Target : public ExegesisTarget { | 
 | public: | 
 |   ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {} | 
 |  | 
 | private: | 
 |   void addTargetSpecificPasses(PassManagerBase &PM) const override; | 
 |  | 
 |   unsigned getScratchMemoryRegister(const Triple &TT) const override; | 
 |  | 
 |   unsigned getLoopCounterRegister(const Triple &) const override; | 
 |  | 
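  // 64 bytes is enough to cover the widest access we generate (a full 512-bit
  // ZMM load or store).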
 |   unsigned getMaxMemoryAccessSize() const override { return 64; } | 
 |  | 
 |   Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, | 
 |                                  MCOperand &AssignedValue, | 
 |                                  const BitVector &ForbiddenRegs) const override; | 
 |  | 
 |   void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, | 
 |                           unsigned Offset) const override; | 
 |  | 
 |   void decrementLoopCounterAndJump(MachineBasicBlock &MBB, | 
 |                                    MachineBasicBlock &TargetMBB, | 
 |                                    const MCInstrInfo &MII) const override; | 
 |  | 
 |   std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg, | 
 |                                const APInt &Value) const override; | 
 |  | 
  ArrayRef<unsigned> getUnavailableRegisters() const override {
    return makeArrayRef(kUnavailableRegisters);
  }
 |  | 
 |   bool allowAsBackToBack(const Instruction &Instr) const override { | 
 |     const unsigned Opcode = Instr.Description.Opcode; | 
 |     return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r && | 
 |            Opcode != X86::LEA64_32r && Opcode != X86::LEA16r; | 
 |   } | 
 |  | 
 |   std::vector<InstructionTemplate> | 
 |   generateInstructionVariants(const Instruction &Instr, | 
 |                               unsigned MaxConfigsPerOpcode) const override; | 
 |  | 
 |   std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator( | 
 |       const LLVMState &State, | 
 |       const SnippetGenerator::Options &Opts) const override { | 
 |     return std::make_unique<X86SerialSnippetGenerator>(State, Opts); | 
 |   } | 
 |  | 
 |   std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator( | 
 |       const LLVMState &State, | 
 |       const SnippetGenerator::Options &Opts) const override { | 
 |     return std::make_unique<X86ParallelSnippetGenerator>(State, Opts); | 
 |   } | 
 |  | 
 |   bool matchesArch(Triple::ArchType Arch) const override { | 
 |     return Arch == Triple::x86_64 || Arch == Triple::x86; | 
 |   } | 
 |  | 
 |   static const unsigned kUnavailableRegisters[4]; | 
 | }; | 
 |  | 
// We disable a few registers that cannot be encoded in instructions that have
// a REX prefix.
 | const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH, | 
 |                                                               X86::CH, X86::DH}; | 
 |  | 
// We're using one of R8-R15 because these registers are never hardcoded in
// instructions (e.g. MOVS implicitly uses ESI and EDI), so they have fewer
// conflicts.
 | constexpr const unsigned kLoopCounterReg = X86::R8; | 
 |  | 
 | } // namespace | 
 |  | 
 | void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const { | 
 |   // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F. | 
 |   PM.add(createX86FloatingPointStackifierPass()); | 
 | } | 
 |  | 
 | unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const { | 
 |   if (!TT.isArch64Bit()) { | 
 |     // FIXME: This would require popping from the stack, so we would have to | 
 |     // add some additional setup code. | 
 |     return 0; | 
 |   } | 
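  // The scratch memory pointer is passed to the snippet as its first integer
  // argument: RCX with the Windows x64 calling convention, RDI with the SysV
  // ABI.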
 |   return TT.isOSWindows() ? X86::RCX : X86::RDI; | 
 | } | 
 |  | 
 | unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const { | 
 |   if (!TT.isArch64Bit()) { | 
 |     return 0; | 
 |   } | 
 |   return kLoopCounterReg; | 
 | } | 
 |  | 
 | Error ExegesisX86Target::randomizeTargetMCOperand( | 
 |     const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, | 
 |     const BitVector &ForbiddenRegs) const { | 
 |   const Operand &Op = Instr.getPrimaryOperand(Var); | 
 |   switch (Op.getExplicitOperandInfo().OperandType) { | 
 |   case X86::OperandType::OPERAND_ROUNDING_CONTROL: | 
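    // Pick one of the four static rounding modes (TO_NEAREST_INT .. TO_ZERO)
    // at random.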
 |     AssignedValue = | 
 |         MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO)); | 
 |     return Error::success(); | 
 |   default: | 
 |     break; | 
 |   } | 
 |   return make_error<Failure>( | 
 |       Twine("unimplemented operand type ") | 
 |           .concat(Twine(Op.getExplicitOperandInfo().OperandType))); | 
 | } | 
 |  | 
 | void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT, | 
 |                                            unsigned Reg, | 
 |                                            unsigned Offset) const { | 
 |   assert(!isInvalidMemoryInstr(IT.getInstr()) && | 
 |          "fillMemoryOperands requires a valid memory instruction"); | 
 |   int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags); | 
 |   assert(MemOpIdx >= 0 && "invalid memory operand index"); | 
 |   // getMemoryOperandNo() ignores tied operands, so we have to add them back. | 
 |   MemOpIdx += X86II::getOperandBias(IT.getInstr().Description); | 
 |   setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg | 
 |   setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt | 
 |   setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg | 
 |   setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp | 
 |   setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment | 
 | } | 
 |  | 
 | void ExegesisX86Target::decrementLoopCounterAndJump( | 
 |     MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB, | 
 |     const MCInstrInfo &MII) const { | 
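  // ADD -1 decrements the counter and sets EFLAGS; the conditional jump then
  // branches back to TargetMBB while the counter is non-zero.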
 |   BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8)) | 
 |       .addDef(kLoopCounterReg) | 
 |       .addUse(kLoopCounterReg) | 
 |       .addImm(-1); | 
 |   BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1)) | 
 |       .addMBB(&TargetMBB) | 
 |       .addImm(X86::COND_NE); | 
 | } | 
 |  | 
 | std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI, | 
 |                                                 unsigned Reg, | 
 |                                                 const APInt &Value) const { | 
 |   if (X86::GR8RegClass.contains(Reg)) | 
 |     return {loadImmediate(Reg, 8, Value)}; | 
 |   if (X86::GR16RegClass.contains(Reg)) | 
 |     return {loadImmediate(Reg, 16, Value)}; | 
 |   if (X86::GR32RegClass.contains(Reg)) | 
 |     return {loadImmediate(Reg, 32, Value)}; | 
 |   if (X86::GR64RegClass.contains(Reg)) | 
 |     return {loadImmediate(Reg, 64, Value)}; | 
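  // Registers that cannot be written with a single immediate move (vector,
  // x87, flags and control registers) are set by spilling the constant to the
  // stack and loading it back with an appropriate instruction.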
 |   ConstantInliner CI(Value); | 
 |   if (X86::VR64RegClass.contains(Reg)) | 
 |     return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm); | 
 |   if (X86::VR128XRegClass.contains(Reg)) { | 
 |     if (STI.getFeatureBits()[X86::FeatureAVX512]) | 
 |       return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm); | 
 |     if (STI.getFeatureBits()[X86::FeatureAVX]) | 
 |       return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm); | 
 |     return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm); | 
 |   } | 
 |   if (X86::VR256XRegClass.contains(Reg)) { | 
 |     if (STI.getFeatureBits()[X86::FeatureAVX512]) | 
 |       return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm); | 
 |     if (STI.getFeatureBits()[X86::FeatureAVX]) | 
 |       return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm); | 
 |   } | 
 |   if (X86::VR512RegClass.contains(Reg)) | 
 |     if (STI.getFeatureBits()[X86::FeatureAVX512]) | 
 |       return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm); | 
 |   if (X86::RSTRegClass.contains(Reg)) { | 
 |     return CI.loadX87STAndFinalize(Reg); | 
 |   } | 
 |   if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) || | 
 |       X86::RFP80RegClass.contains(Reg)) { | 
 |     return CI.loadX87FPAndFinalize(Reg); | 
 |   } | 
 |   if (Reg == X86::EFLAGS) | 
 |     return CI.popFlagAndFinalize(); | 
 |   if (Reg == X86::MXCSR) | 
 |     return CI.loadImplicitRegAndFinalize( | 
 |         STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR, | 
 |         0x1f80); | 
 |   if (Reg == X86::FPCW) | 
 |     return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f); | 
 |   return {}; // Not yet implemented. | 
 | } | 
 |  | 
// An instruction can have variable operands, and we may want to see how
// different operand values affect performance. So for each operand position,
// precompute all the choices we might care about, and greedily generate all
// the possible combinations of those choices.
 | std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants( | 
 |     const Instruction &Instr, unsigned MaxConfigsPerOpcode) const { | 
 |   bool Exploration = false; | 
 |   SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices; | 
 |   VariableChoices.resize(Instr.Variables.size()); | 
 |   for (auto I : llvm::zip(Instr.Variables, VariableChoices)) { | 
 |     const Variable &Var = std::get<0>(I); | 
 |     SmallVectorImpl<MCOperand> &Choices = std::get<1>(I); | 
 |  | 
 |     switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) { | 
 |     default: | 
 |       // We don't wish to explicitly explore this variable. | 
 |       Choices.emplace_back(); // But add invalid MCOperand to simplify logic. | 
 |       continue; | 
 |     case X86::OperandType::OPERAND_COND_CODE: { | 
 |       Exploration = true; | 
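      // Enumerate every valid condition code (COND_O .. LAST_VALID_COND) so
      // that each conditional variant of the instruction gets its own
      // template.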
 |       auto CondCodes = seq((int)X86::CondCode::COND_O, | 
 |                            1 + (int)X86::CondCode::LAST_VALID_COND); | 
 |       Choices.reserve(std::distance(CondCodes.begin(), CondCodes.end())); | 
 |       for (int CondCode : CondCodes) | 
 |         Choices.emplace_back(MCOperand::createImm(CondCode)); | 
 |       break; | 
 |     } | 
 |     } | 
 |   } | 
 |  | 
 |   // If we don't wish to explore any variables, defer to the baseline method. | 
 |   if (!Exploration) | 
 |     return ExegesisTarget::generateInstructionVariants(Instr, | 
 |                                                        MaxConfigsPerOpcode); | 
 |  | 
 |   std::vector<InstructionTemplate> Variants; | 
 |   size_t NumVariants; | 
 |   CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G( | 
 |       VariableChoices); | 
 |  | 
 |   // How many operand combinations can we produce, within the limit? | 
 |   NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode); | 
 |   // And actually produce all the wanted operand combinations. | 
 |   Variants.reserve(NumVariants); | 
 |   G.generate([&](ArrayRef<MCOperand> State) -> bool { | 
 |     Variants.emplace_back(&Instr); | 
 |     Variants.back().setVariableValues(State); | 
 |     // Did we run out of space for variants? | 
 |     return Variants.size() >= NumVariants; | 
 |   }); | 
 |  | 
 |   assert(Variants.size() == NumVariants && | 
 |          Variants.size() <= MaxConfigsPerOpcode && | 
 |          "Should not produce too many variants"); | 
 |   return Variants; | 
 | } | 
 |  | 
 | static ExegesisTarget *getTheExegesisX86Target() { | 
 |   static ExegesisX86Target Target; | 
 |   return &Target; | 
 | } | 
 |  | 
 | void InitializeX86ExegesisTarget() { | 
 |   ExegesisTarget::registerTarget(getTheExegesisX86Target()); | 
 | } | 
 |  | 
 | } // namespace exegesis | 
 | } // namespace llvm |