//===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BinaryFunction member functions related to processing
// the execution profile.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#undef  DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<IndirectCallPromotionType> ICP(
    "indirect-call-promotion", cl::init(ICP_NONE),
    cl::desc("indirect call promotion"),
    cl::values(
        clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
        clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
        clEnumValN(ICP_JUMP_TABLES, "jump-tables",
                   "perform ICP on jump tables"),
        clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

static cl::alias ICPAlias("icp",
                          cl::desc("Alias for --indirect-call-promotion"),
                          cl::aliasopt(ICP));

extern cl::opt<JumpTableSupportLevel> JumpTables;

static cl::opt<bool> FixFuncCounts(
    "fix-func-counts",
    cl::desc("adjust function counts based on basic blocks execution count"),
    cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> FixBlockCounts(
    "fix-block-counts",
    cl::desc("adjust block counts based on outgoing branch counts"),
    cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool>
    InferFallThroughs("infer-fall-throughs",
                      cl::desc("infer execution count for fall-through blocks"),
                      cl::Hidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0)
    ExecutionCount = 1;

  // Compute preliminary execution count for each basic block.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instruction
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt)
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}

void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
      (void)BBMergeSI;

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}

void BinaryFunction::inferFallThroughCounts() {
  // Work on a basic block at a time, propagating frequency information
  // forwards.
  // It is important to walk in the layout order.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic blocks,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      ReportedThrows += LP->getExecutionCount();

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking the
    // branch.
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    LLVM_DEBUG(
        if (BBExecCount < TotalReportedJumps) dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "exec frequency is less than the outgoing edges frequency ("
            << BBExecCount << " < " << ReportedBranches
            << ") for  BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);

    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional jump.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
                        BC.MIB->isIndirectBranch(*LastInstr)))
        continue;
      // If there is an FT it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount =
            std::max(Succ->getKnownExecutionCount(), Inferred);
      }
    }
  }
}

void BinaryFunction::clearProfile() {
  // Keep function execution profile the same. Only clear basic block and edge
  // counts.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
    for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
      BI.Count = 0;
      BI.MispredictedCount = 0;
    }
  }
}

} // namespace bolt
} // namespace llvm
