|  | //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include <cmath> | 
|  | #include <memory> | 
|  | #include <string> | 
|  |  | 
|  | #include "Assembler.h" | 
|  | #include "BenchmarkRunner.h" | 
|  | #include "Error.h" | 
|  | #include "MCInstrDescView.h" | 
|  | #include "MmapUtils.h" | 
|  | #include "PerfHelper.h" | 
|  | #include "SubprocessMemory.h" | 
|  | #include "Target.h" | 
|  | #include "llvm/ADT/StringExtras.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  | #include "llvm/ADT/Twine.h" | 
|  | #include "llvm/Support/CrashRecoveryContext.h" | 
|  | #include "llvm/Support/Error.h" | 
|  | #include "llvm/Support/FileSystem.h" | 
|  | #include "llvm/Support/MemoryBuffer.h" | 
|  | #include "llvm/Support/Program.h" | 
|  | #include "llvm/Support/Signals.h" | 
|  | #include "llvm/Support/SystemZ/zOSSupport.h" | 
|  |  | 
|  | #ifdef __linux__ | 
|  | #ifdef HAVE_LIBPFM | 
|  | #include <perfmon/perf_event.h> | 
|  | #endif | 
|  | #include <sys/mman.h> | 
|  | #include <sys/ptrace.h> | 
|  | #include <sys/resource.h> | 
|  | #include <sys/socket.h> | 
|  | #include <sys/syscall.h> | 
|  | #include <sys/wait.h> | 
|  | #include <unistd.h> | 
|  |  | 
|  | #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER) | 
|  | #include <sys/rseq.h> | 
|  | #if defined(RSEQ_SIG) && defined(SYS_rseq) | 
|  | #define GLIBC_INITS_RSEQ | 
|  | #endif | 
|  | #endif | 
|  | #endif // __linux__ | 
|  |  | 
|  | namespace llvm { | 
|  | namespace exegesis { | 
|  |  | 
|  | BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode, | 
|  | BenchmarkPhaseSelectorE BenchmarkPhaseSelector, | 
|  | ExecutionModeE ExecutionMode, | 
|  | ArrayRef<ValidationEvent> ValCounters) | 
|  | : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector), | 
|  | ExecutionMode(ExecutionMode), ValidationCounters(ValCounters), | 
|  | Scratch(std::make_unique<ScratchSpace>()) {} | 
|  |  | 
|  | BenchmarkRunner::~BenchmarkRunner() = default; | 
|  |  | 
|  | void BenchmarkRunner::FunctionExecutor::accumulateCounterValues( | 
|  | const llvm::SmallVectorImpl<int64_t> &NewValues, | 
|  | llvm::SmallVectorImpl<int64_t> *Result) { | 
|  | const size_t NumValues = std::max(NewValues.size(), Result->size()); | 
|  | if (NumValues > Result->size()) | 
|  | Result->resize(NumValues, 0); | 
|  | for (size_t I = 0, End = NewValues.size(); I < End; ++I) | 
|  | (*Result)[I] += NewValues[I]; | 
|  | } | 
|  |  | 
|  | Expected<llvm::SmallVector<int64_t, 4>> | 
|  | BenchmarkRunner::FunctionExecutor::runAndSample( | 
|  | const char *Counters, ArrayRef<const char *> ValidationCounters, | 
|  | SmallVectorImpl<int64_t> &ValidationCounterValues) const { | 
|  | // We sum counts when there are several counters for a single ProcRes | 
|  | // (e.g. P23 on SandyBridge). | 
|  | llvm::SmallVector<int64_t, 4> CounterValues; | 
|  | SmallVector<StringRef, 2> CounterNames; | 
|  | StringRef(Counters).split(CounterNames, '+'); | 
|  | for (auto &CounterName : CounterNames) { | 
|  | CounterName = CounterName.trim(); | 
|  | Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter( | 
|  | CounterName, ValidationCounters, ValidationCounterValues); | 
|  | if (!ValueOrError) | 
|  | return ValueOrError.takeError(); | 
|  | accumulateCounterValues(ValueOrError.get(), &CounterValues); | 
|  | } | 
|  | return CounterValues; | 
|  | } | 
|  |  | 
|  | namespace { | 
|  | class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { | 
|  | public: | 
|  | static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>> | 
|  | create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, | 
|  | BenchmarkRunner::ScratchSpace *Scratch) { | 
|  | Expected<ExecutableFunction> EF = | 
|  | ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); | 
|  |  | 
|  | if (!EF) | 
|  | return EF.takeError(); | 
|  |  | 
|  | return std::unique_ptr<InProcessFunctionExecutorImpl>( | 
|  | new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); | 
|  | } | 
|  |  | 
|  | private: | 
|  | InProcessFunctionExecutorImpl(const LLVMState &State, | 
|  | ExecutableFunction Function, | 
|  | BenchmarkRunner::ScratchSpace *Scratch) | 
|  | : State(State), Function(std::move(Function)), Scratch(Scratch) {} | 
|  |  | 
|  | static void | 
|  | accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues, | 
|  | llvm::SmallVector<int64_t, 4> *Result) { | 
|  | const size_t NumValues = std::max(NewValues.size(), Result->size()); | 
|  | if (NumValues > Result->size()) | 
|  | Result->resize(NumValues, 0); | 
|  | for (size_t I = 0, End = NewValues.size(); I < End; ++I) | 
|  | (*Result)[I] += NewValues[I]; | 
|  | } | 
|  |  | 
|  | Expected<llvm::SmallVector<int64_t, 4>> runWithCounter( | 
|  | StringRef CounterName, ArrayRef<const char *> ValidationCounters, | 
|  | SmallVectorImpl<int64_t> &ValidationCounterValues) const override { | 
|  | const ExegesisTarget &ET = State.getExegesisTarget(); | 
|  | char *const ScratchPtr = Scratch->ptr(); | 
|  | auto CounterOrError = | 
|  | ET.createCounter(CounterName, State, ValidationCounters); | 
|  |  | 
|  | if (!CounterOrError) | 
|  | return CounterOrError.takeError(); | 
|  |  | 
|  | pfm::CounterGroup *Counter = CounterOrError.get().get(); | 
|  | Scratch->clear(); | 
|  | { | 
|  | auto PS = ET.withSavedState(); | 
|  | CrashRecoveryContext CRC; | 
|  | CrashRecoveryContext::Enable(); | 
|  | const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() { | 
|  | Counter->start(); | 
|  | this->Function(ScratchPtr); | 
|  | Counter->stop(); | 
|  | }); | 
|  | CrashRecoveryContext::Disable(); | 
|  | PS.reset(); | 
|  | if (Crashed) { | 
|  | #ifdef LLVM_ON_UNIX | 
|  | // See "Exit Status for Commands": | 
|  | // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html | 
|  | constexpr const int kSigOffset = 128; | 
|  | return make_error<SnippetSignal>(CRC.RetCode - kSigOffset); | 
|  | #else | 
|  | // The exit code of the process on windows is not meaningful as a | 
|  | // signal, so simply pass in -1 as the signal into the error. | 
|  | return make_error<SnippetSignal>(-1); | 
|  | #endif // LLVM_ON_UNIX | 
|  | } | 
|  | } | 
|  |  | 
|  | auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); | 
|  | if (!ValidationValuesOrErr) | 
|  | return ValidationValuesOrErr.takeError(); | 
|  |  | 
|  | ArrayRef RealValidationValues = *ValidationValuesOrErr; | 
|  | for (size_t I = 0; I < RealValidationValues.size(); ++I) | 
|  | ValidationCounterValues[I] = RealValidationValues[I]; | 
|  |  | 
|  | return Counter->readOrError(Function.getFunctionBytes()); | 
|  | } | 
|  |  | 
|  | const LLVMState &State; | 
|  | const ExecutableFunction Function; | 
|  | BenchmarkRunner::ScratchSpace *const Scratch; | 
|  | }; | 
|  |  | 
|  | #ifdef __linux__ | 
|  | // The following class implements a function executor that executes the | 
|  | // benchmark code within a subprocess rather than within the main llvm-exegesis | 
|  | // process. This allows for much more control over the execution context of the | 
|  | // snippet, particularly with regard to memory. This class performs all the | 
|  | // necessary functions to create the subprocess, execute the snippet in the | 
|  | // subprocess, and report results/handle errors. | 
|  | class SubProcessFunctionExecutorImpl | 
|  | : public BenchmarkRunner::FunctionExecutor { | 
|  | public: | 
|  | static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>> | 
|  | create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, | 
|  | const BenchmarkKey &Key) { | 
|  | Expected<ExecutableFunction> EF = | 
|  | ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); | 
|  | if (!EF) | 
|  | return EF.takeError(); | 
|  |  | 
|  | return std::unique_ptr<SubProcessFunctionExecutorImpl>( | 
|  | new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key)); | 
|  | } | 
|  |  | 
|  | private: | 
|  | SubProcessFunctionExecutorImpl(const LLVMState &State, | 
|  | ExecutableFunction Function, | 
|  | const BenchmarkKey &Key) | 
|  | : State(State), Function(std::move(Function)), Key(Key) {} | 
|  |  | 
|  | enum ChildProcessExitCodeE { | 
|  | CounterFDReadFailed = 1, | 
|  | RSeqDisableFailed, | 
|  | FunctionDataMappingFailed, | 
|  | AuxiliaryMemorySetupFailed | 
|  | }; | 
|  |  | 
|  | StringRef childProcessExitCodeToString(int ExitCode) const { | 
|  | switch (ExitCode) { | 
|  | case ChildProcessExitCodeE::CounterFDReadFailed: | 
|  | return "Counter file descriptor read failed"; | 
|  | case ChildProcessExitCodeE::RSeqDisableFailed: | 
|  | return "Disabling restartable sequences failed"; | 
|  | case ChildProcessExitCodeE::FunctionDataMappingFailed: | 
|  | return "Failed to map memory for assembled snippet"; | 
|  | case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: | 
|  | return "Failed to setup auxiliary memory"; | 
|  | default: | 
|  | return "Child process returned with unknown exit code"; | 
|  | } | 
|  | } | 
|  |  | 
|  | Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const { | 
|  | struct msghdr Message = {}; | 
|  | char Buffer[CMSG_SPACE(sizeof(FD))]; | 
|  | memset(Buffer, 0, sizeof(Buffer)); | 
|  | Message.msg_control = Buffer; | 
|  | Message.msg_controllen = sizeof(Buffer); | 
|  |  | 
|  | struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); | 
|  | ControlMessage->cmsg_level = SOL_SOCKET; | 
|  | ControlMessage->cmsg_type = SCM_RIGHTS; | 
|  | ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD)); | 
|  |  | 
|  | memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD)); | 
|  |  | 
|  | Message.msg_controllen = CMSG_SPACE(sizeof(FD)); | 
|  |  | 
|  | ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0); | 
|  |  | 
|  | if (BytesWritten < 0) | 
|  | return make_error<Failure>("Failed to write FD to socket: " + | 
|  | Twine(strerror(errno))); | 
|  |  | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | Expected<int> getFileDescriptorFromSocket(int SocketFD) const { | 
|  | struct msghdr Message = {}; | 
|  |  | 
|  | char ControlBuffer[256]; | 
|  | Message.msg_control = ControlBuffer; | 
|  | Message.msg_controllen = sizeof(ControlBuffer); | 
|  |  | 
|  | ssize_t BytesRead = recvmsg(SocketFD, &Message, 0); | 
|  |  | 
|  | if (BytesRead < 0) | 
|  | return make_error<Failure>("Failed to read FD from socket: " + | 
|  | Twine(strerror(errno))); | 
|  |  | 
|  | struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); | 
|  |  | 
|  | int FD; | 
|  |  | 
|  | if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD))) | 
|  | return make_error<Failure>("Failed to get correct number of bytes for " | 
|  | "file descriptor from socket."); | 
|  |  | 
|  | memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD)); | 
|  |  | 
|  | return FD; | 
|  | } | 
|  |  | 
|  | Error createSubProcessAndRunBenchmark( | 
|  | StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues, | 
|  | ArrayRef<const char *> ValidationCounters, | 
|  | SmallVectorImpl<int64_t> &ValidationCounterValues) const { | 
|  | int PipeFiles[2]; | 
|  | int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles); | 
|  | if (PipeSuccessOrErr != 0) { | 
|  | return make_error<Failure>( | 
|  | "Failed to create a pipe for interprocess communication between " | 
|  | "llvm-exegesis and the benchmarking subprocess: " + | 
|  | Twine(strerror(errno))); | 
|  | } | 
|  |  | 
|  | SubprocessMemory SPMemory; | 
|  | Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid()); | 
|  | if (MemoryInitError) | 
|  | return MemoryInitError; | 
|  |  | 
|  | Error AddMemDefError = | 
|  | SPMemory.addMemoryDefinition(Key.MemoryValues, getpid()); | 
|  | if (AddMemDefError) | 
|  | return AddMemDefError; | 
|  |  | 
|  | pid_t ParentOrChildPID = fork(); | 
|  |  | 
|  | if (ParentOrChildPID == -1) { | 
|  | return make_error<Failure>("Failed to create child process: " + | 
|  | Twine(strerror(errno))); | 
|  | } | 
|  |  | 
|  | if (ParentOrChildPID == 0) { | 
|  | // We are in the child process, close the write end of the pipe | 
|  | close(PipeFiles[1]); | 
|  | // Unregister handlers, signal handling is now handled through ptrace in | 
|  | // the host process | 
|  | llvm::sys::unregisterHandlers(); | 
|  | prepareAndRunBenchmark(PipeFiles[0], Key); | 
|  | // The child process terminates in the above function, so we should never | 
|  | // get to this point. | 
|  | llvm_unreachable("Child process didn't exit when expected."); | 
|  | } | 
|  |  | 
|  | const ExegesisTarget &ET = State.getExegesisTarget(); | 
|  | auto CounterOrError = ET.createCounter( | 
|  | CounterName, State, ValidationCounters, ParentOrChildPID); | 
|  |  | 
|  | if (!CounterOrError) | 
|  | return CounterOrError.takeError(); | 
|  |  | 
|  | pfm::CounterGroup *Counter = CounterOrError.get().get(); | 
|  |  | 
|  | close(PipeFiles[0]); | 
|  |  | 
|  | // Make sure to attach to the process (and wait for the sigstop to be | 
|  | // delivered and for the process to continue) before we write to the counter | 
|  | // file descriptor. Attaching to the process before writing to the socket | 
|  | // ensures that the subprocess at most has blocked on the read call. If we | 
|  | // attach afterwards, the subprocess might exit before we get to the attach | 
|  | // call due to effects like scheduler contention, introducing transient | 
|  | // failures. | 
|  | if (ptrace(PTRACE_ATTACH, ParentOrChildPID, NULL, NULL) != 0) | 
|  | return make_error<Failure>("Failed to attach to the child process: " + | 
|  | Twine(strerror(errno))); | 
|  |  | 
|  | if (wait(NULL) == -1) { | 
|  | return make_error<Failure>( | 
|  | "Failed to wait for child process to stop after attaching: " + | 
|  | Twine(strerror(errno))); | 
|  | } | 
|  |  | 
|  | if (ptrace(PTRACE_CONT, ParentOrChildPID, NULL, NULL) != 0) | 
|  | return make_error<Failure>( | 
|  | "Failed to continue execution of the child process: " + | 
|  | Twine(strerror(errno))); | 
|  |  | 
|  | int CounterFileDescriptor = Counter->getFileDescriptor(); | 
|  | Error SendError = | 
|  | sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor); | 
|  |  | 
|  | if (SendError) | 
|  | return SendError; | 
|  |  | 
|  | int ChildStatus; | 
|  | if (wait(&ChildStatus) == -1) { | 
|  | return make_error<Failure>( | 
|  | "Waiting for the child process to complete failed: " + | 
|  | Twine(strerror(errno))); | 
|  | } | 
|  |  | 
|  | if (WIFEXITED(ChildStatus)) { | 
|  | int ChildExitCode = WEXITSTATUS(ChildStatus); | 
|  | if (ChildExitCode == 0) { | 
|  | // The child exited succesfully, read counter values and return | 
|  | // success | 
|  | auto CounterValueOrErr = Counter->readOrError(); | 
|  | if (!CounterValueOrErr) | 
|  | return CounterValueOrErr.takeError(); | 
|  | CounterValues = std::move(*CounterValueOrErr); | 
|  |  | 
|  | auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); | 
|  | if (!ValidationValuesOrErr) | 
|  | return ValidationValuesOrErr.takeError(); | 
|  |  | 
|  | ArrayRef RealValidationValues = *ValidationValuesOrErr; | 
|  | for (size_t I = 0; I < RealValidationValues.size(); ++I) | 
|  | ValidationCounterValues[I] = RealValidationValues[I]; | 
|  |  | 
|  | return Error::success(); | 
|  | } | 
|  | // The child exited, but not successfully | 
|  | return make_error<Failure>( | 
|  | "Child benchmarking process exited with non-zero exit code: " + | 
|  | childProcessExitCodeToString(ChildExitCode)); | 
|  | } | 
|  |  | 
|  | // An error was encountered running the snippet, process it | 
|  | siginfo_t ChildSignalInfo; | 
|  | if (ptrace(PTRACE_GETSIGINFO, ParentOrChildPID, NULL, &ChildSignalInfo) == | 
|  | -1) { | 
|  | return make_error<Failure>("Getting signal info from the child failed: " + | 
|  | Twine(strerror(errno))); | 
|  | } | 
|  |  | 
|  | if (ChildSignalInfo.si_signo == SIGSEGV) | 
|  | return make_error<SnippetSegmentationFault>( | 
|  | reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr)); | 
|  |  | 
|  | return make_error<SnippetSignal>(ChildSignalInfo.si_signo); | 
|  | } | 
|  |  | 
|  | void disableCoreDumps() const { | 
|  | struct rlimit rlim; | 
|  |  | 
|  | rlim.rlim_cur = 0; | 
|  | setrlimit(RLIMIT_CORE, &rlim); | 
|  | } | 
|  |  | 
|  | [[noreturn]] void prepareAndRunBenchmark(int Pipe, | 
|  | const BenchmarkKey &Key) const { | 
|  | // Disable core dumps in the child process as otherwise everytime we | 
|  | // encounter an execution failure like a segmentation fault, we will create | 
|  | // a core dump. We report the information directly rather than require the | 
|  | // user inspect a core dump. | 
|  | disableCoreDumps(); | 
|  |  | 
|  | // The following occurs within the benchmarking subprocess | 
|  | pid_t ParentPID = getppid(); | 
|  |  | 
|  | Expected<int> CounterFileDescriptorOrError = | 
|  | getFileDescriptorFromSocket(Pipe); | 
|  |  | 
|  | if (!CounterFileDescriptorOrError) | 
|  | exit(ChildProcessExitCodeE::CounterFDReadFailed); | 
|  |  | 
|  | int CounterFileDescriptor = *CounterFileDescriptorOrError; | 
|  |  | 
|  | // Glibc versions greater than 2.35 automatically call rseq during | 
|  | // initialization. Unmapping the region that glibc sets up for this causes | 
|  | // segfaults in the program Unregister the rseq region so that we can safely | 
|  | // unmap it later | 
|  | #ifdef GLIBC_INITS_RSEQ | 
|  | long RseqDisableOutput = | 
|  | syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset, | 
|  | __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG); | 
|  | if (RseqDisableOutput != 0) | 
|  | exit(ChildProcessExitCodeE::RSeqDisableFailed); | 
|  | #endif // GLIBC_INITS_RSEQ | 
|  |  | 
|  | // The frontend that generates the memory annotation structures should | 
|  | // validate that the address to map the snippet in at is a multiple of | 
|  | // the page size. Assert that this is true here. | 
|  | assert(Key.SnippetAddress % getpagesize() == 0 && | 
|  | "The snippet address needs to be aligned to a page boundary."); | 
|  |  | 
|  | size_t FunctionDataCopySize = this->Function.FunctionBytes.size(); | 
|  | void *MapAddress = NULL; | 
|  | int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS; | 
|  |  | 
|  | if (Key.SnippetAddress != 0) { | 
|  | MapAddress = reinterpret_cast<void *>(Key.SnippetAddress); | 
|  | MapFlags |= MAP_FIXED_NOREPLACE; | 
|  | } | 
|  |  | 
|  | char *FunctionDataCopy = | 
|  | (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE, | 
|  | MapFlags, 0, 0); | 
|  | if ((intptr_t)FunctionDataCopy == -1) | 
|  | exit(ChildProcessExitCodeE::FunctionDataMappingFailed); | 
|  |  | 
|  | memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(), | 
|  | this->Function.FunctionBytes.size()); | 
|  | mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC); | 
|  |  | 
|  | Expected<int> AuxMemFDOrError = | 
|  | SubprocessMemory::setupAuxiliaryMemoryInSubprocess( | 
|  | Key.MemoryValues, ParentPID, CounterFileDescriptor); | 
|  | if (!AuxMemFDOrError) | 
|  | exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed); | 
|  |  | 
|  | ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize, | 
|  | *AuxMemFDOrError); | 
|  |  | 
|  | exit(0); | 
|  | } | 
|  |  | 
|  | Expected<llvm::SmallVector<int64_t, 4>> runWithCounter( | 
|  | StringRef CounterName, ArrayRef<const char *> ValidationCounters, | 
|  | SmallVectorImpl<int64_t> &ValidationCounterValues) const override { | 
|  | SmallVector<int64_t, 4> Value(1, 0); | 
|  | Error PossibleBenchmarkError = createSubProcessAndRunBenchmark( | 
|  | CounterName, Value, ValidationCounters, ValidationCounterValues); | 
|  |  | 
|  | if (PossibleBenchmarkError) | 
|  | return std::move(PossibleBenchmarkError); | 
|  |  | 
|  | return Value; | 
|  | } | 
|  |  | 
|  | const LLVMState &State; | 
|  | const ExecutableFunction Function; | 
|  | const BenchmarkKey &Key; | 
|  | }; | 
|  | #endif // __linux__ | 
|  | } // namespace | 
|  |  | 
|  | Expected<SmallString<0>> BenchmarkRunner::assembleSnippet( | 
|  | const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, | 
|  | unsigned MinInstructions, unsigned LoopBodySize, | 
|  | bool GenerateMemoryInstructions) const { | 
|  | const std::vector<MCInst> &Instructions = BC.Key.Instructions; | 
|  | SmallString<0> Buffer; | 
|  | raw_svector_ostream OS(Buffer); | 
|  | if (Error E = assembleToStream( | 
|  | State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, | 
|  | Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, | 
|  | GenerateMemoryInstructions), | 
|  | OS, BC.Key, GenerateMemoryInstructions)) { | 
|  | return std::move(E); | 
|  | } | 
|  | return Buffer; | 
|  | } | 
|  |  | 
|  | Expected<BenchmarkRunner::RunnableConfiguration> | 
|  | BenchmarkRunner::getRunnableConfiguration( | 
|  | const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize, | 
|  | const SnippetRepetitor &Repetitor) const { | 
|  | RunnableConfiguration RC; | 
|  |  | 
|  | Benchmark &BenchmarkResult = RC.BenchmarkResult; | 
|  | BenchmarkResult.Mode = Mode; | 
|  | BenchmarkResult.CpuName = | 
|  | std::string(State.getTargetMachine().getTargetCPU()); | 
|  | BenchmarkResult.LLVMTriple = | 
|  | State.getTargetMachine().getTargetTriple().normalize(); | 
|  | BenchmarkResult.NumRepetitions = NumRepetitions; | 
|  | BenchmarkResult.Info = BC.Info; | 
|  |  | 
|  | const std::vector<MCInst> &Instructions = BC.Key.Instructions; | 
|  |  | 
|  | bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess; | 
|  |  | 
|  | BenchmarkResult.Key = BC.Key; | 
|  |  | 
|  | // Assemble at least kMinInstructionsForSnippet instructions by repeating | 
|  | // the snippet for debug/analysis. This is so that the user clearly | 
|  | // understands that the inside instructions are repeated. | 
|  | if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { | 
|  | const int MinInstructionsForSnippet = 4 * Instructions.size(); | 
|  | const int LoopBodySizeForSnippet = 2 * Instructions.size(); | 
|  | auto Snippet = | 
|  | assembleSnippet(BC, Repetitor, MinInstructionsForSnippet, | 
|  | LoopBodySizeForSnippet, GenerateMemoryInstructions); | 
|  | if (Error E = Snippet.takeError()) | 
|  | return std::move(E); | 
|  |  | 
|  | if (auto Err = getBenchmarkFunctionBytes(*Snippet, | 
|  | BenchmarkResult.AssembledSnippet)) | 
|  | return std::move(Err); | 
|  | } | 
|  |  | 
|  | // Assemble NumRepetitions instructions repetitions of the snippet for | 
|  | // measurements. | 
|  | if (BenchmarkPhaseSelector > | 
|  | BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { | 
|  | auto Snippet = | 
|  | assembleSnippet(BC, Repetitor, BenchmarkResult.NumRepetitions, | 
|  | LoopBodySize, GenerateMemoryInstructions); | 
|  | if (Error E = Snippet.takeError()) | 
|  | return std::move(E); | 
|  | RC.ObjectFile = getObjectFromBuffer(*Snippet); | 
|  | } | 
|  |  | 
|  | return std::move(RC); | 
|  | } | 
|  |  | 
|  | Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> | 
|  | BenchmarkRunner::createFunctionExecutor( | 
|  | object::OwningBinary<object::ObjectFile> ObjectFile, | 
|  | const BenchmarkKey &Key) const { | 
|  | switch (ExecutionMode) { | 
|  | case ExecutionModeE::InProcess: { | 
|  | auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( | 
|  | State, std::move(ObjectFile), Scratch.get()); | 
|  | if (!InProcessExecutorOrErr) | 
|  | return InProcessExecutorOrErr.takeError(); | 
|  |  | 
|  | return std::move(*InProcessExecutorOrErr); | 
|  | } | 
|  | case ExecutionModeE::SubProcess: { | 
|  | #ifdef __linux__ | 
|  | auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( | 
|  | State, std::move(ObjectFile), Key); | 
|  | if (!SubProcessExecutorOrErr) | 
|  | return SubProcessExecutorOrErr.takeError(); | 
|  |  | 
|  | return std::move(*SubProcessExecutorOrErr); | 
|  | #else | 
|  | return make_error<Failure>( | 
|  | "The subprocess execution mode is only supported on Linux"); | 
|  | #endif | 
|  | } | 
|  | } | 
|  | llvm_unreachable("ExecutionMode is outside expected range"); | 
|  | } | 
|  |  | 
|  | std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration( | 
|  | RunnableConfiguration &&RC, | 
|  | const std::optional<StringRef> &DumpFile) const { | 
|  | Benchmark &BenchmarkResult = RC.BenchmarkResult; | 
|  | object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile; | 
|  |  | 
|  | if (DumpFile && BenchmarkPhaseSelector > | 
|  | BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { | 
|  | auto ObjectFilePath = | 
|  | writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile); | 
|  | if (Error E = ObjectFilePath.takeError()) { | 
|  | return {std::move(E), std::move(BenchmarkResult)}; | 
|  | } | 
|  | outs() << "Check generated assembly with: /usr/bin/objdump -d " | 
|  | << *ObjectFilePath << "\n"; | 
|  | } | 
|  |  | 
|  | if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) { | 
|  | BenchmarkResult.Error = "actual measurements skipped."; | 
|  | return {Error::success(), std::move(BenchmarkResult)}; | 
|  | } | 
|  |  | 
|  | Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor = | 
|  | createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key); | 
|  | if (!Executor) | 
|  | return {Executor.takeError(), std::move(BenchmarkResult)}; | 
|  | auto NewMeasurements = runMeasurements(**Executor); | 
|  |  | 
|  | if (Error E = NewMeasurements.takeError()) { | 
|  | return {std::move(E), std::move(BenchmarkResult)}; | 
|  | } | 
|  | assert(BenchmarkResult.NumRepetitions > 0 && "invalid NumRepetitions"); | 
|  | for (BenchmarkMeasure &BM : *NewMeasurements) { | 
|  | // Scale the measurements by instruction. | 
|  | BM.PerInstructionValue /= BenchmarkResult.NumRepetitions; | 
|  | // Scale the measurements by snippet. | 
|  | BM.PerSnippetValue /= | 
|  | std::ceil(BenchmarkResult.NumRepetitions / | 
|  | static_cast<double>(BenchmarkResult.Key.Instructions.size())); | 
|  | } | 
|  | BenchmarkResult.Measurements = std::move(*NewMeasurements); | 
|  |  | 
|  | return {Error::success(), std::move(BenchmarkResult)}; | 
|  | } | 
|  |  | 
|  | Expected<std::string> | 
|  | BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const { | 
|  | int ResultFD = 0; | 
|  | SmallString<256> ResultPath = FileName; | 
|  | if (Error E = errorCodeToError( | 
|  | FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o", | 
|  | ResultFD, ResultPath) | 
|  | : sys::fs::openFileForReadWrite( | 
|  | FileName, ResultFD, sys::fs::CD_CreateAlways, | 
|  | sys::fs::OF_None))) | 
|  | return std::move(E); | 
|  | raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); | 
|  | OFS.write(Buffer.data(), Buffer.size()); | 
|  | OFS.flush(); | 
|  | return std::string(ResultPath); | 
|  | } | 
|  |  | 
|  | static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS, | 
|  | const ValidationEvent RHS) { | 
|  | return static_cast<int>(LHS.first) < static_cast<int>(RHS); | 
|  | } | 
|  |  | 
|  | Error BenchmarkRunner::getValidationCountersToRun( | 
|  | SmallVector<const char *> &ValCountersToRun) const { | 
|  | const PfmCountersInfo &PCI = State.getPfmCounters(); | 
|  | ValCountersToRun.reserve(ValidationCounters.size()); | 
|  |  | 
|  | ValCountersToRun.reserve(ValidationCounters.size()); | 
|  | ArrayRef TargetValidationEvents(PCI.ValidationEvents, | 
|  | PCI.NumValidationEvents); | 
|  | for (const ValidationEvent RequestedValEvent : ValidationCounters) { | 
|  | auto ValCounterIt = | 
|  | lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan); | 
|  | if (ValCounterIt == TargetValidationEvents.end() || | 
|  | ValCounterIt->first != RequestedValEvent) | 
|  | return make_error<Failure>("Cannot create validation counter"); | 
|  |  | 
|  | assert(ValCounterIt->first == RequestedValEvent && | 
|  | "The array of validation events from the target should be sorted"); | 
|  | ValCountersToRun.push_back(ValCounterIt->second); | 
|  | } | 
|  |  | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {} | 
|  |  | 
|  | } // namespace exegesis | 
|  | } // namespace llvm |