| //===-- RandomIRBuilder.cpp -----------------------------------------------===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 |  | 
 | #include "llvm/FuzzMutate/RandomIRBuilder.h" | 
 | #include "llvm/ADT/STLExtras.h" | 
 | #include "llvm/FuzzMutate/OpDescriptor.h" | 
 | #include "llvm/FuzzMutate/Random.h" | 
 | #include "llvm/IR/BasicBlock.h" | 
 | #include "llvm/IR/Constants.h" | 
 | #include "llvm/IR/DataLayout.h" | 
 | #include "llvm/IR/Dominators.h" | 
 | #include "llvm/IR/Function.h" | 
 | #include "llvm/IR/Instructions.h" | 
 | #include "llvm/IR/IntrinsicInst.h" | 
 | #include "llvm/IR/Module.h" | 
 |  | 
 | using namespace llvm; | 
 | using namespace fuzzerop; | 
 |  | 
 | /// Return a vector of Blocks that dominates this block, excluding current | 
 | /// block. | 
 | static std::vector<BasicBlock *> getDominators(BasicBlock *BB) { | 
 |   std::vector<BasicBlock *> ret; | 
 |   DominatorTree DT(*BB->getParent()); | 
 |   DomTreeNode *Node = DT.getNode(BB); | 
 |   // It's possible that an orphan block is not in the dom tree. In that case we | 
 |   // just return nothing. | 
 |   if (!Node) | 
 |     return ret; | 
 |   Node = Node->getIDom(); | 
 |   while (Node && Node->getBlock()) { | 
 |     ret.push_back(Node->getBlock()); | 
 |     // Get parent block. | 
 |     Node = Node->getIDom(); | 
 |   } | 
 |   return ret; | 
 | } | 
 |  | 
 | /// Return a vector of Blocks that is dominated by this block, excluding current | 
 | /// block | 
 | static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) { | 
 |   DominatorTree DT(*BB->getParent()); | 
 |   std::vector<BasicBlock *> ret; | 
 |   DomTreeNode *Parent = DT.getNode(BB); | 
 |   // It's possible that an orphan block is not in the dom tree. In that case we | 
 |   // just return nothing. | 
 |   if (!Parent) | 
 |     return ret; | 
 |   for (DomTreeNode *Child : Parent->children()) | 
 |     ret.push_back(Child->getBlock()); | 
 |   uint64_t Idx = 0; | 
 |   while (Idx < ret.size()) { | 
 |     DomTreeNode *Node = DT[ret[Idx]]; | 
 |     Idx++; | 
 |     for (DomTreeNode *Child : Node->children()) | 
 |       ret.push_back(Child->getBlock()); | 
 |   } | 
 |   return ret; | 
 | } | 
 |  | 
 | AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty, | 
 |                                                Value *Init) { | 
 |   /// TODO: For all Allocas, maybe allocate an array. | 
 |   BasicBlock *EntryBB = &F->getEntryBlock(); | 
 |   DataLayout DL(F->getParent()); | 
 |   AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A", | 
 |                                       &*EntryBB->getFirstInsertionPt()); | 
 |   if (Init) | 
 |     new StoreInst(Init, Alloca, Alloca->getNextNode()); | 
 |   return Alloca; | 
 | } | 
 |  | 
 | std::pair<GlobalVariable *, bool> | 
 | RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs, | 
 |                                             fuzzerop::SourcePred Pred) { | 
 |   auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) { | 
 |     // Can't directly compare GV's type, as it would be a pointer to the actual | 
 |     // type. | 
 |     return Pred.matches(Srcs, UndefValue::get(GV->getValueType())); | 
 |   }; | 
 |   bool DidCreate = false; | 
 |   SmallVector<GlobalVariable *, 4> GlobalVars; | 
 |   for (GlobalVariable &GV : M->globals()) { | 
 |     GlobalVars.push_back(&GV); | 
 |   } | 
 |   auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred)); | 
 |   RS.sample(nullptr, 1); | 
 |   GlobalVariable *GV = RS.getSelection(); | 
 |   if (!GV) { | 
 |     DidCreate = true; | 
 |     using LinkageTypes = GlobalVariable::LinkageTypes; | 
 |     auto TRS = makeSampler<Constant *>(Rand); | 
 |     TRS.sample(Pred.generate(Srcs, KnownTypes)); | 
 |     Constant *Init = TRS.getSelection(); | 
 |     Type *Ty = Init->getType(); | 
 |     GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init, | 
 |                             "G", nullptr, | 
 |                             GlobalValue::ThreadLocalMode::NotThreadLocal, | 
 |                             M->getDataLayout().getDefaultGlobalsAddressSpace()); | 
 |   } | 
 |   return {GV, DidCreate}; | 
 | } | 
 |  | 
 | Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, | 
 |                                            ArrayRef<Instruction *> Insts) { | 
 |   return findOrCreateSource(BB, Insts, {}, anyType()); | 
 | } | 
 |  | 
 | Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, | 
 |                                            ArrayRef<Instruction *> Insts, | 
 |                                            ArrayRef<Value *> Srcs, | 
 |                                            SourcePred Pred, | 
 |                                            bool allowConstant) { | 
 |   auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); }; | 
 |   SmallVector<uint64_t, 8> SrcTys; | 
 |   for (uint64_t i = 0; i < EndOfValueSource; i++) | 
 |     SrcTys.push_back(i); | 
 |   std::shuffle(SrcTys.begin(), SrcTys.end(), Rand); | 
 |   for (uint64_t SrcTy : SrcTys) { | 
 |     switch (SrcTy) { | 
 |     case SrcFromInstInCurBlock: { | 
 |       auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); | 
 |       if (!RS.isEmpty()) { | 
 |         return RS.getSelection(); | 
 |       } | 
 |       break; | 
 |     } | 
 |     case FunctionArgument: { | 
 |       Function *F = BB.getParent(); | 
 |       SmallVector<Argument *, 8> Args; | 
 |       for (uint64_t i = 0; i < F->arg_size(); i++) { | 
 |         Args.push_back(F->getArg(i)); | 
 |       } | 
 |       auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred)); | 
 |       if (!RS.isEmpty()) { | 
 |         return RS.getSelection(); | 
 |       } | 
 |       break; | 
 |     } | 
 |     case InstInDominator: { | 
 |       auto Dominators = getDominators(&BB); | 
 |       std::shuffle(Dominators.begin(), Dominators.end(), Rand); | 
 |       for (BasicBlock *Dom : Dominators) { | 
 |         SmallVector<Instruction *, 16> Instructions; | 
 |         for (Instruction &I : *Dom) { | 
 |           Instructions.push_back(&I); | 
 |         } | 
 |         auto RS = | 
 |             makeSampler(Rand, make_filter_range(Instructions, MatchesPred)); | 
 |         // Also consider choosing no source, meaning we want a new one. | 
 |         if (!RS.isEmpty()) { | 
 |           return RS.getSelection(); | 
 |         } | 
 |       } | 
 |       break; | 
 |     } | 
 |     case SrcFromGlobalVariable: { | 
 |       Module *M = BB.getParent()->getParent(); | 
 |       auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred); | 
 |       Type *Ty = GV->getValueType(); | 
 |       LoadInst *LoadGV = nullptr; | 
 |       if (BB.getTerminator()) { | 
 |         LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt()); | 
 |       } else { | 
 |         LoadGV = new LoadInst(Ty, GV, "LGV", &BB); | 
 |       } | 
 |       // Because we might be generating new values, we have to check if it | 
 |       // matches again. | 
 |       if (DidCreate) { | 
 |         if (Pred.matches(Srcs, LoadGV)) { | 
 |           return LoadGV; | 
 |         } | 
 |         LoadGV->eraseFromParent(); | 
 |         // If no one is using this GlobalVariable, delete it too. | 
 |         if (GV->use_empty()) { | 
 |           GV->eraseFromParent(); | 
 |         } | 
 |       } | 
 |       break; | 
 |     } | 
 |     case NewConstOrStack: { | 
 |       return newSource(BB, Insts, Srcs, Pred, allowConstant); | 
 |     } | 
 |     default: | 
 |     case EndOfValueSource: { | 
 |       llvm_unreachable("EndOfValueSource executed"); | 
 |     } | 
 |     } | 
 |   } | 
 |   llvm_unreachable("Can't find a source"); | 
 | } | 
 |  | 
 | Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts, | 
 |                                   ArrayRef<Value *> Srcs, SourcePred Pred, | 
 |                                   bool allowConstant) { | 
 |   // Generate some constants to choose from. | 
 |   auto RS = makeSampler<Value *>(Rand); | 
 |   RS.sample(Pred.generate(Srcs, KnownTypes)); | 
 |  | 
 |   // If we can find a pointer to load from, use it half the time. | 
 |   Value *Ptr = findPointer(BB, Insts); | 
 |   if (Ptr) { | 
 |     // Create load from the chosen pointer | 
 |     auto IP = BB.getFirstInsertionPt(); | 
 |     if (auto *I = dyn_cast<Instruction>(Ptr)) { | 
 |       IP = ++I->getIterator(); | 
 |       assert(IP != BB.end() && "guaranteed by the findPointer"); | 
 |     } | 
 |     // Pick the type independently. | 
 |     Type *AccessTy = RS.getSelection()->getType(); | 
 |     auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP); | 
 |  | 
 |     // Only sample this load if it really matches the descriptor | 
 |     if (Pred.matches(Srcs, NewLoad)) | 
 |       RS.sample(NewLoad, RS.totalWeight()); | 
 |     else | 
 |       NewLoad->eraseFromParent(); | 
 |   } | 
 |  | 
 |   Value *newSrc = RS.getSelection(); | 
 |   // Generate a stack alloca and store the constant to it if constant is not | 
 |   // allowed, our hope is that later mutations can generate some values and | 
 |   // store to this placeholder. | 
 |   if (!allowConstant && isa<Constant>(newSrc)) { | 
 |     Type *Ty = newSrc->getType(); | 
 |     Function *F = BB.getParent(); | 
 |     AllocaInst *Alloca = createStackMemory(F, Ty, newSrc); | 
 |     if (BB.getTerminator()) { | 
 |       newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator()); | 
 |     } else { | 
 |       newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB); | 
 |     } | 
 |   } | 
 |   return newSrc; | 
 | } | 
 |  | 
 | static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, | 
 |                                     const Value *Replacement) { | 
 |   unsigned int OperandNo = Operand.getOperandNo(); | 
 |   if (Operand->getType() != Replacement->getType()) | 
 |     return false; | 
 |   switch (I->getOpcode()) { | 
 |   case Instruction::GetElementPtr: | 
 |   case Instruction::ExtractElement: | 
 |   case Instruction::ExtractValue: | 
 |     // TODO: We could potentially validate these, but for now just leave indices | 
 |     // alone. | 
 |     if (OperandNo >= 1) | 
 |       return false; | 
 |     break; | 
 |   case Instruction::InsertValue: | 
 |   case Instruction::InsertElement: | 
 |   case Instruction::ShuffleVector: | 
 |     if (OperandNo >= 2) | 
 |       return false; | 
 |     break; | 
 |   // For Br/Switch, we only try to modify the 1st Operand (condition). | 
 |   // Modify other operands, like switch case may accidently change case from | 
 |   // ConstantInt to a register, which is illegal. | 
 |   case Instruction::Switch: | 
 |   case Instruction::Br: | 
 |     if (OperandNo >= 1) | 
 |       return false; | 
 |     break; | 
 |   case Instruction::Call: | 
 |   case Instruction::Invoke: | 
 |   case Instruction::CallBr: { | 
 |     const Function *Callee = cast<CallBase>(I)->getCalledFunction(); | 
 |     // If it's an indirect call, give up. | 
 |     if (!Callee) | 
 |       return false; | 
 |     // If callee is not an intrinsic, operand 0 is the function to be called. | 
 |     // Since we cannot assume that the replacement is a function pointer, | 
 |     // we give up. | 
 |     if (!Callee->getIntrinsicID() && OperandNo == 0) | 
 |       return false; | 
 |     return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg); | 
 |   } | 
 |   default: | 
 |     break; | 
 |   } | 
 |   return true; | 
 | } | 
 |  | 
 | Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB, | 
 |                                             ArrayRef<Instruction *> Insts, | 
 |                                             Value *V) { | 
 |   SmallVector<uint64_t, 8> SinkTys; | 
 |   for (uint64_t i = 0; i < EndOfValueSink; i++) | 
 |     SinkTys.push_back(i); | 
 |   std::shuffle(SinkTys.begin(), SinkTys.end(), Rand); | 
 |   auto findSinkAndConnect = | 
 |       [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * { | 
 |     auto RS = makeSampler<Use *>(Rand); | 
 |     for (auto &I : Instructions) { | 
 |       for (Use &U : I->operands()) | 
 |         if (isCompatibleReplacement(I, U, V)) | 
 |           RS.sample(&U, 1); | 
 |     } | 
 |     if (!RS.isEmpty()) { | 
 |       Use *Sink = RS.getSelection(); | 
 |       User *U = Sink->getUser(); | 
 |       unsigned OpNo = Sink->getOperandNo(); | 
 |       U->setOperand(OpNo, V); | 
 |       return cast<Instruction>(U); | 
 |     } | 
 |     return nullptr; | 
 |   }; | 
 |   Instruction *Sink = nullptr; | 
 |   for (uint64_t SinkTy : SinkTys) { | 
 |     switch (SinkTy) { | 
 |     case SinkToInstInCurBlock: | 
 |       Sink = findSinkAndConnect(Insts); | 
 |       if (Sink) | 
 |         return Sink; | 
 |       break; | 
 |     case PointersInDominator: { | 
 |       auto Dominators = getDominators(&BB); | 
 |       std::shuffle(Dominators.begin(), Dominators.end(), Rand); | 
 |       for (BasicBlock *Dom : Dominators) { | 
 |         for (Instruction &I : *Dom) { | 
 |           if (isa<PointerType>(I.getType())) | 
 |             return new StoreInst(V, &I, Insts.back()); | 
 |         } | 
 |       } | 
 |       break; | 
 |     } | 
 |     case InstInDominatee: { | 
 |       auto Dominatees = getDominatees(&BB); | 
 |       std::shuffle(Dominatees.begin(), Dominatees.end(), Rand); | 
 |       for (BasicBlock *Dominee : Dominatees) { | 
 |         std::vector<Instruction *> Instructions; | 
 |         for (Instruction &I : *Dominee) | 
 |           Instructions.push_back(&I); | 
 |         Sink = findSinkAndConnect(Instructions); | 
 |         if (Sink) { | 
 |           return Sink; | 
 |         } | 
 |       } | 
 |       break; | 
 |     } | 
 |     case NewStore: | 
 |       /// TODO: allocate a new stack memory. | 
 |       return newSink(BB, Insts, V); | 
 |     case SinkToGlobalVariable: { | 
 |       Module *M = BB.getParent()->getParent(); | 
 |       auto [GV, DidCreate] = | 
 |           findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType())); | 
 |       return new StoreInst(V, GV, Insts.back()); | 
 |     } | 
 |     case EndOfValueSink: | 
 |     default: | 
 |       llvm_unreachable("EndOfValueSink executed"); | 
 |     } | 
 |   } | 
 |   llvm_unreachable("Can't find a sink"); | 
 | } | 
 |  | 
 | Instruction *RandomIRBuilder::newSink(BasicBlock &BB, | 
 |                                       ArrayRef<Instruction *> Insts, Value *V) { | 
 |   Value *Ptr = findPointer(BB, Insts); | 
 |   if (!Ptr) { | 
 |     if (uniform(Rand, 0, 1)) { | 
 |       Type *Ty = V->getType(); | 
 |       Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty)); | 
 |     } else { | 
 |       Ptr = UndefValue::get(PointerType::get(V->getType(), 0)); | 
 |     } | 
 |   } | 
 |  | 
 |   return new StoreInst(V, Ptr, Insts.back()); | 
 | } | 
 |  | 
 | Value *RandomIRBuilder::findPointer(BasicBlock &BB, | 
 |                                     ArrayRef<Instruction *> Insts) { | 
 |   auto IsMatchingPtr = [](Instruction *Inst) { | 
 |     // Invoke instructions sometimes produce valid pointers but currently | 
 |     // we can't insert loads or stores from them | 
 |     if (Inst->isTerminator()) | 
 |       return false; | 
 |  | 
 |     return Inst->getType()->isPointerTy(); | 
 |   }; | 
 |   if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) | 
 |     return RS.getSelection(); | 
 |   return nullptr; | 
 | } | 
 |  | 
 | Type *RandomIRBuilder::randomType() { | 
 |   uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1); | 
 |   return KnownTypes[TyIdx]; | 
 | } | 
 |  | 
 | Function *RandomIRBuilder::createFunctionDeclaration(Module &M, | 
 |                                                      uint64_t ArgNum) { | 
 |   Type *RetType = randomType(); | 
 |  | 
 |   SmallVector<Type *, 2> Args; | 
 |   for (uint64_t i = 0; i < ArgNum; i++) { | 
 |     Args.push_back(randomType()); | 
 |   } | 
 |  | 
 |   Function *F = Function::Create(FunctionType::get(RetType, Args, | 
 |                                                    /*isVarArg=*/false), | 
 |                                  GlobalValue::ExternalLinkage, "f", &M); | 
 |   return F; | 
 | } | 
 | Function *RandomIRBuilder::createFunctionDeclaration(Module &M) { | 
 |   return createFunctionDeclaration( | 
 |       M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); | 
 | } | 
 |  | 
 | Function *RandomIRBuilder::createFunctionDefinition(Module &M, | 
 |                                                     uint64_t ArgNum) { | 
 |   Function *F = this->createFunctionDeclaration(M, ArgNum); | 
 |  | 
 |   // TODO: Some arguments and a return value would probably be more | 
 |   // interesting. | 
 |   LLVMContext &Context = M.getContext(); | 
 |   DataLayout DL(&M); | 
 |   BasicBlock *BB = BasicBlock::Create(Context, "BB", F); | 
 |   Type *RetTy = F->getReturnType(); | 
 |   if (RetTy != Type::getVoidTy(Context)) { | 
 |     Instruction *RetAlloca = | 
 |         new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB); | 
 |     Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB); | 
 |     ReturnInst::Create(Context, RetLoad, BB); | 
 |   } else { | 
 |     ReturnInst::Create(Context, BB); | 
 |   } | 
 |  | 
 |   return F; | 
 | } | 
 | Function *RandomIRBuilder::createFunctionDefinition(Module &M) { | 
 |   return createFunctionDefinition( | 
 |       M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); | 
 | } |