LLVM 22.0.0git
IRMutator.cpp
Go to the documentation of this file.
1//===-- IRMutator.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SmallSet.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/FMF.h"
20#include "llvm/IR/Function.h"
24#include "llvm/IR/IntrinsicsAMDGPU.h"
25#include "llvm/IR/Module.h"
26#include "llvm/IR/Operator.h"
28#include "llvm/IR/Verifier.h"
33#include <map>
34#include <optional>
35
36using namespace llvm;
37
39 auto RS = makeSampler<Function *>(IB.Rand);
40 for (Function &F : M)
41 if (!F.isDeclaration())
42 RS.sample(&F, /*Weight=*/1);
43
44 while (RS.totalWeight() < IB.MinFunctionNum) {
45 Function *F = IB.createFunctionDefinition(M);
46 RS.sample(F, /*Weight=*/1);
47 }
48 mutate(*RS.getSelection(), IB);
49}
50
53 [](BasicBlock *BB) { return !BB->isEHPad(); });
55 mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
56}
57
59 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
60}
61
63 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
64}
65
66void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
67 std::vector<Type *> Types;
68 for (const auto &Getter : AllowedTypes)
69 Types.push_back(Getter(M.getContext()));
70 RandomIRBuilder IB(Seed, Types);
71
72 size_t CurSize = IRMutator::getModuleSize(M);
73 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
74 for (const auto &Strategy : Strategies)
75 RS.sample(Strategy.get(),
76 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
77 if (RS.totalWeight() == 0)
78 return;
79 auto Strategy = RS.getSelection();
80
81 Strategy->mutate(M, IB);
82}
83
86 FPM.addPass(DCEPass());
88 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
90 FPM.run(F, FAM);
91}
92
96}
97
98std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
99 std::vector<fuzzerop::OpDescriptor> Ops;
106 return Ops;
107}
108
109std::optional<fuzzerop::OpDescriptor>
110InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
111 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
112 return Op.SourcePreds[0].matches({}, Src);
113 };
114 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
115 if (RS.isEmpty())
116 return std::nullopt;
117 return *RS;
118}
119
122 return I;
123 } else {
124 // Certain intrinsics, such as @llvm.amdgcn.cs.chain, must be immediately
125 // followed by an unreachable instruction..
126 if (UnreachableInst *UI = dyn_cast<UnreachableInst>(BB.getTerminator())) {
127 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI->getPrevNode())) {
128 return II;
129 }
130 }
131 }
132
133 return BB.getTerminator();
134}
135
137 auto End = BB.end();
138
139 if (BB.empty()) {
140 return End;
141 }
142
143 Instruction *EffectiveTerminator = getEffectiveTerminator(BB);
144 if (EffectiveTerminator != BB.getTerminator()) {
145 // Adjust range for special cases such as tail call.
146 End = std::prev(BB.end());
147 }
148
149 return End;
150}
151
154 auto End = getEndIterator(BB);
155 return make_range(BB.getFirstInsertionPt(), End);
156}
157
161 if (Insts.size() < 1)
162 return;
163
164 // Choose an insertion point for our new instruction.
165 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
166
167 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
168 auto InstsAfter = ArrayRef(Insts).slice(IP);
169
170 // Choose a source, which will be used to constrain the operation selection.
172 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
173
174 // Choose an operation that's constrained to be valid for the type of the
175 // source, collect any other sources it needs, and then build it.
176 auto OpDesc = chooseOperation(Srcs[0], IB);
177 // Bail if no operation was found
178 if (!OpDesc)
179 return;
180
181 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
182 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
183
184 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP]->getIterator())) {
185 // Find a sink and wire up the results of the operation.
186 IB.connectToSink(BB, InstsAfter, Op);
187 }
188}
189
190uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
191 uint64_t CurrentWeight) {
192 // If we have less than 200 bytes, panic and try to always delete.
193 if (CurrentSize > MaxSize - 200)
194 return CurrentWeight ? CurrentWeight * 100 : 1;
195 // Draw a line starting from when we only have 1k left and increasing linearly
196 // to double the current weight.
197 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
198 (static_cast<int64_t>(MaxSize) -
199 static_cast<int64_t>(CurrentSize) - 1000) /
200 1000;
201 // Clamp negative weights to zero.
202 if (Line < 0)
203 return 0;
204 return Line;
205}
206
208 auto RS = makeSampler<Instruction *>(IB.Rand);
209 for (Instruction &Inst : instructions(F)) {
210 // TODO: We can't handle these instructions.
211 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
212 isa<PHINode>(Inst))
213 continue;
214
215 RS.sample(&Inst, /*Weight=*/1);
216 }
217 if (RS.isEmpty())
218 return;
219
220 // Delete the instruction.
221 mutate(*RS.getSelection(), IB);
222 // Clean up any dead code that's left over after removing the instruction.
224}
225
227 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
228
229 if (Inst.getType()->isVoidTy()) {
230 // Instructions with void type (ie, store) have no uses to worry about. Just
231 // erase it and move on.
232 Inst.eraseFromParent();
233 return;
234 }
235
236 // Otherwise we need to find some other value with the right type to keep the
237 // users happy.
238 auto Pred = fuzzerop::onlyType(Inst.getType());
239 auto RS = makeSampler<Value *>(IB.Rand);
241 BasicBlock *BB = Inst.getParent();
242 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
243 ++I) {
244 if (Pred.matches({}, &*I))
245 RS.sample(&*I, /*Weight=*/1);
246 InstsBefore.push_back(&*I);
247 }
248 if (!RS)
249 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
250
251 Inst.replaceAllUsesWith(RS.getSelection());
252 Inst.eraseFromParent();
253}
254
256 RandomIRBuilder &IB) {
257 SmallVector<std::function<void()>, 8> Modifications;
258 CmpInst *CI = nullptr;
259 GetElementPtrInst *GEP = nullptr;
260 switch (Inst.getOpcode()) {
261 default:
262 break;
263 // Add nsw, nuw flag
264 case Instruction::Add:
265 case Instruction::Mul:
266 case Instruction::Sub:
267 case Instruction::Shl:
268 Modifications.push_back(
269 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
270 Modifications.push_back(
271 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
272 break;
273 case Instruction::ICmp:
274 CI = cast<ICmpInst>(&Inst);
275 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
277 Modifications.push_back(
278 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
279 }
280 break;
281 // Add inbound flag.
282 case Instruction::GetElementPtr:
283 GEP = cast<GetElementPtrInst>(&Inst);
284 Modifications.push_back(
285 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
286 break;
287 // Add exact flag.
288 case Instruction::UDiv:
289 case Instruction::SDiv:
290 case Instruction::LShr:
291 case Instruction::AShr:
292 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
293 break;
294
295 case Instruction::FCmp:
296 CI = cast<FCmpInst>(&Inst);
297 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
299 Modifications.push_back(
300 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
301 }
302 break;
303 }
304
305 // Add fast math flag if possible.
306 if (isa<FPMathOperator>(&Inst)) {
307 // Try setting everything unless they are already on.
308 Modifications.push_back(
309 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
310 // Try unsetting everything unless they are already off.
311 Modifications.push_back(
312 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
313 // Individual setting by flipping the bit
314 Modifications.push_back(
315 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
316 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
317 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
318 Modifications.push_back(
319 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
320 Modifications.push_back(
321 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
322 Modifications.push_back(
323 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
324 Modifications.push_back(
325 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
326 }
327
328 // Randomly switch operands of instructions
329 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
330 switch (Inst.getOpcode()) {
331 case Instruction::SDiv:
332 case Instruction::UDiv:
333 case Instruction::SRem:
334 case Instruction::URem:
335 case Instruction::FDiv:
336 case Instruction::FRem: {
337 // Verify that the after shuffle the second operand is not
338 // constant 0.
339 Value *Operand = Inst.getOperand(0);
340 if (Constant *C = dyn_cast<Constant>(Operand)) {
341 if (!C->isZeroValue()) {
342 ShuffleItems = {0, 1};
343 }
344 }
345 break;
346 }
347 case Instruction::Select:
348 ShuffleItems = {1, 2};
349 break;
350 case Instruction::Add:
351 case Instruction::Sub:
352 case Instruction::Mul:
353 case Instruction::Shl:
354 case Instruction::LShr:
355 case Instruction::AShr:
356 case Instruction::And:
357 case Instruction::Or:
358 case Instruction::Xor:
359 case Instruction::FAdd:
360 case Instruction::FSub:
361 case Instruction::FMul:
362 case Instruction::ICmp:
363 case Instruction::FCmp:
364 case Instruction::ShuffleVector:
365 ShuffleItems = {0, 1};
366 break;
367 }
368 if (ShuffleItems != NoneItem) {
369 Modifications.push_back([&Inst, &ShuffleItems]() {
370 Value *Op0 = Inst.getOperand(ShuffleItems.first);
371 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
372 Inst.setOperand(ShuffleItems.second, Op0);
373 });
374 }
375
376 auto RS = makeSampler(IB.Rand, Modifications);
377 if (RS)
378 RS.getSelection()();
379}
380
381/// Return a case value that is not already taken to make sure we don't have two
382/// cases with same value.
384 uint64_t MaxValue, RandomIRBuilder &IB) {
385 uint64_t tmp;
386 do {
387 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
388 } while (CasesTaken.count(tmp) != 0);
389 CasesTaken.insert(tmp);
390 return tmp;
391}
392
393/// Determines whether a function is unsupported by the current mutator's
394/// implementation. The function returns true if any of the following criteria
395/// are met:
396/// * The function accepts metadata or token types as arguments.
397/// * The function has ABI attributes that could cause UB.
398/// * The function uses a non-callable CC that may result in UB.
400 // Some functions accept metadata type or token type as arguments.
401 // We don't call those functions for now.
402 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
403 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
404 auto IsUnsupportedTy = [](Type *T) {
405 return T->isMetadataTy() || T->isTokenTy();
406 };
407
408 if (IsUnsupportedTy(F->getReturnType()) ||
409 any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
410 return true;
411 }
412
413 // ABI attributes must be specified both at the function
414 // declaration/definition and call-site, otherwise the
415 // behavior may be undefined.
416 // We don't call those functions for now to prevent UB from happening.
417 auto IsABIAttribute = [](AttributeSet A) {
418 static const Attribute::AttrKind ABIAttrs[] = {
419 Attribute::StructRet, Attribute::ByVal,
420 Attribute::InAlloca, Attribute::InReg,
421 Attribute::StackAlignment, Attribute::SwiftSelf,
422 Attribute::SwiftAsync, Attribute::SwiftError,
423 Attribute::Preallocated, Attribute::ByRef,
424 Attribute::ZExt, Attribute::SExt};
425
426 return llvm::any_of(ABIAttrs, [&](Attribute::AttrKind kind) {
427 return A.hasAttribute(kind);
428 });
429 };
430
431 auto FuncAttrs = F->getAttributes();
432 if (IsABIAttribute(FuncAttrs.getRetAttrs())) {
433 return true;
434 }
435 for (size_t i = 0; i < F->arg_size(); i++) {
436 if (IsABIAttribute(FuncAttrs.getParamAttrs(i))) {
437 return true;
438 }
439 }
440
441 // If it is not satisfied, the IR will be invalid.
442 if (!isCallableCC(F->getCallingConv())) {
443 return true;
444 }
445
446 // This intrinsic has specific requirements for its parameters and the caller
447 // must adhere to certain calling conventions.
448 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
449 return true;
450 }
451
452 return false;
453}
454
456 Module *M = BB.getParent()->getParent();
457 // If nullptr is selected, we will create a new function declaration.
458 SmallVector<Function *, 32> Functions({nullptr});
459 for (Function &F : M->functions()) {
460 Functions.push_back(&F);
461 }
462
463 auto RS = makeSampler(IB.Rand, Functions);
464 Function *F = RS.getSelection();
465
466 if (!F || isUnsupportedFunction(F)) {
467 F = IB.createFunctionDeclaration(*M);
468 }
469
470 FunctionType *FTy = F->getFunctionType();
472 if (!F->arg_empty()) {
473 for (Type *ArgTy : FTy->params()) {
474 SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
475 }
476 }
477 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
478 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
479 BasicBlock::iterator InsertPt) {
480 StringRef Name = isRetVoid ? nullptr : "C";
481 CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, InsertPt);
482 Call->setCallingConv(F->getCallingConv());
483 // Don't return this call inst if it return void as it can't be sinked.
484 return isRetVoid ? nullptr : Call;
485 };
486
489 if (Insts.size() < 1)
490 return;
491
492 // Choose an insertion point for our new call instruction.
493 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
494
495 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
496 auto InstsAfter = ArrayRef(Insts).slice(IP);
497
498 // Choose a source, which will be used to constrain the operation selection.
500
501 for (const auto &Pred : ArrayRef(SourcePreds)) {
502 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
503 }
504
505 if (Value *Op = BuilderFunc(Srcs, Insts[IP]->getIterator())) {
506 // Find a sink and wire up the results of the operation.
507 IB.connectToSink(BB, InstsAfter, Op);
508 }
509}
510
514 if (Insts.size() < 1)
515 return;
516
517 // Choose a point where we split the block.
518 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
519 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
520
521 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
522 // directly jumps to `Sink`. Here, we have to create a new terminator for
523 // `Source`.
524 BasicBlock *Block = Insts[IP]->getParent();
525 BasicBlock *Source = Block;
526 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
527
528 Function *F = BB.getParent();
529 LLVMContext &C = F->getParent()->getContext();
530 // A coin decides if it is branch or switch
531 if (uniform<uint64_t>(IB.Rand, 0, 1)) {
532 // Branch
533 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
534 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
535 Value *Cond =
536 IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
538 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
539 // Remove the old terminator.
540 ReplaceInstWithInst(Source->getTerminator(), Branch);
541 // Connect these blocks to `Sink`
542 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
543 } else {
544 // Switch
545 // Determine Integer type, it IS possible we use a boolean to switch.
546 auto RS =
547 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
548 return Ty->isIntegerTy();
549 }));
550 assert(RS && "There is no integer type in all allowed types, is the "
551 "setting correct?");
552 Type *Ty = RS.getSelection();
553 IntegerType *IntTy = cast<IntegerType>(Ty);
554
555 uint64_t BitSize = IntTy->getBitWidth();
556 uint64_t MaxCaseVal =
557 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
558 // Create Switch inst in Block
559 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
560 fuzzerop::onlyType(IntTy), false);
561 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
562 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
563 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
564 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
565 // Remove the old terminator.
566 ReplaceInstWithInst(Source->getTerminator(), Switch);
567
568 // Create blocks, for each block assign a case value.
569 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
570 SmallSet<uint64_t, 4> CasesTaken;
571 for (uint64_t i = 0; i < NumCases; i++) {
572 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
573 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
574 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
575 Switch->addCase(OnValue, CaseBlock);
576 Blocks.push_back(CaseBlock);
577 }
578
579 // Connect these blocks to `Sink`
580 connectBlocksToSink(Blocks, Sink, IB);
581 }
582}
583
584/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
585/// even have terminator.
586void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
587 BasicBlock *Sink,
588 RandomIRBuilder &IB) {
589 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
590 for (uint64_t i = 0; i < Blocks.size(); i++) {
591 // We have at least one block that directly goes to sink.
592 CFGToSink ToSink = (i == DirectSinkIdx)
593 ? CFGToSink::DirectSink
594 : static_cast<CFGToSink>(uniform<uint64_t>(
595 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
596 BasicBlock *BB = Blocks[i];
597 Function *F = BB->getParent();
598 LLVMContext &C = F->getParent()->getContext();
599 switch (ToSink) {
600 case CFGToSink::Return: {
601 Type *RetTy = F->getReturnType();
602 Value *RetValue = nullptr;
603 if (!RetTy->isVoidTy())
604 RetValue =
605 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
606 ReturnInst::Create(C, RetValue, BB);
607 break;
608 }
609 case CFGToSink::DirectSink: {
610 BranchInst::Create(Sink, BB);
611 break;
612 }
613 case CFGToSink::SinkOrSelfLoop: {
614 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
615 // A coin decides which block is true branch.
616 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
617 Value *Cond = IB.findOrCreateSource(
618 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
619 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
620 break;
621 }
622 case CFGToSink::EndOfCFGToLink:
623 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
624 }
625 }
626}
627
629 // Can't insert PHI node to entry node.
630 if (&BB == &BB.getParent()->getEntryBlock())
631 return;
632 Type *Ty = IB.randomType();
633 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", BB.begin());
634
635 // Use a map to make sure the same incoming basic block has the same value.
636 DenseMap<BasicBlock *, Value *> IncomingValues;
637 for (BasicBlock *Pred : predecessors(&BB)) {
638 Value *Src = IncomingValues[Pred];
639 // If `Pred` is not in the map yet, we'll get a nullptr.
640 if (!Src) {
642 for (auto I = Pred->begin(); I != Pred->end(); ++I)
643 Insts.push_back(&*I);
644 // There is no need to inform IB what previously used values are if we are
645 // using `onlyType`
646 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
647 IncomingValues[Pred] = Src;
648 }
649 PHI->addIncoming(Src, Pred);
650 }
653 IB.connectToSink(BB, InstsAfter, PHI);
654}
655
657 for (BasicBlock &BB : F) {
658 this->mutate(BB, IB);
659 }
660}
664 if (Insts.size() < 1)
665 return;
666 // Choose an Instruction to mutate.
667 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
668 Instruction *Inst = Insts[Idx];
669 // `Idx + 1` so we don't sink to ourselves.
670 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
671 Type *Ty = Inst->getType();
672 // Don't sink terminators, void function calls, token, etc.
673 if (!Ty->isVoidTy() && !Ty->isTokenTy())
674 // Find a new sink and wire up the results of the operation.
675 IB.connectToSink(BB, InstsAfter, Inst);
676}
677
679 // A deterministic alternative to SmallPtrSet with the same lookup
680 // performance.
681 std::map<size_t, Instruction *> AliveInsts;
682 std::map<Instruction *, size_t> AliveInstsLookup;
683 size_t InsertIdx = 0;
684 for (auto &I : make_early_inc_range(
686 getEffectiveTerminator(BB)->getIterator()))) {
687 // First gather all instructions that can be shuffled. Don't take
688 // terminator.
689 AliveInsts.insert({InsertIdx, &I});
690 AliveInstsLookup.insert({&I, InsertIdx++});
691 // Then remove these instructions from the block
692 I.removeFromParent();
693 }
694
695 // Shuffle these instructions using topological sort.
696 // Returns false if all current instruction's dependencies in this block have
697 // been shuffled. If so, this instruction can be shuffled too.
698 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
699 for (Value *O : AliveInsts[Index]->operands()) {
700 Instruction *P = dyn_cast<Instruction>(O);
701 if (P && AliveInstsLookup.count(P))
702 return true;
703 }
704 return false;
705 };
706 // Get all alive instructions that depend on the current instruction.
707 // Takes Instruction* instead of index because the instruction is already
708 // shuffled.
709 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
711 for (Value *U : I->users()) {
712 if (Instruction *P = dyn_cast<Instruction>(U)) {
713 auto It = AliveInstsLookup.find(P);
714 if (It != AliveInstsLookup.end())
715 Children.insert(It->second);
716 }
717 }
718 return Children;
719 };
720 SmallSet<size_t, 8> RootIndices;
722 for (const auto &[Index, Inst] : AliveInsts) {
723 if (!hasAliveParent(Index))
724 RootIndices.insert(Index);
725 }
726 // Topological sort by randomly selecting a node without a parent, or root.
727 while (!RootIndices.empty()) {
728 auto RS = makeSampler<size_t>(IB.Rand);
729 for (size_t RootIdx : RootIndices)
730 RS.sample(RootIdx, 1);
731 size_t RootIdx = RS.getSelection();
732
733 RootIndices.erase(RootIdx);
734 Instruction *Root = AliveInsts[RootIdx];
735 AliveInsts.erase(RootIdx);
736 AliveInstsLookup.erase(Root);
737 Insts.push_back(Root);
738
739 for (size_t Child : getAliveChildren(Root)) {
740 if (!hasAliveParent(Child)) {
741 RootIndices.insert(Child);
742 }
743 }
744 }
745
746 Instruction *Terminator = getEffectiveTerminator(BB);
747 // Then put instructions back.
748 for (Instruction *I : Insts) {
749 I->insertBefore(Terminator->getIterator());
750 }
751}
752
753std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
754 LLVMContext &Context) {
755
756 if (Size <= 1)
757 // We get bogus data given an empty corpus - just create a new module.
758 return std::make_unique<Module>("M", Context);
759
760 auto Buffer = MemoryBuffer::getMemBuffer(
761 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
762 /*RequiresNullTerminator=*/false);
763
764 SMDiagnostic Err;
765 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
766 if (Error E = M.takeError()) {
767 errs() << toString(std::move(E)) << "\n";
768 return nullptr;
769 }
770 return std::move(M.get());
771}
772
773size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
774 std::string Buf;
775 {
778 }
779 if (Buf.size() > MaxSize)
780 return 0;
781 memcpy(Dest, Buf.data(), Buf.size());
782 return Buf.size();
783}
784
785std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
786 LLVMContext &Context) {
787 auto M = parseModule(Data, Size, Context);
788 if (!M || verifyModule(*M, &errs()))
789 return nullptr;
790
791 return M;
792}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
Expand Atomic instructions
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
static Instruction * getEffectiveTerminator(BasicBlock &BB)
Definition: IRMutator.cpp:120
static bool isUnsupportedFunction(Function *F)
Determines whether a function is unsupported by the current mutator's implementation.
Definition: IRMutator.cpp:399
static void eliminateDeadCode(Function &F)
Definition: IRMutator.cpp:84
static iterator_range< BasicBlock::iterator > getInsertionRange(BasicBlock &BB)
Definition: IRMutator.cpp:153
static BasicBlock::iterator getEndIterator(BasicBlock &BB)
Definition: IRMutator.cpp:136
static uint64_t getUniqueCaseValue(SmallSet< uint64_t, 4 > &CasesTaken, uint64_t MaxValue, RandomIRBuilder &IB)
Return a case value that is not already taken to make sure we don't have two cases with same value.
Definition: IRMutator.cpp:383
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static ManagedStatic< cl::opt< uint64_t >, CreateSeed > Seed
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallSet class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:473
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
Definition: Attributes.h:88
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
bool empty() const
Definition: BasicBlock.h:481
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:707
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI const CallInst * getTerminatingMustTailCall() const
Returns the call instruction marked 'musttail' prior to the terminating return instruction of this ba...
Definition: BasicBlock.cpp:256
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:770
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:709
@ FIRST_FCMP_PREDICATE
Definition: InstrTypes.h:696
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
This is an important base class in LLVM.
Definition: Constant.h:43
Basic Dead Code Elimination pass.
Definition: DCE.h:23
This class represents an Operation in the Expression.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
bool none() const
Definition: FMF.h:57
bool all() const
Definition: FMF.h:58
Class to represent function types.
Definition: DerivedTypes.h:105
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
virtual void mutate(Module &M, RandomIRBuilder &IB)
Definition: IRMutator.cpp:38
LLVM_ABI void mutateModule(Module &M, int Seed, size_t MaxSize)
Mutate given module.
Definition: IRMutator.cpp:66
static LLVM_ABI size_t getModuleSize(const Module &M)
Calculate the size of module as the number of objects in it, i.e.
Definition: IRMutator.cpp:62
static std::vector< fuzzerop::OpDescriptor > getDefaultOps()
Definition: IRMutator.cpp:98
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:93
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:511
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:455
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:628
uint64_t getWeight(size_t CurrentSize, size_t MaxSize, uint64_t CurrentWeight) override
Provide a weight to bias towards choosing this strategy for a mutation.
Definition: IRMutator.cpp:190
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:207
void mutate(Instruction &Inst, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:255
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool hasNoNaNs() const LLVM_READONLY
Determine whether the no-NaNs flag is set.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoInfs() const LLVM_READONLY
Determine whether the no-infs flag is set.
LLVM_ABI void setHasNoSignedZeros(bool B)
Set or clear the no-signed-zeros flag on this instruction, which must be an operator which supports t...
LLVM_ABI bool hasNoSignedZeros() const LLVM_READONLY
Determine whether the no-signed-zeros flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
LLVM_ABI void setHasAllowContract(bool B)
Set or clear the allow-contract flag on this instruction, which must be an operator which supports th...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void setHasNoNaNs(bool B)
Set or clear the no-nans flag on this instruction, which must be an operator which supports this flag...
LLVM_ABI void setHasApproxFunc(bool B)
Set or clear the approximate-math-functions flag on this instruction, which must be an operator which...
LLVM_ABI void setHasAllowReassoc(bool B)
Set or clear the reassociation flag on this instruction, which must be an operator which supports thi...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
bool isTerminator() const
Definition: Instruction.h:315
LLVM_ABI bool hasAllowReciprocal() const LLVM_READONLY
Determine whether the allow-reciprocal flag is set.
LLVM_ABI void setHasNoInfs(bool B)
Set or clear the no-infs flag on this instruction, which must be an operator which supports this flag...
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI bool hasApproxFunc() const LLVM_READONLY
Determine whether the approximate-math-functions flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void setHasAllowReciprocal(bool B)
Set or clear the allow-reciprocal flag on this instruction, which must be an operator which supports ...
LLVM_ABI bool hasAllowContract() const LLVM_READONLY
Determine whether the allow-contract flag is set.
LLVM_ABI void setFast(bool B)
Set or clear all fast-math-flags on this instruction, which must be an operator which supports this f...
LLVM_ABI bool hasAllowReassoc() const LLVM_READONLY
Determine whether the allow-reassociation flag is set.
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition: PassManager.h:196
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)
Run all of the passes in this manager over the given unit of IR.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:282
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:678
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:656
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
bool empty() const
Definition: SmallSet.h:169
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
Analysis pass providing the TargetLibraryInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:234
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
A range adaptor for a pair of iterators.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
static SourcePred onlyType(Type *Only)
Definition: OpDescriptor.h:96
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
LLVM_ABI void describeFuzzerIntOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Getters for the default sets of operations, per general category.
Definition: Operations.cpp:18
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
LLVM_ABI void WriteBitcodeToFile(const Module &M, raw_ostream &Out, bool ShouldPreserveUseListOrder=false, const ModuleSummaryIndex *Index=nullptr, bool GenerateHash=false, ModuleHash *ModHash=nullptr)
Write the specified module to the specified raw output stream.
LLVM_ABI std::unique_ptr< Module > parseAndVerify(const uint8_t *Data, size_t Size, LLVMContext &Context)
Try to parse module and verify it.
Definition: IRMutator.cpp:785
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::unique_ptr< Module > parseModule(const uint8_t *Data, size_t Size, LLVMContext &Context)
Fuzzer friendly interface for the llvm bitcode parser.
Definition: IRMutator.cpp:753
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI void describeFuzzerAggregateOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:88
LLVM_ABI size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize)
Fuzzer friendly interface for the llvm bitcode printer.
Definition: IRMutator.cpp:773
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
ReservoirSampler< ElT, GenT > makeSampler(GenT &RandGen, RangeT &&Items)
Definition: Random.h:75
LLVM_ABI void describeFuzzerVectorOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:94
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:581
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
DWARFExpression::Operation Op
LLVM_ABI void describeFuzzerFloatOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:45
LLVM_ABI void describeFuzzerControlFlowOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:75
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
LLVM_ABI void describeFuzzerPointerOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:84
const char * toString(DWARFSectionKind Kind)
constexpr bool isCallableCC(CallingConv::ID CC)
Definition: CallingConv.h:298
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7513
A description of some operation we can build while fuzzing IR.
Definition: OpDescriptor.h:90