LLVM 21.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/ADT/Twine.h"
17#include "llvm/IR/BasicBlock.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/Value.h"
29#include "llvm/Support/Debug.h"
31#include <utility>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "bounds-checking"
36
// Command-line flag "bounds-checking-single-trap". In this file it is read
// only when a trap block has just been created: a trap block that cannot
// return and is not a per-check debug block is cached and reused for the rest
// of the function only if this flag is set.
37static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
38 cl::desc("Use one trap block per function"));
39
// Pass statistics.
//  - ChecksAdded:   bumped each time a branch to a trap block is inserted.
//  - ChecksSkipped: bumped each time a check condition is a compile-time
//                   constant.
//  - ChecksUnable:  bumped each time the size and offset of the accessed
//                   object are not both known, so no condition can be built.
40STATISTIC(ChecksAdded, "Bounds checks added");
41STATISTIC(ChecksSkipped, "Bounds checks skipped");
42STATISTIC(ChecksUnable, "Bounds checks unable to add");
43
/// IRBuilder specialization (constant folding through TargetFolder) used for
/// every instruction this file emits.
// NOTE(review): listing lines 46 and 48 are absent from this extract. The
// cross-reference index at the end of the page lists
// `BuilderTy(BasicBlock *TheBB, BasicBlock::iterator IP, TargetFolder Folder)`,
// which is presumably the constructor signature from line 46. Line 48 (the
// constructor body) cannot be determined from here; the index also lists
// IRBuilder's SetNoSanitizeMetadata(), which may be what it calls -- confirm
// against the upstream file before relying on either.
44class BuilderTy : public IRBuilder<TargetFolder> {
45public:
47 : IRBuilder<TargetFolder>(TheBB, IP, Folder) {
49 }
50};
51
52/// Gets the conditions under which memory accessing instructions will overflow.
53///
54/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
55/// the result from the load or the value being stored. It is used to determine
56/// the size of memory block that is touched.
57///
58/// Returns the condition under which the access will overflow.
/// Returns nullptr (and increments ChecksUnable) when \p ObjSizeEval cannot
/// determine both the size of the underlying object and the offset of \p Ptr
/// within it.
// NOTE(review): listing line 59 (the start of the signature) is absent from
// this extract. The cross-reference index at the end of the page gives it as
// `static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, ...` -- confirm
// against the upstream file.
60 const DataLayout &DL, TargetLibraryInfo &TLI,
61 ObjectSizeOffsetEvaluator &ObjSizeEval,
62 BuilderTy &IRB, ScalarEvolution &SE) {
// Number of bytes the access touches, derived from the store size of the
// accessed value's type.
63 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
64 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
65 << " bytes\n");
66
67 SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr);
68
// Without both a size and an offset no condition can be built.
69 if (!SizeOffset.bothKnown()) {
70 ++ChecksUnable;
71 return nullptr;
72 }
73
74 Value *Size = SizeOffset.Size;
75 Value *Offset = SizeOffset.Offset;
76 ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
77
78 Type *IndexTy = DL.getIndexType(Ptr->getType());
79 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
80
// Unsigned value ranges from ScalarEvolution; used below to replace a
// comparison with constant false when the ranges already prove it can never
// be true.
81 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
82 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
83 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
84
85 // three checks are required to ensure safety:
86 // . Offset >= 0 (since the offset is given from the base ptr)
87 // . Size >= Offset (unsigned)
88 // . Size - Offset >= NeededSize (unsigned)
89 //
90 // optimization: if Size >= 0 (signed), skip 1st check
91 // FIXME: add NSW/NUW here? -- we don't care if the subtraction overflows
92 Value *ObjSize = IRB.CreateSub(Size, Offset);
// Second check: constant false when the smallest possible Size is already
// >= the largest possible Offset.
93 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
94 ? ConstantInt::getFalse(Ptr->getContext())
// NOTE(review): listing line 95 (the false arm of the conditional above and
// the end of the Cmp2 statement) is absent from this extract. By analogy with
// Cmp3 it is presumably an unsigned "Size < Offset" comparison -- confirm
// against the upstream file.
// Third check: constant false when the smallest possible Size - Offset is
// already >= the largest possible NeededSize.
96 Value *Cmp3 = SizeRange.sub(OffsetRange)
97 .getUnsignedMin()
98 .uge(NeededSizeRange.getUnsignedMax())
99 ? ConstantInt::getFalse(Ptr->getContext())
100 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
101 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
// First check (Offset < 0, signed) is emitted only when Size is not a
// non-negative constant and its range does not prove it non-negative.
102 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
103 !SizeRange.getSignedMin().isNonNegative()) {
104 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
105 Or = IRB.CreateOr(Cmp1, Or);
106 }
107
108 return Or;
109}
110
111static CallInst *InsertTrap(BuilderTy &IRB, bool DebugTrapBB,
112 std::optional<int8_t> GuardKind) {
113 if (!DebugTrapBB)
114 return IRB.CreateIntrinsic(Intrinsic::trap, {}, {});
115
116 return IRB.CreateIntrinsic(
117 Intrinsic::ubsantrap, {},
118 ConstantInt::get(IRB.getInt8Ty(),
119 GuardKind.has_value()
120 ? GuardKind.value()
121 : IRB.GetInsertBlock()->getParent()->size()));
122}
123
/// Emits a call to the runtime handler function called \p Name at the current
/// insert point of \p IRB and returns the call instruction.
///
/// The attribute set built here always contains NoUnwind, plus NoReturn when
/// \p MayReturn is false.
// NOTE(review): listing lines 131 and 133 are absent from this extract, so the
// statement that declares `Callee` is only partly visible (its `Name` and
// `Type::getVoidTy(Ctx)` arguments remain). The cross-reference index at the
// end of the page lists Module::getOrInsertFunction and AttributeList::get,
// which presumably form the missing pieces -- confirm against the upstream
// file.
124static CallInst *InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name) {
125 Function *Fn = IRB.GetInsertBlock()->getParent();
126 LLVMContext &Ctx = Fn->getContext();
127 llvm::AttrBuilder B(Ctx);
128 B.addAttribute(llvm::Attribute::NoUnwind);
129 if (!MayReturn)
130 B.addAttribute(llvm::Attribute::NoReturn);
132 Name,
134 Type::getVoidTy(Ctx));
135 return IRB.CreateCall(Callee);
136}
137
138/// Adds run-time bounds checks to memory accessing instructions.
139///
140/// \p Or is the condition that should guard the trap.
141///
142/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
/// It is invoked as GetTrapBB(IRB, Cont), where Cont is the block holding the
/// instructions from the split point onwards.
143template <typename GetTrapBBT>
144static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
145 // check if the comparison is a compile-time constant
146 ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
147 if (C) {
148 ++ChecksSkipped;
149 // If the constant is zero the check can never fire; nothing to do.
150 if (!C->getZExtValue())
151 return;
152 }
// Note that a non-zero constant condition is counted in both ChecksSkipped
// (above) and ChecksAdded (here).
153 ++ChecksAdded;
154
// NOTE(review): listing line 155, which declares `SplitI`, is absent from this
// extract. It is presumably the current insert point of IRB -- confirm against
// the upstream file.
// Split the block at SplitI and drop the terminator that the split left at
// the end of OldBB, so a new branch can be appended below.
156 BasicBlock *OldBB = SplitI->getParent();
157 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
158 OldBB->getTerminator()->eraseFromParent();
159
160 BasicBlock *TrapBB = GetTrapBB(IRB, Cont);
161
162 if (C) {
163 // The condition is a non-zero constant here, so branch unconditionally to
// the trap block.
164 // FIXME: We should really handle this differently to bypass the splitting
165 // the block.
166 BranchInst::Create(TrapBB, OldBB);
167 return;
168 }
169
170 // Create the conditional branch: to TrapBB when Or is true, else to Cont.
171 BranchInst::Create(TrapBB, Cont, Or, OldBB);
172}
173
/// Builds the name of the runtime handler to call on a failed check.
///
/// Starts from "__ubsan_handle_local_out_of_bounds", appends "_minimal" when
/// Opts.MinRuntime is set, then appends "_abort" when Opts.MayReturn is not
/// set.
// NOTE(review): listing line 175 (the function name and parameter list) is
// absent from this extract. The cross-reference index at the end of the page
// gives it as
// `getRuntimeCallName(const BoundsCheckingPass::Options::Runtime &Opts)` --
// confirm against the upstream file.
174static std::string
176 std::string Name = "__ubsan_handle_local_out_of_bounds";
177 if (Opts.MinRuntime)
178 Name += "_minimal";
179 if (!Opts.MayReturn)
180 Name += "_abort";
181 return Name;
182}
183
/// Instruments the non-volatile loads, stores, cmpxchg and atomicrmw
/// instructions of a function with bounds checks.
///
/// Works in two phases: first every instruction is visited and the overflow
/// condition for each supported access is recorded in TrapInfo; then each
/// recorded condition is turned into a branch to a trap block.
///
/// Returns false without touching the function when it carries the
/// NoSanitizeBounds attribute; otherwise returns true iff at least one
/// condition was recorded.
// NOTE(review): listing lines 184, 198, 243 and 279 are absent from this
// extract. Per the cross-reference index at the end of the page, line 184 is
// the start of the signature
// (`static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,`).
// Line 198 presumably declares `TrapInfo` and line 279 presumably constructs
// the `IRB` used in the final loop; line 243 is unknown from here. Confirm all
// four against the upstream file.
185 ScalarEvolution &SE,
186 const BoundsCheckingPass::Options &Opts) {
187 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
188 return false;
189
190 const DataLayout &DL = F.getDataLayout();
191 ObjectSizeOpts EvalOpts;
192 EvalOpts.RoundToAlign = true;
193 EvalOpts.EvalMode = ObjectSizeOpts::Mode::ExactUnderlyingSizeAndOffset;
194 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
195
196 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
197 // touching instructions
// Phase 1: compute and record the overflow condition for each access. The
// builder is positioned at the access itself, so the condition's instructions
// are emitted immediately before it.
199 for (Instruction &I : instructions(F)) {
200 Value *Or = nullptr;
201 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
202 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
203 if (!LI->isVolatile())
204 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
205 ObjSizeEval, IRB, SE);
206 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
207 if (!SI->isVolatile())
208 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
209 DL, TLI, ObjSizeEval, IRB, SE);
210 } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
211 if (!AI->isVolatile())
212 Or =
213 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
214 DL, TLI, ObjSizeEval, IRB, SE);
215 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
216 if (!AI->isVolatile())
217 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
218 DL, TLI, ObjSizeEval, IRB, SE);
219 }
220 if (Or) {
// With a guard kind configured, the check only fires when the
// allow_ubsan_check intrinsic for that kind also yields true.
221 if (Opts.GuardKind) {
222 llvm::Value *Allow = IRB.CreateIntrinsic(
223 IRB.getInt1Ty(), Intrinsic::allow_ubsan_check,
224 {llvm::ConstantInt::getSigned(IRB.getInt8Ty(), *Opts.GuardKind)});
225 Or = IRB.CreateAnd(Or, Allow);
226 }
227 TrapInfo.push_back(std::make_pair(&I, Or));
228 }
229 }
230
// Runtime handler name; left empty when no runtime is configured, in which
// case a trap intrinsic is emitted instead of a call.
231 std::string Name;
232 if (Opts.Rt)
233 Name = getRuntimeCallName(*Opts.Rt);
234
235 // Create a trapping basic block on demand using a callback. Depending on
236 // flags, this will either create a single block for the entire function or
237 // will create a fresh block every time it is called.
238 BasicBlock *ReuseTrapBB = nullptr;
239 auto GetTrapBB = [&ReuseTrapBB, &Opts, &Name](BuilderTy &IRB,
240 BasicBlock *Cont) {
241 Function *Fn = IRB.GetInsertBlock()->getParent();
242 auto DebugLoc = IRB.getCurrentDebugLocation();
244
245 // If an earlier call cached a shared trap block (see the assignment to
246 // ReuseTrapBB at the end of this lambda), hand that block back instead of
247 // creating a new one.
248 if (ReuseTrapBB)
249 return ReuseTrapBB;
250
251 BasicBlock *TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
252 IRB.SetInsertPoint(TrapBB);
253
// When merging is not requested, each trap call is marked NoMerge (and, for
// the intrinsic path, InsertTrap emits ubsantrap rather than trap).
254 bool DebugTrapBB = !Opts.Merge;
255 CallInst *TrapCall = Opts.Rt ? InsertCall(IRB, Opts.Rt->MayReturn, Name)
256 : InsertTrap(IRB, DebugTrapBB, Opts.GuardKind);
257 if (DebugTrapBB)
258 TrapCall->addFnAttr(llvm::Attribute::NoMerge);
259
260 TrapCall->setDoesNotThrow();
261 TrapCall->setDebugLoc(DebugLoc);
262
// Only a runtime handler can return; in that case execution resumes at Cont.
// Otherwise the call is marked noreturn and the block ends in unreachable.
263 bool MayReturn = Opts.Rt && Opts.Rt->MayReturn;
264 if (MayReturn) {
265 IRB.CreateBr(Cont);
266 } else {
267 TrapCall->setDoesNotReturn();
268 IRB.CreateUnreachable();
269 }
270
// A block that branches back to a specific Cont cannot be shared, and a
// per-check debug block must stay distinct, so only cache the remaining case.
271 if (!MayReturn && SingleTrapBB && !DebugTrapBB)
272 ReuseTrapBB = TrapBB;
273
274 return TrapBB;
275 };
276
// Phase 2: insert the branch to a trap block for each recorded condition.
277 for (const auto &Entry : TrapInfo) {
278 Instruction *Inst = Entry.first;
280 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
281 }
282
283 return !TrapInfo.empty();
284}
285
/// Pass entry point: fetches TargetLibraryInfo and ScalarEvolution for the
/// function and runs addBoundsChecking. Reports all analyses preserved when
/// addBoundsChecking returns false.
// NOTE(review): listing lines 286 and 293 are absent from this extract. The
// cross-reference index at the end of the page gives the signature as
// `PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)`. Line 293
// is the return taken when the function was changed; the index lists
// PreservedAnalyses::none(), presumably what it returns -- confirm against the
// upstream file.
287 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
288 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
289
290 if (!addBoundsChecking(F, TLI, SE, Opts))
291 return PreservedAnalyses::all();
292
294}
295
/// Writes this pass's options to \p OS as a '<'...'>' parameter list.
///
/// Inside the angle brackets the first item is "trap" when no runtime is
/// configured, otherwise "rt" with a "min-" prefix when Rt->MinRuntime is set
/// and an "-abort" suffix when Rt->MayReturn is not set. ";merge" follows when
/// Opts.Merge is set, and ";guard=<N>" when a guard kind is set, with N
/// printed as an int.
// NOTE(review): listing lines 296 and 298 are absent from this extract: the
// start of the signature and the start of the call whose argument list ends
// on line 299. That call presumably forwards to the base-class printPipeline
// to emit the pass name -- confirm against the upstream file.
297 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
299 OS, MapClassName2PassName);
300 OS << "<";
301 if (Opts.Rt) {
302 if (Opts.Rt->MinRuntime)
303 OS << "min-";
304 OS << "rt";
305 if (!Opts.Rt->MayReturn)
306 OS << "-abort";
307 } else {
308 OS << "trap";
309 }
310 if (Opts.Merge)
311 OS << ";merge";
312 if (Opts.GuardKind)
313 OS << ";guard=" << static_cast<int>(*Opts.GuardKind);
314 OS << ">";
315}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE, const BoundsCheckingPass::Options &Opts)
static CallInst * InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name)
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static std::string getRuntimeCallName(const BoundsCheckingPass::Options::Runtime &Opts)
static CallInst * InsertTrap(BuilderTy &IRB, bool DebugTrapBB, std::optional< int8_t > GuardKind)
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
raw_pwrite_stream & OS
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
BuilderTy(BasicBlock *TheBB, BasicBlock::iterator IP, TargetFolder Folder)
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:213
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:599
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setDoesNotReturn()
Definition: InstrTypes.h:1936
void setDoesNotThrow()
Definition: InstrTypes.h:1943
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1474
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
size_t size() const
Definition: Function.h:864
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2286
void SetNoSanitizeMetadata()
Set nosanitize metadata.
Definition: IRBuilder.h:244
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateTypeSize(Type *DstType, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Definition: IRBuilder.cpp:103
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2302
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:204
Evaluate the size and offset of an object pointed to by a Value*.
SizeOffsetValue compute(Value *V)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Or
Bitwise or logical OR of integers.
std::optional< Runtime > Rt
std::optional< int8_t > GuardKind
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
bool bothKnown() const