LLVM 22.0.0git
GlobalSplit.cpp
Go to the documentation of this file.
1//===- GlobalSplit.cpp - global variable splitter -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass uses inrange annotations on GEP indices to split globals where
10// beneficial. Clang currently attaches these annotations to references to
11// virtual table globals under the Itanium ABI for the benefit of the
12// whole-program virtual call optimization and control flow integrity passes.
13//
14//===----------------------------------------------------------------------===//
15
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include <cstdint>
#include <optional>
#include <vector>
35
36using namespace llvm;
37
38static bool splitGlobal(GlobalVariable &GV) {
39 // If the address of the global is taken outside of the module, we cannot
40 // apply this transformation.
41 if (!GV.hasLocalLinkage())
42 return false;
43
44 // We currently only know how to split ConstantStructs.
45 auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer());
46 if (!Init)
47 return false;
48
49 const DataLayout &DL = GV.getDataLayout();
50 const StructLayout *SL = DL.getStructLayout(Init->getType());
51 ArrayRef<TypeSize> MemberOffsets = SL->getMemberOffsets();
52 unsigned IndexWidth = DL.getIndexTypeSizeInBits(GV.getType());
53
54 // Verify that each user of the global is an inrange getelementptr constant,
55 // and collect information on how it relates to the global.
56 struct GEPInfo {
58 unsigned MemberIndex;
59 APInt MemberRelativeOffset;
60
61 GEPInfo(GEPOperator *GEP, unsigned MemberIndex, APInt MemberRelativeOffset)
62 : GEP(GEP), MemberIndex(MemberIndex),
63 MemberRelativeOffset(std::move(MemberRelativeOffset)) {}
64 };
66 for (User *U : GV.users()) {
67 auto *GEP = dyn_cast<GEPOperator>(U);
68 if (!GEP)
69 return false;
70
71 std::optional<ConstantRange> InRange = GEP->getInRange();
72 if (!InRange)
73 return false;
74
75 APInt Offset(IndexWidth, 0);
76 if (!GEP->accumulateConstantOffset(DL, Offset))
77 return false;
78
79 // Determine source-relative inrange.
80 ConstantRange SrcInRange = InRange->sextOrTrunc(IndexWidth).add(Offset);
81
82 // Check that the GEP offset is in the range (treating upper bound as
83 // inclusive here).
84 if (!SrcInRange.contains(Offset) && SrcInRange.getUpper() != Offset)
85 return false;
86
87 // Find which struct member the range corresponds to.
88 if (SrcInRange.getLower().uge(SL->getSizeInBytes()))
89 return false;
90
91 unsigned MemberIndex =
93 TypeSize MemberStart = MemberOffsets[MemberIndex];
94 TypeSize MemberEnd = MemberIndex == MemberOffsets.size() - 1
95 ? SL->getSizeInBytes()
96 : MemberOffsets[MemberIndex + 1];
97
98 // Verify that the range matches that struct member.
99 if (SrcInRange.getLower() != MemberStart ||
100 SrcInRange.getUpper() != MemberEnd)
101 return false;
102
103 Infos.emplace_back(GEP, MemberIndex, Offset - MemberStart);
104 }
105
107 GV.getMetadata(LLVMContext::MD_type, Types);
108
109 IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext());
110
111 std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands());
112 for (unsigned I = 0; I != Init->getNumOperands(); ++I) {
113 // Build a global representing this split piece.
114 auto *SplitGV =
115 new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(),
117 Init->getOperand(I), GV.getName() + "." + utostr(I));
118 SplitGlobals[I] = SplitGV;
119
120 unsigned SplitBegin = SL->getElementOffset(I);
121 unsigned SplitEnd = (I == Init->getNumOperands() - 1)
122 ? SL->getSizeInBytes()
123 : SL->getElementOffset(I + 1);
124
125 // Rebuild type metadata, adjusting by the split offset.
126 // FIXME: See if we can use DW_OP_piece to preserve debug metadata here.
127 for (MDNode *Type : Types) {
128 uint64_t ByteOffset = cast<ConstantInt>(
129 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
130 ->getZExtValue();
131 // Type metadata may be attached one byte after the end of the vtable, for
132 // classes without virtual methods in Itanium ABI. AFAIK, it is never
133 // attached to the first byte of a vtable. Subtract one to get the right
134 // slice.
135 // This is making an assumption that vtable groups are the only kinds of
136 // global variables that !type metadata can be attached to, and that they
137 // are either Itanium ABI vtable groups or contain a single vtable (i.e.
138 // Microsoft ABI vtables).
139 uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
140 if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
141 continue;
142 SplitGV->addMetadata(
143 LLVMContext::MD_type,
145 {ConstantAsMetadata::get(
146 ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
147 Type->getOperand(1)}));
148 }
149
150 if (GV.hasMetadata(LLVMContext::MD_vcall_visibility))
151 SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility());
152 }
153
154 for (const GEPInfo &Info : Infos) {
155 assert(Info.MemberIndex < SplitGlobals.size() && "Invalid member");
156 auto *NewGEP = ConstantExpr::getGetElementPtr(
157 Type::getInt8Ty(GV.getContext()), SplitGlobals[Info.MemberIndex],
158 ConstantInt::get(GV.getContext(), Info.MemberRelativeOffset),
159 Info.GEP->isInBounds());
160 Info.GEP->replaceAllUsesWith(NewGEP);
161 }
162
163 // Finally, remove the original global. Any remaining uses refer to invalid
164 // elements of the global, so replace with poison.
165 if (!GV.use_empty())
167 GV.eraseFromParent();
168 return true;
169}
170
171static bool splitGlobals(Module &M) {
172 // First, see if the module uses either of the llvm.type.test or
173 // llvm.type.checked.load intrinsics, which indicates that splitting globals
174 // may be beneficial.
175 Function *TypeTestFunc =
176 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
177 Function *TypeCheckedLoadFunc =
178 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
179 Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
180 &M, Intrinsic::type_checked_load_relative);
181 if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
182 (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
183 (!TypeCheckedLoadRelativeFunc ||
184 TypeCheckedLoadRelativeFunc->use_empty()))
185 return false;
186
187 bool Changed = false;
188 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
189 Changed |= splitGlobal(GV);
190 return Changed;
191}
192
194 if (!splitGlobals(M))
195 return PreservedAnalyses::all();
197}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool splitGlobals(Module &M)
static bool splitGlobal(GlobalVariable &GV)
Definition: GlobalSplit.cpp:38
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
static bool InRange(int64_t Value, unsigned short Shift, int LBound, int HBound)
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1274
This class represents a range of values.
Definition: ConstantRange.h:47
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
A parsed version of the target data layout string, together with methods for querying it.
Definition: DataLayout.h:63
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:602
LLVM_ABI VCallVisibility getVCallVisibility() const
Definition: Metadata.cpp:1900
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:576
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
bool hasLocalLinkage() const
Definition: GlobalValue.h:530
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:296
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:132
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:61
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:507
Class to represent integer types.
Definition: DerivedTypes.h:42
Metadata node.
Definition: Metadata.h:1077
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1565
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:626
TypeSize getSizeInBytes() const
Definition: DataLayout.h:635
MutableArrayRef< TypeSize > getMemberOffsets()
Definition: DataLayout.h:649
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
Definition: DataLayout.cpp:92
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:657
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1101
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
Definition: Intrinsics.cpp:762
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663