32#include "llvm/IR/IntrinsicsAArch64.h"
42#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
53 bool coalescePTrueIntrinsicCalls(
BasicBlock &BB,
67void SVEIntrinsicOpts::getAnalysisUsage(
AnalysisUsage &AU)
const {
72char SVEIntrinsicOpts::ID = 0;
73static const char *
name =
"SVE intrinsics optimizations";
79 return new SVEIntrinsicOpts();
99 if (
match(
User, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>())) {
105 if (ConvertToUses.
empty())
111 const auto *PTrueVTy = cast<ScalableVectorType>(PTrue->
getType());
114 auto *IntrUser = dyn_cast<IntrinsicInst>(
User);
115 if (IntrUser && IntrUser->getIntrinsicID() ==
116 Intrinsic::aarch64_sve_convert_from_svbool) {
117 const auto *IntrUserVTy = cast<ScalableVectorType>(IntrUser->getType());
120 if (IntrUserVTy->getElementCount().getKnownMinValue() >
121 PTrueVTy->getElementCount().getKnownMinValue())
133bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
135 if (PTrues.
size() <= 1)
139 auto *MostEncompassingPTrue =
141 auto *PTrue1VTy = cast<ScalableVectorType>(PTrue1->getType());
142 auto *PTrue2VTy = cast<ScalableVectorType>(PTrue2->getType());
143 return PTrue1VTy->getElementCount().getKnownMinValue() <
144 PTrue2VTy->getElementCount().getKnownMinValue();
149 PTrues.
remove(MostEncompassingPTrue);
159 Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());
161 auto *MostEncompassingPTrueVTy =
162 cast<VectorType>(MostEncompassingPTrue->getType());
163 auto *ConvertToSVBool = Builder.CreateIntrinsic(
164 Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
165 {MostEncompassingPTrue});
167 bool ConvertFromCreated =
false;
168 for (
auto *PTrue : PTrues) {
169 auto *PTrueVTy = cast<VectorType>(PTrue->getType());
173 if (MostEncompassingPTrueVTy != PTrueVTy) {
174 ConvertFromCreated =
true;
176 Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
177 auto *ConvertFromSVBool =
178 Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
179 {PTrueVTy}, {ConvertToSVBool});
180 PTrue->replaceAllUsesWith(ConvertFromSVBool);
182 PTrue->replaceAllUsesWith(MostEncompassingPTrue);
184 PTrue->eraseFromParent();
188 if (!ConvertFromCreated)
189 ConvertToSVBool->eraseFromParent();
242bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
244 bool Changed =
false;
246 for (
auto *
F : Functions) {
247 for (
auto &BB : *
F) {
256 auto *IntrI = dyn_cast<IntrinsicInst>(&
I);
257 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
260 const auto PTruePattern =
261 cast<ConstantInt>(IntrI->getOperand(0))->getZExtValue();
263 if (PTruePattern == AArch64SVEPredPattern::all)
264 SVAllPTrues.
insert(IntrI);
265 if (PTruePattern == AArch64SVEPredPattern::pow2)
266 SVPow2PTrues.
insert(IntrI);
269 Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
270 Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
279bool SVEIntrinsicOpts::optimizePredicateStore(
Instruction *
I) {
280 auto *
F =
I->getFunction();
281 auto Attr =
F->getFnAttribute(Attribute::VScaleRange);
285 unsigned MinVScale = Attr.getVScaleRangeMin();
286 std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
288 if (!MaxVScale || MinVScale != MaxVScale)
293 auto *FixedPredType =
297 auto *
Store = dyn_cast<StoreInst>(
I);
298 if (!Store || !
Store->isSimple())
302 if (
Store->getOperand(0)->getType() != FixedPredType)
306 auto *IntrI = dyn_cast<IntrinsicInst>(
Store->getOperand(0));
307 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract)
311 if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero())
315 auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0));
320 if (BitCast->getOperand(0)->getType() != PredType)
324 Builder.SetInsertPoint(
I);
326 Builder.CreateStore(BitCast->getOperand(0),
Store->getPointerOperand());
328 Store->eraseFromParent();
329 if (IntrI->use_empty())
330 IntrI->eraseFromParent();
331 if (BitCast->use_empty())
332 BitCast->eraseFromParent();
339bool SVEIntrinsicOpts::optimizePredicateLoad(
Instruction *
I) {
340 auto *
F =
I->getFunction();
341 auto Attr =
F->getFnAttribute(Attribute::VScaleRange);
345 unsigned MinVScale = Attr.getVScaleRangeMin();
346 std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
348 if (!MaxVScale || MinVScale != MaxVScale)
353 auto *FixedPredType =
357 auto *BitCast = dyn_cast<BitCastInst>(
I);
358 if (!BitCast || BitCast->getType() != PredType)
362 auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
363 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert)
367 if (!isa<UndefValue>(IntrI->getOperand(0)) ||
368 !cast<ConstantInt>(IntrI->getOperand(2))->isZero())
372 auto *
Load = dyn_cast<LoadInst>(IntrI->getOperand(1));
373 if (!Load || !
Load->isSimple())
377 if (
Load->getType() != FixedPredType)
381 Builder.SetInsertPoint(Load);
383 auto *LoadPred = Builder.CreateLoad(PredType,
Load->getPointerOperand());
385 BitCast->replaceAllUsesWith(LoadPred);
386 BitCast->eraseFromParent();
387 if (IntrI->use_empty())
388 IntrI->eraseFromParent();
389 if (
Load->use_empty())
390 Load->eraseFromParent();
395bool SVEIntrinsicOpts::optimizeInstructions(
397 bool Changed =
false;
399 for (
auto *
F : Functions) {
400 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
406 for (
auto *BB : RPOT) {
408 switch (
I.getOpcode()) {
409 case Instruction::Store:
410 Changed |= optimizePredicateStore(&
I);
412 case Instruction::BitCast:
413 Changed |= optimizePredicateLoad(&
I);
423bool SVEIntrinsicOpts::optimizeFunctions(
425 bool Changed =
false;
427 Changed |= optimizePTrueIntrinsicCalls(Functions);
428 Changed |= optimizeInstructions(Functions);
433bool SVEIntrinsicOpts::runOnModule(
Module &M) {
434 bool Changed =
false;
440 for (
auto &
F :
M.getFunctionList()) {
441 if (!
F.isDeclaration())
444 switch (
F.getIntrinsicID()) {
445 case Intrinsic::vector_extract:
446 case Intrinsic::vector_insert:
447 case Intrinsic::aarch64_sve_ptrue:
448 for (
User *U :
F.users())
456 if (!Functions.
empty())
457 Changed |= optimizeFunctions(Functions);
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool isPTruePromoted(IntrinsicInst *PTrue)
Checks if a ptrue intrinsic call is promoted.
This file implements a set that has insertion order iteration characteristics.
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass if and only if it does not:
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
bool remove(const value_type &X)
Remove an item from the set vector.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
size_type size() const
Determine the number of elements in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
bool match(Val *V, const Pattern &P)
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ModulePass * createSVEIntrinsicOptsPass()
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...