CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
137static cl::opt<bool> DisableBranchOpts(
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
158static cl::opt<bool> DisableStoreExtract(
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
162static cl::opt<bool> StressStoreExtract(
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
166static cl::opt<bool> DisableExtLdPromotion(
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
171static cl::opt<bool> StressExtLdPromotion(
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
176static cl::opt<bool> DisablePreheaderProtect(
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
180static cl::opt<bool> ProfileGuidedSectionPrefix(
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
184static cl::opt<bool> ProfileUnknownInSpecialSection(
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
194static cl::opt<bool> BBSectionsGuidedSectionPrefix(
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
203static cl::opt<uint64_t> FreqRatioToSkipMerge(
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
208static cl::opt<bool> ForceSplitStore(
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
212static cl::opt<bool> EnableTypePromotionMerge(
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
218static cl::opt<bool> DisableComplexAddrModes(
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
227static cl::opt<bool> AddrSinkNewSelects(
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
231static cl::opt<bool> AddrSinkCombineBaseReg(
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
235static cl::opt<bool> AddrSinkCombineBaseGV(
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
239static cl::opt<bool> AddrSinkCombineBaseOffs(
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
243static cl::opt<bool> AddrSinkCombineScaledReg(
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
252static cl::opt<bool> EnableICMP_EQToICMP_ST(
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
265static cl::opt<uint64_t>
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
269static cl::opt<unsigned>
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::Hidden,
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Do not modify any dominator tree.
291 ModifyBBDT, // Modify the Basic Block Dominator Tree.
292 ModifyInstDT // Modify the instruction dominance within a Basic Block.
293 // This usually means we move/delete/insert an instruction
294 // in a Basic Block, so we should re-iterate the instructions
295 // in such a Basic Block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using SExts = SmallVector<Instruction *, 16>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the original types of the related instructions before
335 /// their promotion, for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
348 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map serial numbers to Large offset GEPs.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of SExt promoted.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare(){};
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373 /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist: it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: To limit the build time of this pass, when a BB is updated we
379 /// need to insert such a BB into FreshBBs for huge functions.
380 SmallPtrSet<BasicBlock *, 32> FreshBBs;
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
391 bool run(Function &F, FunctionAnalysisManager &AM);
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
455 const SmallVectorImpl<Instruction *> &Exts,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool unfoldPowerOf2Test(CmpInst *Cmp);
478 void verifyBFIUpdates(Function &F);
479 bool _run(Function &F);
480};
481
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487 initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
488 }
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
496 AU.addRequired<ProfileSummaryInfoWrapperPass>();
497 AU.addRequired<TargetLibraryInfoWrapperPass>();
498 AU.addRequired<TargetPassConfig>();
499 AU.addRequired<TargetTransformInfoWrapperPass>();
500 AU.addRequired<LoopInfoWrapperPass>();
501 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
533INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
534INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
535INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
536INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
537INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
538INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540 "Optimize for code generation", false, false)
541
542FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
543 return new CodeGenPrepareLegacyPass();
544}
545
546PreservedAnalyses CodeGenPreparePass::run(Function &F,
547 FunctionAnalysisManager &AM) {
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
554 PreservedAnalyses PA;
555 PA.preserve<TargetLibraryAnalysis>();
556 PA.preserve<TargetIRAnalysis>();
557 PA.preserve<LoopAnalysis>();
558 return PA;
559}
560
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
567 TTI = &AM.getResult<TargetIRAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
569 BPI.reset(new BranchProbabilityInfo(F, *LI));
570 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
574 AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
575 return _run(F);
576}
577
578bool CodeGenPrepare::_run(Function &F) {
579 bool EverMadeChange = false;
580
581 OptSize = F.hasOptSize();
582 // Use the basic-block-sections profile to promote hot functions to .text.hot
583 // if requested.
584 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
585 BBSectionsProfileReader->isFunctionHot(F.getName())) {
586 (void)F.setSectionPrefix("hot");
587 } else if (ProfileGuidedSectionPrefix) {
588 // The hot attribute overwrites profile count based hotness, while profile
589 // count based hotness overwrites the cold attribute.
590 // This is a conservative behavior.
591 if (F.hasFnAttribute(Attribute::Hot) ||
592 PSI->isFunctionHotInCallGraph(&F, *BFI))
593 (void)F.setSectionPrefix("hot");
594 // If PSI shows this function is not hot, we will place the function
595 // into the unlikely section if (1) PSI shows this is a cold function, or
596 // (2) the function has the cold attribute.
597 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
598 F.hasFnAttribute(Attribute::Cold))
599 (void)F.setSectionPrefix("unlikely");
600 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
601 PSI->isFunctionHotnessUnknown(F))
602 (void)F.setSectionPrefix("unknown");
603 }
604
605 /// This optimization identifies DIV instructions that can be
606 /// profitably bypassed and carried out with a shorter, faster divide.
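// For example, if TLI->getBypassSlowDivWidths() reports that a slow wide
// divide (say 64-bit) can be bypassed with a narrower one (say 32-bit),
// bypassSlowDivision() below rewrites the DIV to check at run time whether
// both operands fit in the narrow width and, if so, uses the cheaper divide.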
607 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
608 const DenseMap<unsigned int, unsigned int> &BypassWidths =
609 TLI->getBypassSlowDivWidths();
610 BasicBlock *BB = &*F.begin();
611 while (BB != nullptr) {
612 // bypassSlowDivision may create new BBs, but we don't want to reapply the
613 // optimization to those blocks.
614 BasicBlock *Next = BB->getNextNode();
615 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
616 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
617 BB = Next;
618 }
619 }
620
621 // Get rid of @llvm.assume builtins before attempting to eliminate empty
622 // blocks, since there might be blocks that only contain @llvm.assume calls
623 // (plus arguments that we can get rid of).
624 EverMadeChange |= eliminateAssumptions(F);
625
626 // Eliminate blocks that contain only PHI nodes and an
627 // unconditional branch.
628 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
629
630 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 if (!DisableBranchOpts)
632 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
633
634 // Split some critical edges where one of the sources is an indirect branch,
635 // to help generate sane code for PHIs involving such edges.
636 EverMadeChange |=
637 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
638
639 // If we are optimizing a huge function, we need to consider the build time,
640 // because the basic algorithm's complexity is nearly O(N!).
641 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
642
643 // Transformations above may invalidate dominator tree and/or loop info.
644 DT.reset();
645 LI->releaseMemory();
646 LI->analyze(getDT(F));
647
648 bool MadeChange = true;
649 bool FuncIterated = false;
650 while (MadeChange) {
651 MadeChange = false;
652
653 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
654 if (FuncIterated && !FreshBBs.contains(&BB))
655 continue;
656
657 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
658 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
659
660 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
661 DT.reset();
662
663 MadeChange |= Changed;
664 if (IsHugeFunc) {
665 // If the BB is updated, it may still have a chance to be optimized.
666 // This usually happens during sink optimization.
667 // For example:
668 //
669 // bb0:
670 // %and = and i32 %a, 4
671 // %cmp = icmp eq i32 %and, 0
672 //
673 // If %cmp is sunk to another BB, %and will also have a chance to be sunk.
674 if (Changed)
675 FreshBBs.insert(&BB);
676 else if (FuncIterated)
677 FreshBBs.erase(&BB);
678 } else {
679 // For small/normal functions, we restart BB iteration if the dominator
680 // tree of the Function was changed.
681 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
682 break;
683 }
684 }
685 // We have iterated over all BBs in the function (this only matters for huge functions).
686 FuncIterated = IsHugeFunc;
687
688 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
689 MadeChange |= mergeSExts(F);
690 if (!LargeOffsetGEPMap.empty())
691 MadeChange |= splitLargeGEPOffsets();
692 MadeChange |= optimizePhiTypes(F);
693
694 if (MadeChange)
695 eliminateFallThrough(F, DT.get());
696
697#ifndef NDEBUG
698 if (MadeChange && VerifyLoopInfo)
699 LI->verify(getDT(F));
700#endif
701
702 // Really free removed instructions during promotion.
703 for (Instruction *I : RemovedInsts)
704 I->deleteValue();
705
706 EverMadeChange |= MadeChange;
707 SeenChainsForSExt.clear();
708 ValToSExtendedUses.clear();
709 RemovedInsts.clear();
710 LargeOffsetGEPMap.clear();
711 LargeOffsetGEPID.clear();
712 }
713
714 NewGEPBases.clear();
715 SunkAddrs.clear();
716
717 if (!DisableBranchOpts) {
718 MadeChange = false;
719 // Use a set vector to get deterministic iteration order. The order the
720 // blocks are removed may affect whether or not PHI nodes in successors
721 // are removed.
722 SmallSetVector<BasicBlock *, 8> WorkList;
723 for (BasicBlock &BB : F) {
724 SmallVector<BasicBlock *, 2> Successors(successors(&BB));
725 MadeChange |= ConstantFoldTerminator(&BB, true);
726 if (!MadeChange)
727 continue;
728
729 for (BasicBlock *Succ : Successors)
730 if (pred_empty(Succ))
731 WorkList.insert(Succ);
732 }
733
734 // Delete the dead blocks and any of their dead successors.
735 MadeChange |= !WorkList.empty();
736 while (!WorkList.empty()) {
737 BasicBlock *BB = WorkList.pop_back_val();
738 SmallVector<BasicBlock *, 2> Successors(successors(BB));
739
740 DeleteDeadBlock(BB);
741
742 for (BasicBlock *Succ : Successors)
743 if (pred_empty(Succ))
744 WorkList.insert(Succ);
745 }
746
747 // Merge pairs of basic blocks with unconditional branches, connected by
748 // a single edge.
749 if (EverMadeChange || MadeChange)
750 MadeChange |= eliminateFallThrough(F);
751
752 EverMadeChange |= MadeChange;
753 }
754
755 if (!DisableGCOpts) {
756 SmallVector<GCStatepointInst *, 2> Statepoints;
757 for (BasicBlock &BB : F)
758 for (Instruction &I : BB)
759 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
760 Statepoints.push_back(SP);
761 for (auto &I : Statepoints)
762 EverMadeChange |= simplifyOffsetableRelocate(*I);
763 }
764
765 // Do this last to clean up use-before-def scenarios introduced by other
766 // preparatory transforms.
767 EverMadeChange |= placeDbgValues(F);
768 EverMadeChange |= placePseudoProbes(F);
769
770#ifndef NDEBUG
771 if (VerifyBFIUpdates)
772 verifyBFIUpdates(F);
773#endif
774
775 return EverMadeChange;
776}
777
778bool CodeGenPrepare::eliminateAssumptions(Function &F) {
779 bool MadeChange = false;
780 for (BasicBlock &BB : F) {
781 CurInstIterator = BB.begin();
782 while (CurInstIterator != BB.end()) {
783 Instruction *I = &*(CurInstIterator++);
784 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
785 MadeChange = true;
786 Value *Operand = Assume->getOperand(0);
787 Assume->eraseFromParent();
788
789 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
790 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
791 });
792 }
793 }
794 }
795 return MadeChange;
796}
797
798/// An instruction is about to be deleted, so remove all references to it in our
799/// GEP-tracking data structures.
800void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
801 LargeOffsetGEPMap.erase(V);
802 NewGEPBases.erase(V);
803
804 auto *GEP = dyn_cast<GetElementPtrInst>(V);
805 if (!GEP)
806 return;
807
808 LargeOffsetGEPID.erase(GEP);
809
810 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
811 if (VecI == LargeOffsetGEPMap.end())
812 return;
813
814 auto &GEPVector = VecI->second;
815 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
816
817 if (GEPVector.empty())
818 LargeOffsetGEPMap.erase(VecI);
819}
820
821// Verify BFI has been updated correctly by recomputing BFI and comparing them.
822void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
823 DominatorTree NewDT(F);
824 LoopInfo NewLI(NewDT);
825 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
826 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
827 NewBFI.verifyMatch(*BFI);
828}
829
830/// Merge basic blocks which are connected by a single edge, where one of the
831/// basic blocks has a single successor pointing to the other basic block,
832/// which has a single predecessor.
833bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
834 bool Changed = false;
835 // Scan all of the blocks in the function, except for the entry block.
836 // Use a temporary array to avoid iterator being invalidated when
837 // deleting blocks.
838 SmallVector<WeakTrackingVH, 16> Blocks(
839 llvm::make_pointer_range(llvm::drop_begin(F)));
840
841 SmallSet<WeakTrackingVH, 16> Preds;
842 for (auto &Block : Blocks) {
843 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
881 RemoveRedundantDbgInstrs(BB);
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
895 BasicBlock::iterator BBI = BI->getIterator();
896 if (BBI != BB->begin()) {
897 --BBI;
898 if (!isa<PHINode>(BBI))
899 return nullptr;
900 }
901
902 // Do not break infinite loops.
903 BasicBlock *DestBB = BI->getSuccessor(0);
904 if (DestBB == BB)
905 return nullptr;
906
907 if (!canMergeBlocks(BB, DestBB))
908 DestBB = nullptr;
909
910 return DestBB;
911}
912
913/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
914/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
915/// edges in ways that are non-optimal for isel. Start by eliminating these
916/// blocks so we can split them the way we want them.
917bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
918 SmallPtrSet<BasicBlock *, 16> Preheaders;
919 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
920 while (!LoopList.empty()) {
921 Loop *L = LoopList.pop_back_val();
922 llvm::append_range(LoopList, *L);
923 if (BasicBlock *Preheader = L->getLoopPreheader())
924 Preheaders.insert(Preheader);
925 }
926
927 bool MadeChange = false;
928 // Copy blocks into a temporary array to avoid iterator invalidation issues
929 // as we remove them.
930 // Note that this intentionally skips the entry block.
931 SmallVector<WeakTrackingVH, 16> Blocks;
932 for (auto &Block : llvm::drop_begin(F)) {
933 // Delete phi nodes that could block deleting other empty blocks.
934 if (!DisableDeletePHIs)
935 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
936 Blocks.push_back(&Block);
937 }
938
939 for (auto &Block : Blocks) {
940 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
941 if (!BB)
942 continue;
943 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
944 if (!DestBB ||
945 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
946 continue;
947
948 eliminateMostlyEmptyBlock(BB);
949 MadeChange = true;
950 }
951 return MadeChange;
952}
953
954bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
955 BasicBlock *DestBB,
956 bool isPreheader) {
957 // Do not delete loop preheaders if doing so would create a critical edge.
958 // Loop preheaders can be good locations to spill registers. If the
959 // preheader is deleted and we create a critical edge, registers may be
960 // spilled in the loop body instead.
961 if (!DisablePreheaderProtect && isPreheader &&
962 !(BB->getSinglePredecessor() &&
963 BB->getSinglePredecessor()->getSinglePredecessor()))
964 return false;
965
966 // Skip merging if the block's successor is also a successor to any callbr
967 // that leads to this block.
968 // FIXME: Is this really needed? Is this a correctness issue?
969 for (BasicBlock *Pred : predecessors(BB)) {
970 if (isa<CallBrInst>(Pred->getTerminator()) &&
971 llvm::is_contained(successors(Pred), DestBB))
972 return false;
973 }
974
975 // Try to skip merging if the unique predecessor of BB is terminated by a
976 // switch or indirect branch instruction, and BB is used as an incoming block
977 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
978 // add COPY instructions in the predecessor of BB instead of BB (if it is not
979 // merged). Note that the critical edge created by merging such blocks won't be
980 // split in MachineSink because the jump table is not analyzable. By keeping
981 // such empty block (BB), ISel will place COPY instructions in BB, not in the
982 // predecessor of BB.
983 BasicBlock *Pred = BB->getUniquePredecessor();
984 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
985 isa<IndirectBrInst>(Pred->getTerminator())))
986 return true;
987
988 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
989 return true;
990
991 // We use a simple cost heuristic which determines that skipping merging is
992 // profitable if the cost of skipping merging is less than the cost of
993 // merging : Cost(skipping merging) < Cost(merging BB), where the
994 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
995 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
996 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
997 // Freq(Pred) / Freq(BB) > 2.
998 // Note that if there are multiple empty blocks sharing the same incoming
999 // value for the PHIs in the DestBB, we consider them together. In such
1000 // case, Cost(merging BB) will be the sum of their frequencies.
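// For example, with the default cgp-freq-ratio-to-skip-merge of 2, if
// Freq(Pred) == 300 and Freq(BB) == 100, then 300 > 2 * 100, so we keep the
// empty block and let ISel place the COPYs in BB rather than in Pred.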
1001
1002 if (!isa<PHINode>(DestBB->begin()))
1003 return true;
1004
1005 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1006
1007 // Find all other incoming blocks from which incoming values of all PHIs in
1008 // DestBB are the same as the ones from BB.
1009 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1010 if (DestBBPred == BB)
1011 continue;
1012
1013 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1014 return DestPN.getIncomingValueForBlock(BB) ==
1015 DestPN.getIncomingValueForBlock(DestBBPred);
1016 }))
1017 SameIncomingValueBBs.insert(DestBBPred);
1018 }
1019
1020 // See if all BB's incoming values are the same as the value from Pred. In this
1021 // case, there is no reason to skip merging because COPYs are expected to be placed in
1022 // Pred already.
1023 if (SameIncomingValueBBs.count(Pred))
1024 return true;
1025
1026 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1027 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1028
1029 for (auto *SameValueBB : SameIncomingValueBBs)
1030 if (SameValueBB->getUniquePredecessor() == Pred &&
1031 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1032 BBFreq += BFI->getBlockFreq(SameValueBB);
1033
1034 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1035 return !Limit || PredFreq <= *Limit;
1036}
1037
1038/// Return true if we can merge BB into DestBB if there is a single
1039/// unconditional branch between them, and BB contains no other non-phi
1040/// instructions.
1041bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1042 const BasicBlock *DestBB) const {
1043 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1044 // the successor. If there are more complex conditions (e.g. preheaders),
1045 // don't mess around with them.
1046 for (const PHINode &PN : BB->phis()) {
1047 for (const User *U : PN.users()) {
1048 const Instruction *UI = cast<Instruction>(U);
1049 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1050 return false;
1051 // If User is inside DestBB block and it is a PHINode then check
1052 // incoming value. If incoming value is not from BB then this is
1053 // a complex condition (e.g. preheaders) we want to avoid here.
1054 if (UI->getParent() == DestBB) {
1055 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1056 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1057 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1058 if (Insn && Insn->getParent() == BB &&
1059 Insn->getParent() != UPN->getIncomingBlock(I))
1060 return false;
1061 }
1062 }
1063 }
1064 }
1065
1066 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1067 // and DestBB may have conflicting incoming values for the block. If so, we
1068 // can't merge the block.
1069 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1070 if (!DestBBPN)
1071 return true; // no conflict.
1072
1073 // Collect the preds of BB.
1074 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1075 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1076 // It is faster to get preds from a PHI than with pred_iterator.
1077 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1078 BBPreds.insert(BBPN->getIncomingBlock(i));
1079 } else {
1080 BBPreds.insert_range(predecessors(BB));
1081 }
1082
1083 // Walk the preds of DestBB.
1084 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1085 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1086 if (BBPreds.count(Pred)) { // Common predecessor?
1087 for (const PHINode &PN : DestBB->phis()) {
1088 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1089 const Value *V2 = PN.getIncomingValueForBlock(BB);
1090
1091 // If V2 is a phi node in BB, look up what the mapped value will be.
1092 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1093 if (V2PN->getParent() == BB)
1094 V2 = V2PN->getIncomingValueForBlock(Pred);
1095
1096 // If there is a conflict, bail out.
1097 if (V1 != V2)
1098 return false;
1099 }
1100 }
1101 }
1102
1103 return true;
1104}
1105
1106/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1107static void replaceAllUsesWith(Value *Old, Value *New,
1108 SmallPtrSet<BasicBlock *, 32> &FreshBBs,
1109 bool IsHuge) {
1110 auto *OldI = dyn_cast<Instruction>(Old);
1111 if (OldI) {
1112 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1113 UI != E; ++UI) {
1114 Instruction *User = cast<Instruction>(*UI);
1115 if (IsHuge)
1116 FreshBBs.insert(User->getParent());
1117 }
1118 }
1119 Old->replaceAllUsesWith(New);
1120}
1121
1122/// Eliminate a basic block that has only phi's and an unconditional branch in
1123/// it.
1124void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1125 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1126 BasicBlock *DestBB = BI->getSuccessor(0);
1127
1128 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1129 << *BB << *DestBB);
1130
1131 // If the destination block has a single pred, then this is a trivial edge,
1132 // just collapse it.
1133 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1134 if (SinglePred != DestBB) {
1135 assert(SinglePred == BB &&
1136 "Single predecessor not the same as predecessor");
1137 // Merge DestBB into SinglePred/BB and delete it.
1138 MergeBlockIntoPredecessor(DestBB);
1139 // Note: BB(=SinglePred) will not be deleted on this path.
1140 // DestBB(=its single successor) is the one that was deleted.
1141 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1142
1143 if (IsHugeFunc) {
1144 // Update FreshBBs to optimize the merged BB.
1145 FreshBBs.insert(SinglePred);
1146 FreshBBs.erase(DestBB);
1147 }
1148 return;
1149 }
1150 }
1151
1152 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1153 // to handle the new incoming edges it is about to have.
1154 for (PHINode &PN : DestBB->phis()) {
1155 // Remove the incoming value for BB, and remember it.
1156 Value *InVal = PN.removeIncomingValue(BB, false);
1157
1158 // Two options: either the InVal is a phi node defined in BB or it is some
1159 // value that dominates BB.
1160 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1161 if (InValPhi && InValPhi->getParent() == BB) {
1162 // Add all of the input values of the input PHI as inputs of this phi.
1163 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1164 PN.addIncoming(InValPhi->getIncomingValue(i),
1165 InValPhi->getIncomingBlock(i));
1166 } else {
1167 // Otherwise, add one instance of the dominating value for each edge that
1168 // we will be adding.
1169 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1170 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1171 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1172 } else {
1173 for (BasicBlock *Pred : predecessors(BB))
1174 PN.addIncoming(InVal, Pred);
1175 }
1176 }
1177 }
1178
1179 // Preserve loop Metadata.
1180 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1181 for (auto *Pred : predecessors(BB))
1182 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1183 }
1184
1185 // The PHIs are now updated, change everything that refers to BB to use
1186 // DestBB and remove BB.
1187 BB->replaceAllUsesWith(DestBB);
1188 BB->eraseFromParent();
1189 ++NumBlocksElim;
1190
1191 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1192}
1193
1194// Computes a map of base pointer relocation instructions to corresponding
1195// derived pointer relocation instructions given a vector of all relocate calls
1196static void computeBaseDerivedRelocateMap(
1197 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1198 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
1199 &RelocateInstMap) {
1200 // Collect information in two maps: one primarily for locating the base object
1201 // while filling the second map; the second map is the final structure holding
1202 // a mapping between Base and corresponding Derived relocate calls
1203 MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1204 for (auto *ThisRelocate : AllRelocateCalls) {
1205 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1206 ThisRelocate->getDerivedPtrIndex());
1207 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1208 }
1209 for (auto &Item : RelocateIdxMap) {
1210 std::pair<unsigned, unsigned> Key = Item.first;
1211 if (Key.first == Key.second)
1212 // Base relocation: nothing to insert
1213 continue;
1214
1215 GCRelocateInst *I = Item.second;
1216 auto BaseKey = std::make_pair(Key.first, Key.first);
1217
1218 // We're iterating over RelocateIdxMap so we cannot modify it.
1219 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1220 if (MaybeBase == RelocateIdxMap.end())
1221 // TODO: We might want to insert a new base object relocate and gep off
1222 // that, if there are enough derived object relocates.
1223 continue;
1224
1225 RelocateInstMap[MaybeBase->second].push_back(I);
1226 }
1227}
1228
1229// Accepts a GEP and extracts the operands into a vector provided they're all
1230// small integer constants
1231static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1232 SmallVectorImpl<Value *> &OffsetV) {
1233 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1234 // Only accept small constant integer operands
1235 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1236 if (!Op || Op->getZExtValue() > 20)
1237 return false;
1238 }
1239
1240 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1241 OffsetV.push_back(GEP->getOperand(i));
1242 return true;
1243}
1244
1245// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1246// replace, computes a replacement, and applies it.
1247static bool
1248simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1249 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1250 bool MadeChange = false;
1251 // We must ensure the relocation of derived pointer is defined after
1252 // relocation of base pointer. If we find a relocation corresponding to base
1253 // defined earlier than relocation of base then we move relocation of base
1254 // right before found relocation. We consider only relocation in the same
1255 // basic block as relocation of base. Relocations from other basic block will
1256 // be skipped by optimization and we do not care about them.
1257 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1258 &*R != RelocatedBase; ++R)
1259 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1260 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1261 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1262 RelocatedBase->moveBefore(RI->getIterator());
1263 MadeChange = true;
1264 break;
1265 }
1266
1267 for (GCRelocateInst *ToReplace : Targets) {
1268 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1269 "Not relocating a derived object of the original base object");
1270 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1271 // A duplicate relocate call. TODO: coalesce duplicates.
1272 continue;
1273 }
1274
1275 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1276 // Base and derived relocates are in different basic blocks.
1277 // In this case transform is only valid when base dominates derived
1278 // relocate. However it would be too expensive to check dominance
1279 // for each such relocate, so we skip the whole transformation.
1280 continue;
1281 }
1282
1283 Value *Base = ToReplace->getBasePtr();
1284 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1285 if (!Derived || Derived->getPointerOperand() != Base)
1286 continue;
1287
1288 SmallVector<Value *, 2> OffsetV;
1289 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1290 continue;
1291
1292 // Create a Builder and replace the target callsite with a gep
1293 assert(RelocatedBase->getNextNode() &&
1294 "Should always have one since it's not a terminator");
1295
1296 // Insert after RelocatedBase
1297 IRBuilder<> Builder(RelocatedBase->getNextNode());
1298 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1299
1300 // If gc_relocate does not match the actual type, cast it to the right type.
1301 // In theory, there must be a bitcast after gc_relocate if the type does not
1302 // match, and we should reuse it to get the derived pointer. But it could be
1303 // cases like this:
1304 // bb1:
1305 // ...
1306 // %g1 = call coldcc i8 addrspace(1)*
1307 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1308 //
1309 // bb2:
1310 // ...
1311 // %g2 = call coldcc i8 addrspace(1)*
1312 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313 //
1314 // merge:
1315 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1316 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1317 //
1318 // In this case, we cannot find the bitcast anymore. So we insert a new
1319 // bitcast whether there is already one or not. In this way, we can handle
1320 // all cases, and the extra bitcast should be optimized away in later
1321 // passes.
1322 Value *ActualRelocatedBase = RelocatedBase;
1323 if (RelocatedBase->getType() != Base->getType()) {
1324 ActualRelocatedBase =
1325 Builder.CreateBitCast(RelocatedBase, Base->getType());
1326 }
1327 Value *Replacement =
1328 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1329 ArrayRef(OffsetV));
1330 Replacement->takeName(ToReplace);
1331 // If the newly generated derived pointer's type does not match the original
1332 // derived pointer's type, cast the new derived pointer to match it. Same
1333 // reasoning as above.
1334 Value *ActualReplacement = Replacement;
1335 if (Replacement->getType() != ToReplace->getType()) {
1336 ActualReplacement =
1337 Builder.CreateBitCast(Replacement, ToReplace->getType());
1338 }
1339 ToReplace->replaceAllUsesWith(ActualReplacement);
1340 ToReplace->eraseFromParent();
1341
1342 MadeChange = true;
1343 }
1344 return MadeChange;
1345}
1346
1347// Turns this:
1348//
1349// %base = ...
1350// %ptr = gep %base + 15
1351// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1352// %base' = relocate(%tok, i32 4, i32 4)
1353// %ptr' = relocate(%tok, i32 4, i32 5)
1354// %val = load %ptr'
1355//
1356// into this:
1357//
1358// %base = ...
1359// %ptr = gep %base + 15
1360// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1361// %base' = gc.relocate(%tok, i32 4, i32 4)
1362// %ptr' = gep %base' + 15
1363// %val = load %ptr'
1364bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1365 bool MadeChange = false;
1366 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1367 for (auto *U : I.users())
1368 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1369 // Collect all the relocate calls associated with a statepoint
1370 AllRelocateCalls.push_back(Relocate);
1371
1372 // We need at least one base pointer relocation + one derived pointer
1373 // relocation to mangle
1374 if (AllRelocateCalls.size() < 2)
1375 return false;
1376
1377 // RelocateInstMap is a mapping from the base relocate instruction to the
1378 // corresponding derived relocate instructions
1379 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1380 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1381 if (RelocateInstMap.empty())
1382 return false;
1383
1384 for (auto &Item : RelocateInstMap)
1385 // Item.first is the RelocatedBase to offset against
1386 // Item.second is the vector of Targets to replace
1387 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1388 return MadeChange;
1389}
1390
1391/// Sink the specified cast instruction into its user blocks.
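/// For example, if a trunc is defined in the entry block but only used in a
/// cold block, a clone of the trunc is inserted at the top of the cold block
/// and the use is rewritten to point at the clone; if the original cast is
/// left with no uses, it is erased. This avoids keeping the cast result live
/// in a virtual register across blocks.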
1392static bool SinkCast(CastInst *CI) {
1393 BasicBlock *DefBB = CI->getParent();
1394
1395 /// InsertedCasts - Only insert a cast in each block once.
1396 DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1397
1398 bool MadeChange = false;
1399 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1400 UI != E;) {
1401 Use &TheUse = UI.getUse();
1402 Instruction *User = cast<Instruction>(*UI);
1403
1404 // Figure out which BB this cast is used in. For PHI's this is the
1405 // appropriate predecessor block.
1406 BasicBlock *UserBB = User->getParent();
1407 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1408 UserBB = PN->getIncomingBlock(TheUse);
1409 }
1410
1411 // Preincrement use iterator so we don't invalidate it.
1412 ++UI;
1413
1414 // The first insertion point of a block containing an EH pad is after the
1415 // pad. If the pad is the user, we cannot sink the cast past the pad.
1416 if (User->isEHPad())
1417 continue;
1418
1419 // If the block selected to receive the cast is an EH pad that does not
1420 // allow non-PHI instructions before the terminator, we can't sink the
1421 // cast.
1422 if (UserBB->getTerminator()->isEHPad())
1423 continue;
1424
1425 // If this user is in the same block as the cast, don't change the cast.
1426 if (UserBB == DefBB)
1427 continue;
1428
1429 // If we have already inserted a cast into this block, use it.
1430 CastInst *&InsertedCast = InsertedCasts[UserBB];
1431
1432 if (!InsertedCast) {
1433 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1434 assert(InsertPt != UserBB->end());
1435 InsertedCast = cast<CastInst>(CI->clone());
1436 InsertedCast->insertBefore(*UserBB, InsertPt);
1437 }
1438
1439 // Replace a use of the cast with a use of the new cast.
1440 TheUse = InsertedCast;
1441 MadeChange = true;
1442 ++NumCastUses;
1443 }
1444
1445 // If we removed all uses, nuke the cast.
1446 if (CI->use_empty()) {
1447 salvageDebugInfo(*CI);
1448 CI->eraseFromParent();
1449 MadeChange = true;
1450 }
1451
1452 return MadeChange;
1453}
1454
1455/// If the specified cast instruction is a noop copy (e.g. it's casting from
1456/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1457/// reduce the number of virtual registers that must be created and coalesced.
1458///
1459/// Return true if any changes are made.
1460static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1461 const DataLayout &DL) {
1462 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1463 // than sinking only nop casts, but is helpful on some platforms.
1464 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1465 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1466 ASC->getDestAddressSpace()))
1467 return false;
1468 }
1469
1470 // If this is a noop copy,
1471 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1472 EVT DstVT = TLI.getValueType(DL, CI->getType());
1473
1474 // This is an fp<->int conversion?
1475 if (SrcVT.isInteger() != DstVT.isInteger())
1476 return false;
1477
1478 // If this is an extension, it will be a zero or sign extension, which
1479 // isn't a noop.
1480 if (SrcVT.bitsLT(DstVT))
1481 return false;
1482
1483 // If these values will be promoted, find out what they will be promoted
1484 // to. This helps us consider truncates on PPC as noop copies when they
1485 // are.
1486 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1487 TargetLowering::TypePromoteInteger)
1488 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1489 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1490 TargetLowering::TypePromoteInteger)
1491 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1492
1493 // If, after promotion, these are the same types, this is a noop copy.
1494 if (SrcVT != DstVT)
1495 return false;
1496
1497 return SinkCast(CI);
1498}
1499
1500// Match a simple increment by constant operation. Note that if a sub is
1501// matched, the step is negated (as if the step had been canonicalized to
1502// an add, even though we leave the instruction alone.)
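// For example, %i = add i32 %phi, 4 matches with LHS = %phi and Step = 4,
// while %i = sub i32 %phi, 4 matches with LHS = %phi and Step = -4.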
1503static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1504 Constant *&Step) {
1505 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1506 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1507 m_Instruction(LHS), m_Constant(Step)))))
1508 return true;
1509 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1510 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1511 m_Instruction(LHS), m_Constant(Step))))) {
1512 Step = ConstantExpr::getNeg(Step);
1513 return true;
1514 }
1515 return false;
1516}
1517
1518/// If given \p PN is an inductive variable with value IVInc coming from the
1519/// backedge, and on each iteration it gets increased by Step, return pair
1520/// <IVInc, Step>. Otherwise, return std::nullopt.
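///
/// For example, for the canonical loop
///   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
///   %iv.next = add i64 %iv, 1
/// this returns the pair <%iv.next, i64 1>.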
1521static std::optional<std::pair<Instruction *, Constant *>>
1522getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1523 const Loop *L = LI->getLoopFor(PN->getParent());
1524 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1525 return std::nullopt;
1526 auto *IVInc =
1527 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1528 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1529 return std::nullopt;
1530 Instruction *LHS = nullptr;
1531 Constant *Step = nullptr;
1532 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1533 return std::make_pair(IVInc, Step);
1534 return std::nullopt;
1535}
1536
1537static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1538 auto *I = dyn_cast<Instruction>(V);
1539 if (!I)
1540 return false;
1541 Instruction *LHS = nullptr;
1542 Constant *Step = nullptr;
1543 if (!matchIncrement(I, LHS, Step))
1544 return false;
1545 if (auto *PN = dyn_cast<PHINode>(LHS))
1546 if (auto IVInc = getIVIncrement(PN, LI))
1547 return IVInc->first == I;
1548 return false;
1549}
1550
1551bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1552 Value *Arg0, Value *Arg1,
1553 CmpInst *Cmp,
1554 Intrinsic::ID IID) {
1555 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1556 if (!isIVIncrement(BO, LI))
1557 return false;
1558 const Loop *L = LI->getLoopFor(BO->getParent());
1559 assert(L && "L should not be null after isIVIncrement()");
1560 // Do not risk moving the increment into a child loop.
1561 if (LI->getLoopFor(Cmp->getParent()) != L)
1562 return false;
1563
1564 // Finally, we need to ensure that the insert point will dominate all
1565 // existing uses of the increment.
1566
1567 auto &DT = getDT(*BO->getParent()->getParent());
1568 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1569 // If we're moving up the dom tree, all uses are trivially dominated.
1570 // (This is the common case for code produced by LSR.)
1571 return true;
1572
1573 // Otherwise, special case the single use in the phi recurrence.
1574 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1575 };
1576 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1577 // We used to use a dominator tree here to allow multi-block optimization.
1578 // But that was problematic because:
1579 // 1. It could cause a perf regression by hoisting the math op into the
1580 // critical path.
1581 // 2. It could cause a perf regression by creating a value that was live
1582 // across multiple blocks and increasing register pressure.
1583 // 3. Use of a dominator tree could cause large compile-time regression.
1584 // This is because we recompute the DT on every change in the main CGP
1585 // run-loop. The recomputing is probably unnecessary in many cases, so if
1586 // that was fixed, using a DT here would be ok.
1587 //
1588 // There is one important particular case we still want to handle: if BO is
1589 // the IV increment. Important properties that make it profitable:
1590 // - We can speculate IV increment anywhere in the loop (as long as the
1591 // indvar Phi is its only user);
1592 // - Upon computing Cmp, we effectively compute something equivalent to the
1593 // IV increment (even though it looks different in the IR). So moving it up
1594 // to the cmp point does not really increase register pressure.
1595 return false;
1596 }
1597
1598 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1599 if (BO->getOpcode() == Instruction::Add &&
1600 IID == Intrinsic::usub_with_overflow) {
1601 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1602 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1603 }
1604
1605 // Insert at the first instruction of the pair.
1606 Instruction *InsertPt = nullptr;
1607 for (Instruction &Iter : *Cmp->getParent()) {
1608 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1609 // the overflow intrinsic are defined.
1610 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1611 InsertPt = &Iter;
1612 break;
1613 }
1614 }
1615 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1616
1617 IRBuilder<> Builder(InsertPt);
1618 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1619 if (BO->getOpcode() != Instruction::Xor) {
1620 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1621 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1622 } else
1623 assert(BO->hasOneUse() &&
1624 "Patterns with XOr should use the BO only in the compare");
1625 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1626 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1627 Cmp->eraseFromParent();
1628 BO->eraseFromParent();
1629 return true;
1630}
1631
1632/// Match special-case patterns that check for unsigned add overflow.
1633static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1634 BinaryOperator *&Add) {
1635 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1636 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1637 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1638
1639 // We are not expecting non-canonical/degenerate code. Just bail out.
1640 if (isa<Constant>(A))
1641 return false;
1642
1643 ICmpInst::Predicate Pred = Cmp->getPredicate();
1644 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1645 B = ConstantInt::get(B->getType(), 1);
1646 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1647 B = Constant::getAllOnesValue(B->getType());
1648 else
1649 return false;
1650
1651 // Check the users of the variable operand of the compare looking for an add
1652 // with the adjusted constant.
1653 for (User *U : A->users()) {
1654 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1655 Add = cast<BinaryOperator>(U);
1656 return true;
1657 }
1658 }
1659 return false;
1660}
1661
1662/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1663/// intrinsic. Return true if any changes were made.
1664bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1665 ModifyDT &ModifiedDT) {
1666 bool EdgeCase = false;
1667 Value *A, *B;
1668 BinaryOperator *Add;
1669 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1670 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1671 return false;
1672 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1673 A = Add->getOperand(0);
1674 B = Add->getOperand(1);
1675 EdgeCase = true;
1676 }
1677
1678 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1679 TLI->getValueType(*DL, Add->getType()),
1680 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1681 return false;
1682
1683 // We don't want to move around uses of condition values this late, so we
1684 // check if it is legal to create the call to the intrinsic in the basic
1685 // block containing the icmp.
1686 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1687 return false;
1688
1689 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1690 Intrinsic::uadd_with_overflow))
1691 return false;
1692
1693 // Reset callers - do not crash by iterating over a dead instruction.
1694 ModifiedDT = ModifyDT::ModifyInstDT;
1695 return true;
1696}
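// A minimal sketch of the rewrite performed via replaceMathCmpWithIntrinsic,
// assuming a hypothetical add/compare pair in one block:
//   %add = add i32 %a, %b
//   %cmp = icmp ult i32 %add, %a            ; unsigned-overflow check
// becomes
//   %m    = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %m, 0  ; replaces %add
//   %ov   = extractvalue { i32, i1 } %m, 1  ; replaces %cmp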
1697
1698bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1699 ModifyDT &ModifiedDT) {
1700 // We are not expecting non-canonical/degenerate code. Just bail out.
1701 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1702 if (isa<Constant>(A) && isa<Constant>(B))
1703 return false;
1704
1705 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1706 ICmpInst::Predicate Pred = Cmp->getPredicate();
1707 if (Pred == ICmpInst::ICMP_UGT) {
1708 std::swap(A, B);
1709 Pred = ICmpInst::ICMP_ULT;
1710 }
1711 // Convert special-case: (A == 0) is the same as (A u< 1).
1712 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1713 B = ConstantInt::get(B->getType(), 1);
1714 Pred = ICmpInst::ICMP_ULT;
1715 }
1716 // Convert special-case: (A != 0) is the same as (0 u< A).
1717 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1718 std::swap(A, B);
1719 Pred = ICmpInst::ICMP_ULT;
1720 }
1721 if (Pred != ICmpInst::ICMP_ULT)
1722 return false;
1723
1724 // Walk the users of a variable operand of a compare looking for a subtract or
1725 // add with that same operand. Also match the 2nd operand of the compare to
1726 // the add/sub, but that may be a negated constant operand of an add.
1727 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1728 BinaryOperator *Sub = nullptr;
1729 for (User *U : CmpVariableOperand->users()) {
1730 // A - B, A u< B --> usubo(A, B)
1731 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1732 Sub = cast<BinaryOperator>(U);
1733 break;
1734 }
1735
1736 // A + (-C), A u< C (canonicalized form of (sub A, C))
1737 const APInt *CmpC, *AddC;
1738 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1739 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1740 Sub = cast<BinaryOperator>(U);
1741 break;
1742 }
1743 }
1744 if (!Sub)
1745 return false;
1746
1747 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1748 TLI->getValueType(*DL, Sub->getType()),
1749 Sub->hasNUsesOrMore(1)))
1750 return false;
1751
1752 // We don't want to move around uses of condition values this late, so we
1753 // check if it is legal to create the call to the intrinsic in the basic
1754 // block containing the icmp.
1755 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1756 return false;
1757
1758 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1759 Cmp, Intrinsic::usub_with_overflow))
1760 return false;
1761
1762 // Reset callers - do not crash by iterating over a dead instruction.
1763 ModifiedDT = ModifyDT::ModifyInstDT;
1764 return true;
1765}
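// Likewise, a sketch of the usubo form on hypothetical values:
//   %sub = sub i32 %a, %b
//   %cmp = icmp ult i32 %a, %b              ; borrow-out check
// becomes
//   %m    = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %m, 0  ; replaces %sub
//   %ov   = extractvalue { i32, i1 } %m, 1  ; replaces %cmp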
1766
1767// Decanonicalizes the icmp+ctpop power-of-two test if ctpop is slow.
1768// The same transformation exists in the DAG combiner, but we repeat it here
1769// because the DAG builder can break the pattern by moving the icmp into a successor block.
1770bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1771 CmpPredicate Pred;
1772 Value *X;
1773 const APInt *C;
1774
1775 // (icmp (ctpop x), c)
1776 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1777 m_APInt(C))))
1778 return false;
1779
1780 // We're only interested in "is power of 2 [or zero]" patterns.
1781 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1782 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1783 (Pred == CmpInst::ICMP_UGT && *C == 1);
1784 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1785 return false;
1786
1787 // Some targets have better codegen for `ctpop(x) u< 2` / `ctpop(x) u> 1` than for
1788 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1789 // and otherwise expand ctpop into a few simple instructions.
1790 Type *OpTy = X->getType();
1791 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1792 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1793 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1794 return false;
1795
1796 // ctpop(x) == 1 -> ctpop(x) u< 2
1797 // ctpop(x) != 1 -> ctpop(x) u> 1
1798 if (Pred == ICmpInst::ICMP_EQ) {
1799 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1800 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1801 } else {
1802 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1803 }
1804 return true;
1805 }
1806
1807 Value *NewCmp;
1808 if (IsPowerOf2OrZeroTest ||
1809 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1810 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1811 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1812 IRBuilder<> Builder(Cmp);
1813 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1814 Value *And = Builder.CreateAnd(X, Sub);
1815 CmpInst::Predicate NewPred =
1816 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1817 ? CmpInst::ICMP_EQ
1818 : CmpInst::ICMP_NE;
1819 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1820 } else {
1821 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1822 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1823 IRBuilder<> Builder(Cmp);
1824 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1825 Value *Xor = Builder.CreateXor(X, Sub);
1826 CmpInst::Predicate NewPred =
1827 Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1828 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1829 }
1830
1831 Cmp->replaceAllUsesWith(NewCmp);
1832 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1833 return true;
1834}
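// For example, when ctpop is slow and %x is known non-zero (hypothetical IR):
//   %p = call i32 @llvm.ctpop.i32(i32 %x)
//   %c = icmp eq i32 %p, 1
// is expanded into the classic power-of-two test:
//   %s = add i32 %x, -1
//   %a = and i32 %x, %s
//   %c = icmp eq i32 %a, 0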
1835
1836/// Sink the given CmpInst into user blocks to reduce the number of virtual
1837/// registers that must be created and coalesced. This is a clear win except on
1838/// targets with multiple condition code registers (PowerPC), where it might
1839/// lose; some adjustment may be wanted there.
1840///
1841/// Return true if any changes are made.
1842static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1843 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1844 return false;
1845
1846 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1847 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1848 return false;
1849
1850 // Only insert a cmp in each block once.
1851 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1852
1853 bool MadeChange = false;
1854 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1855 UI != E;) {
1856 Use &TheUse = UI.getUse();
1857 Instruction *User = cast<Instruction>(*UI);
1858
1859 // Preincrement use iterator so we don't invalidate it.
1860 ++UI;
1861
1862 // Don't bother for PHI nodes.
1863 if (isa<PHINode>(User))
1864 continue;
1865
1866 // Figure out which BB this cmp is used in.
1867 BasicBlock *UserBB = User->getParent();
1868 BasicBlock *DefBB = Cmp->getParent();
1869
1870 // If this user is in the same block as the cmp, don't change the cmp.
1871 if (UserBB == DefBB)
1872 continue;
1873
1874 // If we have already inserted a cmp into this block, use it.
1875 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1876
1877 if (!InsertedCmp) {
1878 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1879 assert(InsertPt != UserBB->end());
1880 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1881 Cmp->getOperand(0), Cmp->getOperand(1), "");
1882 InsertedCmp->insertBefore(*UserBB, InsertPt);
1883 // Propagate the debug info.
1884 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1885 }
1886
1887 // Replace a use of the cmp with a use of the new cmp.
1888 TheUse = InsertedCmp;
1889 MadeChange = true;
1890 ++NumCmpUses;
1891 }
1892
1893 // If we removed all uses, nuke the cmp.
1894 if (Cmp->use_empty()) {
1895 Cmp->eraseFromParent();
1896 MadeChange = true;
1897 }
1898
1899 return MadeChange;
1900}
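// Rough sketch of the sinking (hypothetical blocks): given
//   entry:  %c = icmp eq i32 %x, 0
//   bb1:    br i1 %c, ...
//   bb2:    br i1 %c, ...
// the compare is re-created in bb1 and bb2, each use is rewritten to its local
// copy, and the original compare is erased once it has no remaining uses, so
// isel can fold the compare into each branch.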
1901
1902/// For pattern like:
1903///
1904/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1905/// ...
1906/// DomBB:
1907/// ...
1908/// br DomCond, TrueBB, CmpBB
1909/// CmpBB: (with DomBB being the single predecessor)
1910/// ...
1911/// Cmp = icmp eq CmpOp0, CmpOp1
1912/// ...
1913///
1914/// It would use two comparisons on targets where the lowering of icmp sgt/slt
1915/// differs from the lowering of icmp eq (PowerPC). This function tries to
1916/// convert 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1917/// After that, DomCond and Cmp can use the same comparison, so one comparison
1918/// is eliminated.
1919///
1920/// Return true if any changes are made.
1921static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1922 const TargetLowering &TLI) {
1923 if (TLI.isEqualityCmpFoldedWithSignedCmp())
1924 return false;
1925
1926 ICmpInst::Predicate Pred = Cmp->getPredicate();
1927 if (Pred != ICmpInst::ICMP_EQ)
1928 return false;
1929
1930 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1931 // icmp slt/sgt would introduce more redundant LLVM IR.
1932 for (User *U : Cmp->users()) {
1933 if (isa<BranchInst>(U))
1934 continue;
1935 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1936 continue;
1937 return false;
1938 }
1939
1940 // This is a cheap/incomplete check for dominance - just match a single
1941 // predecessor with a conditional branch.
1942 BasicBlock *CmpBB = Cmp->getParent();
1943 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1944 if (!DomBB)
1945 return false;
1946
1947 // We want to ensure that the only way control gets to the comparison of
1948 // interest is that a less/greater than comparison on the same operands is
1949 // false.
1950 Value *DomCond;
1951 BasicBlock *TrueBB, *FalseBB;
1952 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1953 return false;
1954 if (CmpBB != FalseBB)
1955 return false;
1956
1957 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1958 CmpPredicate DomPred;
1959 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1960 return false;
1961 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1962 return false;
1963
1964 // Convert the equality comparison to the opposite of the dominating
1965 // comparison and swap the direction for all branch/select users.
1966 // We have conceptually converted:
1967 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1968 // to
1969 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1970 // And similarly for branches.
1971 for (User *U : Cmp->users()) {
1972 if (auto *BI = dyn_cast<BranchInst>(U)) {
1973 assert(BI->isConditional() && "Must be conditional");
1974 BI->swapSuccessors();
1975 continue;
1976 }
1977 if (auto *SI = dyn_cast<SelectInst>(U)) {
1978 // Swap operands
1979 SI->swapValues();
1980 SI->swapProfMetadata();
1981 continue;
1982 }
1983 llvm_unreachable("Must be a branch or a select");
1984 }
1985 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1986 return true;
1987}
1988
1989/// Many architectures use the same instruction for both subtract and cmp. Try
1990/// to swap cmp operands to match subtract operations to allow for CSE.
1991static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1992 Value *Op0 = Cmp->getOperand(0);
1993 Value *Op1 = Cmp->getOperand(1);
1994 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1995 isa<Constant>(Op1) || Op0 == Op1)
1996 return false;
1997
1998 // If a subtract already has the same operands as a compare, swapping would be
1999 // bad. If a subtract has the same operands as a compare but in reverse order,
2000 // then swapping is good.
2001 int GoodToSwap = 0;
2002 unsigned NumInspected = 0;
2003 for (const User *U : Op0->users()) {
2004 // Avoid walking many users.
2005 if (++NumInspected > 128)
2006 return false;
2007 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2008 GoodToSwap++;
2009 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2010 GoodToSwap--;
2011 }
2012
2013 if (GoodToSwap > 0) {
2014 Cmp->swapOperands();
2015 return true;
2016 }
2017 return false;
2018}
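// E.g., on hypothetical IR with
//   %d = sub i32 %b, %a
//   %c = icmp ult i32 %a, %b
// the compare is swapped to "icmp ugt i32 %b, %a", matching the operand order
// of the subtract so targets that implement cmp as a subtract can CSE the two.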
2019
2020static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2021 const DataLayout &DL) {
2022 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2023 if (!FCmp)
2024 return false;
2025
2026 // Don't fold if the target offers free fabs and the predicate is legal.
2027 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2028 if (TLI.isFAbsFree(VT) &&
2029 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2030 VT.getSimpleVT()))
2031 return false;
2032
2033 // Reverse the canonicalization if it is a FP class test
2034 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2035 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2036 };
2037 auto [ClassVal, ClassTest] =
2038 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2039 FCmp->getOperand(0), FCmp->getOperand(1));
2040 if (!ClassVal)
2041 return false;
2042
2043 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2044 return false;
2045
2046 IRBuilder<> Builder(Cmp);
2047 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2048 Cmp->replaceAllUsesWith(IsFPClass);
2049 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
2050 return true;
2051}
2052
2053static bool isRemOfLoopIncrementWithLoopInvariant(
2054 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2055 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2056 Value *Incr, *RemAmt;
2057 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2058 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2059 return false;
2060
2061 Value *AddInst, *AddOffset;
2062 // Find out loop increment PHI.
2063 auto *PN = dyn_cast<PHINode>(Incr);
2064 if (PN != nullptr) {
2065 AddInst = nullptr;
2066 AddOffset = nullptr;
2067 } else {
2068 // Search through a NUW add on top of the loop increment.
2069 Value *V0, *V1;
2070 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2071 return false;
2072
2073 AddInst = Incr;
2074 PN = dyn_cast<PHINode>(V0);
2075 if (PN != nullptr) {
2076 AddOffset = V1;
2077 } else {
2078 PN = dyn_cast<PHINode>(V1);
2079 AddOffset = V0;
2080 }
2081 }
2082
2083 if (!PN)
2084 return false;
2085
2086 // This isn't strictly necessary; what we really need is one increment and
2087 // any number of initial values, all being the same.
2088 if (PN->getNumIncomingValues() != 2)
2089 return false;
2090
2091 // Only trivially analyzable loops.
2092 Loop *L = LI->getLoopFor(PN->getParent());
2093 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2094 return false;
2095
2096 // Require that the remainder is in the loop.
2097 if (!L->contains(Rem))
2098 return false;
2099
2100 // Only works if the remainder amount is a loop invariant.
2101 if (!L->isLoopInvariant(RemAmt))
2102 return false;
2103
2104 // Only works if the AddOffset is a loop invariant.
2105 if (AddOffset && !L->isLoopInvariant(AddOffset))
2106 return false;
2107
2108 // Is the PHI a loop increment?
2109 auto LoopIncrInfo = getIVIncrement(PN, LI);
2110 if (!LoopIncrInfo)
2111 return false;
2112
2113 // We need remainder_amount % increment_amount to be zero. Increment of one
2114 // satisfies that without any special logic and is overwhelmingly the common
2115 // case.
2116 if (!match(LoopIncrInfo->second, m_One()))
2117 return false;
2118
2119 // Need the increment to not overflow.
2120 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2121 return false;
2122
2123 // Set output variables.
2124 RemAmtOut = RemAmt;
2125 LoopIncrPNOut = PN;
2126 AddInstOut = AddInst;
2127 AddOffsetOut = AddOffset;
2128
2129 return true;
2130}
2131
2132// Try to transform:
2133//
2134// for(i = Start; i < End; ++i)
2135// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2136//
2137// ->
2138//
2139// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2140// for(i = Start; i < End; ++i, ++rem)
2141// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2142static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2143 const LoopInfo *LI,
2144 SmallSet<BasicBlock *, 32> &FreshBBs,
2145 bool IsHuge) {
2146 Value *AddOffset, *RemAmt, *AddInst;
2147 PHINode *LoopIncrPN;
2148 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2149 AddOffset, LoopIncrPN))
2150 return false;
2151
2152 // Only handle a non-constant remainder amount, as the extra IV is probably
2153 // not profitable in that case.
2154 //
2155 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2156 // we can rule out register pressure and ensure this `urem` is executed each
2157 // iteration, it's probably profitable to handle the const case as well.
2158 //
2159 // Potential TODO(2): Should we have a check for how "nested" this remainder
2160 // operation is? The new code runs every iteration so if the remainder is
2161 // guarded behind unlikely conditions this might not be worth it.
2162 if (match(RemAmt, m_ImmConstant()))
2163 return false;
2164
2165 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2166 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2167 // If we have an add, create the initial value for the remainder.
2168 // The logic here is:
2169 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2170 //
2171 // Only proceed if the expression simplifies (otherwise we can't fully
2172 // optimize out the urem).
2173 if (AddInst) {
2174 assert(AddOffset && "We found an add but missing values");
2175 // Without dom-condition/assumption cache we aren't likely to get much out
2176 // of a context instruction.
2177 Start = simplifyAddInst(Start, AddOffset,
2178 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2179 /*IsNUW=*/true, *DL);
2180 if (!Start)
2181 return false;
2182 }
2183
2184 // If we can't fully optimize out the `rem`, skip this transform.
2185 Start = simplifyURemInst(Start, RemAmt, *DL);
2186 if (!Start)
2187 return false;
2188
2189 // Create new remainder with induction variable.
2190 Type *Ty = Rem->getType();
2191 IRBuilder<> Builder(Rem->getContext());
2192
2193 Builder.SetInsertPoint(LoopIncrPN);
2194 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2195
2196 Builder.SetInsertPoint(cast<Instruction>(
2197 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2198 // `(add (urem x, y), 1)` is always nuw.
2199 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2200 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2201 Value *RemSel =
2202 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2203
2204 NewRem->addIncoming(Start, L->getLoopPreheader());
2205 NewRem->addIncoming(RemSel, L->getLoopLatch());
2206
2207 // Insert all touched BBs.
2208 FreshBBs.insert(LoopIncrPN->getParent());
2209 FreshBBs.insert(L->getLoopLatch());
2210 FreshBBs.insert(Rem->getParent());
2211 if (AddInst)
2212 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2213 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2214 Rem->eraseFromParent();
2215 if (AddInst && AddInst->use_empty())
2216 cast<Instruction>(AddInst)->eraseFromParent();
2217 return true;
2218}
2219
2220bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2221 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2222 return true;
2223 return false;
2224}
2225
2226bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2227 if (sinkCmpExpression(Cmp, *TLI))
2228 return true;
2229
2230 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2231 return true;
2232
2233 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2234 return true;
2235
2236 if (unfoldPowerOf2Test(Cmp))
2237 return true;
2238
2239 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2240 return true;
2241
2242 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2243 return true;
2244
2245 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2246 return true;
2247
2248 return false;
2249}
2250
2251/// Duplicate and sink the given 'and' instruction into user blocks where it is
2252/// used in a compare to allow isel to generate better code for targets where
2253/// this operation can be combined.
2254///
2255/// Return true if any changes are made.
2256static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2257 SetOfInstrs &InsertedInsts) {
2258 // Double-check that we're not trying to optimize an instruction that was
2259 // already optimized by some other part of this pass.
2260 assert(!InsertedInsts.count(AndI) &&
2261 "Attempting to optimize already optimized and instruction");
2262 (void)InsertedInsts;
2263
2264 // Nothing to do for single use in same basic block.
2265 if (AndI->hasOneUse() &&
2266 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2267 return false;
2268
2269 // Try to avoid cases where sinking/duplicating is likely to increase register
2270 // pressure.
2271 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2272 !isa<ConstantInt>(AndI->getOperand(1)) &&
2273 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2274 return false;
2275
2276 for (auto *U : AndI->users()) {
2277 Instruction *User = cast<Instruction>(U);
2278
2279 // Only sink 'and' feeding icmp with 0.
2280 if (!isa<ICmpInst>(User))
2281 return false;
2282
2283 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2284 if (!CmpC || !CmpC->isZero())
2285 return false;
2286 }
2287
2288 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2289 return false;
2290
2291 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2292 LLVM_DEBUG(AndI->getParent()->dump());
2293
2294 // Push the 'and' into the same block as the icmp 0. There should only be
2295 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2296 // others, so we don't need to keep track of which BBs we insert into.
2297 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2298 UI != E;) {
2299 Use &TheUse = UI.getUse();
2300 Instruction *User = cast<Instruction>(*UI);
2301
2302 // Preincrement use iterator so we don't invalidate it.
2303 ++UI;
2304
2305 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2306
2307 // Keep the 'and' in the same place if the use is already in the same block.
2308 Instruction *InsertPt =
2309 User->getParent() == AndI->getParent() ? AndI : User;
2310 Instruction *InsertedAnd = BinaryOperator::Create(
2311 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2312 InsertPt->getIterator());
2313 // Propagate the debug info.
2314 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2315
2316 // Replace a use of the 'and' with a use of the new 'and'.
2317 TheUse = InsertedAnd;
2318 ++NumAndUses;
2319 LLVM_DEBUG(User->getParent()->dump());
2320 }
2321
2322 // We removed all uses, nuke the and.
2323 AndI->eraseFromParent();
2324 return true;
2325}
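// Sketch of the effect on hypothetical IR: an 'and' with a constant mask whose
// only users are "icmp eq ..., 0" compares in other blocks, e.g.
//   entry:  %m = and i64 %x, 255
//   bb1:    %c = icmp eq i64 %m, 0
// is duplicated next to each such compare so isel can form a single
// test-under-mask style instruction per use.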
2326
2327/// Check if the candidates could be combined with a shift instruction, which
2328/// includes:
2329/// 1. Truncate instruction
2330/// 2. And instruction and the imm is a mask of the low bits:
2331/// imm & (imm+1) == 0
2332static bool isExtractBitsCandidateUse(Instruction *User) {
2333 if (!isa<TruncInst>(User)) {
2334 if (User->getOpcode() != Instruction::And ||
2335 !isa<ConstantInt>(User->getOperand(1)))
2336 return false;
2337
2338 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2339
2340 if ((Cimm & (Cimm + 1)).getBoolValue())
2341 return false;
2342 }
2343 return true;
2344}
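// Worked example for the mask check: 0x0f & (0x0f + 1) == 0x0f & 0x10 == 0,
// so 0x0f is a low-bit mask and qualifies; 0x18 & (0x18 + 1) == 0x18 != 0,
// so 0x18 does not.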
2345
2346/// Sink both shift and truncate instruction to the use of truncate's BB.
2347static bool
2348SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2349 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2350 const TargetLowering &TLI, const DataLayout &DL) {
2351 BasicBlock *UserBB = User->getParent();
2352 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2353 auto *TruncI = cast<TruncInst>(User);
2354 bool MadeChange = false;
2355
2356 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2357 TruncE = TruncI->user_end();
2358 TruncUI != TruncE;) {
2359
2360 Use &TruncTheUse = TruncUI.getUse();
2361 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2362 // Preincrement use iterator so we don't invalidate it.
2363
2364 ++TruncUI;
2365
2366 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2367 if (!ISDOpcode)
2368 continue;
2369
2370 // If the use is actually a legal node, there will not be an
2371 // implicit truncate.
2372 // FIXME: always querying the result type is just an
2373 // approximation; some nodes' legality is determined by the
2374 // operand or other means. There's no good way to find out though.
2375 if (TLI.isOperationLegalOrCustom(
2376 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2377 continue;
2378
2379 // Don't bother for PHI nodes.
2380 if (isa<PHINode>(TruncUser))
2381 continue;
2382
2383 BasicBlock *TruncUserBB = TruncUser->getParent();
2384
2385 if (UserBB == TruncUserBB)
2386 continue;
2387
2388 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2389 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2390
2391 if (!InsertedShift && !InsertedTrunc) {
2392 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2393 assert(InsertPt != TruncUserBB->end());
2394 // Sink the shift
2395 if (ShiftI->getOpcode() == Instruction::AShr)
2396 InsertedShift =
2397 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2398 else
2399 InsertedShift =
2400 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2401 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2402 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2403
2404 // Sink the trunc
2405 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2406 TruncInsertPt++;
2407 // It will go ahead of any debug-info.
2408 TruncInsertPt.setHeadBit(true);
2409 assert(TruncInsertPt != TruncUserBB->end());
2410
2411 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2412 TruncI->getType(), "");
2413 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2414 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2415
2416 MadeChange = true;
2417
2418 TruncTheUse = InsertedTrunc;
2419 }
2420 }
2421 return MadeChange;
2422}
2423
2424/// Sink the shift *right* instruction into user blocks if the uses could
2425/// potentially be combined with this shift instruction to generate a
2426/// BitExtract instruction. It is only applied if the architecture supports
2427/// BitExtract instructions. Here is an example:
2428/// BB1:
2429/// %x.extract.shift = lshr i64 %arg1, 32
2430/// BB2:
2431/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2432/// ==>
2433///
2434/// BB2:
2435/// %x.extract.shift.1 = lshr i64 %arg1, 32
2436/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2437///
2438/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2439/// instruction.
2440/// Return true if any changes are made.
2441static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2442 const TargetLowering &TLI,
2443 const DataLayout &DL) {
2444 BasicBlock *DefBB = ShiftI->getParent();
2445
2446 /// Only insert instructions in each block once.
2447 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2448
2449 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2450
2451 bool MadeChange = false;
2452 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2453 UI != E;) {
2454 Use &TheUse = UI.getUse();
2455 Instruction *User = cast<Instruction>(*UI);
2456 // Preincrement use iterator so we don't invalidate it.
2457 ++UI;
2458
2459 // Don't bother for PHI nodes.
2460 if (isa<PHINode>(User))
2461 continue;
2462
2463 if (!isExtractBitsCandidateUse(User))
2464 continue;
2465
2466 BasicBlock *UserBB = User->getParent();
2467
2468 if (UserBB == DefBB) {
2469 // If the shift and truncate instructions are in the same BB, the use of
2470 // the truncate (TruncUse) may still introduce another truncate if it is
2471 // not legal. In this case, we would like to sink both the shift and the
2472 // truncate instruction to the BB of TruncUse.
2473 // for example:
2474 // BB1:
2475 // i64 shift.result = lshr i64 opnd, imm
2476 // trunc.result = trunc shift.result to i16
2477 //
2478 // BB2:
2479 // ----> We will have an implicit truncate here if the architecture does
2480 // not have i16 compare.
2481 // cmp i16 trunc.result, opnd2
2482 //
2483 if (isa<TruncInst>(User) &&
2484 shiftIsLegal
2485 // If the type of the truncate is legal, no truncate will be
2486 // introduced in other basic blocks.
2487 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2488 MadeChange =
2489 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2490
2491 continue;
2492 }
2493 // If we have already inserted a shift into this block, use it.
2494 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2495
2496 if (!InsertedShift) {
2497 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2498 assert(InsertPt != UserBB->end());
2499
2500 if (ShiftI->getOpcode() == Instruction::AShr)
2501 InsertedShift =
2502 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2503 else
2504 InsertedShift =
2505 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2506 InsertedShift->insertBefore(*UserBB, InsertPt);
2507 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2508
2509 MadeChange = true;
2510 }
2511
2512 // Replace a use of the shift with a use of the new shift.
2513 TheUse = InsertedShift;
2514 }
2515
2516 // If we removed all uses, or there are none, nuke the shift.
2517 if (ShiftI->use_empty()) {
2518 salvageDebugInfo(*ShiftI);
2519 ShiftI->eraseFromParent();
2520 MadeChange = true;
2521 }
2522
2523 return MadeChange;
2524}
2525
2526/// If counting leading or trailing zeros is an expensive operation and a zero
2527/// input is defined, add a check for zero to avoid calling the intrinsic.
2528///
2529/// We want to transform:
2530/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2531///
2532/// into:
2533/// entry:
2534/// %cmpz = icmp eq i64 %A, 0
2535/// br i1 %cmpz, label %cond.end, label %cond.false
2536/// cond.false:
2537/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2538/// br label %cond.end
2539/// cond.end:
2540/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2541///
2542/// If the transform is performed, return true and set ModifiedDT to true.
2543static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2544 const TargetLowering *TLI,
2545 const DataLayout *DL, ModifyDT &ModifiedDT,
2546 SmallSet<BasicBlock *, 32> &FreshBBs,
2547 bool IsHugeFunc) {
2548 // If a zero input is undefined, it doesn't make sense to despeculate that.
2549 if (match(CountZeros->getOperand(1), m_One()))
2550 return false;
2551
2552 // If it's cheap to speculate, there's nothing to do.
2553 Type *Ty = CountZeros->getType();
2554 auto IntrinsicID = CountZeros->getIntrinsicID();
2555 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2556 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2557 return false;
2558
2559 // Only handle scalar cases. Anything else requires too much work.
2560 unsigned SizeInBits = Ty->getScalarSizeInBits();
2561 if (Ty->isVectorTy())
2562 return false;
2563
2564 // Bail if the value is never zero.
2565 Use &Op = CountZeros->getOperandUse(0);
2566 if (isKnownNonZero(Op, *DL))
2567 return false;
2568
2569 // The intrinsic will be sunk behind a compare against zero and branch.
2570 BasicBlock *StartBlock = CountZeros->getParent();
2571 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2572 if (IsHugeFunc)
2573 FreshBBs.insert(CallBlock);
2574
2575 // Create another block after the count zero intrinsic. A PHI will be added
2576 // in this block to select the result of the intrinsic or the bit-width
2577 // constant if the input to the intrinsic is zero.
2578 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2579 // Any debug-info after CountZeros should not be included.
2580 SplitPt.setHeadBit(true);
2581 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2582 if (IsHugeFunc)
2583 FreshBBs.insert(EndBlock);
2584
2585 // Update the LoopInfo. The new blocks are in the same loop as the start
2586 // block.
2587 if (Loop *L = LI.getLoopFor(StartBlock)) {
2588 L->addBasicBlockToLoop(CallBlock, LI);
2589 L->addBasicBlockToLoop(EndBlock, LI);
2590 }
2591
2592 // Set up a builder to create a compare, conditional branch, and PHI.
2593 IRBuilder<> Builder(CountZeros->getContext());
2594 Builder.SetInsertPoint(StartBlock->getTerminator());
2595 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2596
2597 // Replace the unconditional branch that was created by the first split with
2598 // a compare against zero and a conditional branch.
2599 Value *Zero = Constant::getNullValue(Ty);
2600 // Avoid introducing branch on poison. This also replaces the ctz operand.
2601 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2602 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2603 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2604 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2605 StartBlock->getTerminator()->eraseFromParent();
2606
2607 // Create a PHI in the end block to select either the output of the intrinsic
2608 // or the bit width of the operand.
2609 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2610 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2611 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2612 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2613 PN->addIncoming(BitWidth, StartBlock);
2614 PN->addIncoming(CountZeros, CallBlock);
2615
2616 // We are explicitly handling the zero case, so we can set the intrinsic's
2617 // undefined zero argument to 'true'. This will also prevent reprocessing the
2618 // intrinsic; we only despeculate when a zero input is defined.
2619 CountZeros->setArgOperand(1, Builder.getTrue());
2620 ModifiedDT = ModifyDT::ModifyBBDT;
2621 return true;
2622}
2623
2624bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2625 BasicBlock *BB = CI->getParent();
2626
2627 // Sink address computing for memory operands into the block.
2628 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2629 return true;
2630
2631 // Align the pointer arguments to this call if the target thinks it's a good
2632 // idea
2633 unsigned MinSize;
2634 Align PrefAlign;
2635 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2636 for (auto &Arg : CI->args()) {
2637 // We want to align both objects whose address is used directly and
2638 // objects whose address is used in casts and GEPs, though it only makes
2639 // sense for GEPs if the offset is a multiple of the desired alignment and
2640 // if size - offset meets the size threshold.
2641 if (!Arg->getType()->isPointerTy())
2642 continue;
2643 APInt Offset(DL->getIndexSizeInBits(
2644 cast<PointerType>(Arg->getType())->getAddressSpace()),
2645 0);
2646 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2647 uint64_t Offset2 = Offset.getLimitedValue();
2648 if (!isAligned(PrefAlign, Offset2))
2649 continue;
2650 AllocaInst *AI;
2651 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2652 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2653 AI->setAlignment(PrefAlign);
2654 // Global variables can only be aligned if they are defined in this
2655 // object (i.e. they are uniquely initialized in this object), and
2656 // over-aligning global variables that have an explicit section is
2657 // forbidden.
2658 GlobalVariable *GV;
2659 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2660 GV->getPointerAlignment(*DL) < PrefAlign &&
2661 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2662 GV->setAlignment(PrefAlign);
2663 }
2664 }
2665 // If this is a memcpy (or similar) then we may be able to improve the
2666 // alignment.
2667 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2668 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2669 MaybeAlign MIDestAlign = MI->getDestAlign();
2670 if (!MIDestAlign || DestAlign > *MIDestAlign)
2671 MI->setDestAlignment(DestAlign);
2672 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2673 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2674 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2675 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2676 MTI->setSourceAlignment(SrcAlign);
2677 }
2678 }
2679
2680 // If we have a cold call site, try to sink addressing computation into the
2681 // cold block. This interacts with our handling for loads and stores to
2682 // ensure that we can fold all uses of a potential addressing computation
2683 // into their uses. TODO: generalize this to work over profiling data
2684 if (CI->hasFnAttr(Attribute::Cold) &&
2685 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2686 for (auto &Arg : CI->args()) {
2687 if (!Arg->getType()->isPointerTy())
2688 continue;
2689 unsigned AS = Arg->getType()->getPointerAddressSpace();
2690 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2691 return true;
2692 }
2693
2694 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2695 if (II) {
2696 switch (II->getIntrinsicID()) {
2697 default:
2698 break;
2699 case Intrinsic::assume:
2700 llvm_unreachable("llvm.assume should have been removed already");
2701 case Intrinsic::allow_runtime_check:
2702 case Intrinsic::allow_ubsan_check:
2703 case Intrinsic::experimental_widenable_condition: {
2704 // Give up on future widening opportunities so that we can fold away dead
2705 // paths and merge blocks before going into block-local instruction
2706 // selection.
2707 if (II->use_empty()) {
2708 II->eraseFromParent();
2709 return true;
2710 }
2711 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2712 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2713 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2714 });
2715 return true;
2716 }
2717 case Intrinsic::objectsize:
2718 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2719 case Intrinsic::is_constant:
2720 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2721 case Intrinsic::aarch64_stlxr:
2722 case Intrinsic::aarch64_stxr: {
2723 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2724 if (!ExtVal || !ExtVal->hasOneUse() ||
2725 ExtVal->getParent() == CI->getParent())
2726 return false;
2727 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2728 ExtVal->moveBefore(CI->getIterator());
2729 // Mark this instruction as "inserted by CGP", so that other
2730 // optimizations don't touch it.
2731 InsertedInsts.insert(ExtVal);
2732 return true;
2733 }
2734
2735 case Intrinsic::launder_invariant_group:
2736 case Intrinsic::strip_invariant_group: {
2737 Value *ArgVal = II->getArgOperand(0);
2738 auto it = LargeOffsetGEPMap.find(II);
2739 if (it != LargeOffsetGEPMap.end()) {
2740 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2741 // Make sure not to have to deal with iterator invalidation
2742 // after possibly adding ArgVal to LargeOffsetGEPMap.
2743 auto GEPs = std::move(it->second);
2744 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2745 LargeOffsetGEPMap.erase(II);
2746 }
2747
2748 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2749 II->eraseFromParent();
2750 return true;
2751 }
2752 case Intrinsic::cttz:
2753 case Intrinsic::ctlz:
2754 // If counting zeros is expensive, try to avoid it.
2755 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2756 IsHugeFunc);
2757 case Intrinsic::fshl:
2758 case Intrinsic::fshr:
2759 return optimizeFunnelShift(II);
2760 case Intrinsic::masked_gather:
2761 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2762 case Intrinsic::masked_scatter:
2763 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2764 case Intrinsic::masked_load:
2765 // Treat v1X masked load as load X type.
2766 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2767 if (VT->getNumElements() == 1) {
2768 Value *PtrVal = II->getArgOperand(0);
2769 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2770 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2771 return true;
2772 }
2773 }
2774 return false;
2775 case Intrinsic::masked_store:
2776 // Treat v1X masked store as store X type.
2777 if (auto *VT =
2778 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2779 if (VT->getNumElements() == 1) {
2780 Value *PtrVal = II->getArgOperand(1);
2781 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2782 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2783 return true;
2784 }
2785 }
2786 return false;
2787 }
2788
2789 SmallVector<Value *, 2> PtrOps;
2790 Type *AccessTy;
2791 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2792 while (!PtrOps.empty()) {
2793 Value *PtrVal = PtrOps.pop_back_val();
2794 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2795 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2796 return true;
2797 }
2798 }
2799
2800 // From here on out we're working with named functions.
2801 auto *Callee = CI->getCalledFunction();
2802 if (!Callee)
2803 return false;
2804
2805 // Lower all default uses of _chk calls. This is very similar
2806 // to what InstCombineCalls does, but here we are only lowering calls
2807 // to fortified library functions (e.g. __memcpy_chk) that have the default
2808 // "don't know" as the objectsize. Anything else should be left alone.
2809 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2810 IRBuilder<> Builder(CI);
2811 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2812 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2813 CI->eraseFromParent();
2814 return true;
2815 }
2816
2817 // SCCP may have propagated, among other things, C++ static variables across
2818 // calls. If this happens to be the case, we may want to undo it in order to
2819 // avoid redundant pointer computation of the constant, as the function
2820 // returning the constant needs to be executed anyway.
2821 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2822 if (!F->getReturnType()->isPointerTy())
2823 return nullptr;
2824
2825 GlobalVariable *UniformValue = nullptr;
2826 for (auto &BB : *F) {
2827 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2828 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2829 if (!UniformValue)
2830 UniformValue = V;
2831 else if (V != UniformValue)
2832 return nullptr;
2833 } else {
2834 return nullptr;
2835 }
2836 }
2837 }
2838
2839 return UniformValue;
2840 };
2841
2842 if (Callee->hasExactDefinition()) {
2843 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2844 bool MadeChange = false;
2845 for (Use &U : make_early_inc_range(RV->uses())) {
2846 auto *I = dyn_cast<Instruction>(U.getUser());
2847 if (!I || I->getParent() != CI->getParent()) {
2848 // Limit to the same basic block to avoid extending the call-site live
2849 // range, which otherwise could increase register pressure.
2850 continue;
2851 }
2852 if (CI->comesBefore(I)) {
2853 U.set(CI);
2854 MadeChange = true;
2855 }
2856 }
2857
2858 return MadeChange;
2859 }
2860 }
2861
2862 return false;
2863}
2864
2865static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2866 const CallInst *CI) {
2867 assert(CI && CI->use_empty());
2868
2869 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2870 switch (II->getIntrinsicID()) {
2871 case Intrinsic::memset:
2872 case Intrinsic::memcpy:
2873 case Intrinsic::memmove:
2874 return true;
2875 default:
2876 return false;
2877 }
2878
2879 LibFunc LF;
2880 Function *Callee = CI->getCalledFunction();
2881 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2882 switch (LF) {
2883 case LibFunc_strcpy:
2884 case LibFunc_strncpy:
2885 case LibFunc_strcat:
2886 case LibFunc_strncat:
2887 return true;
2888 default:
2889 return false;
2890 }
2891
2892 return false;
2893}
2894
2895/// Look for opportunities to duplicate return instructions to the predecessor
2896/// to enable tail call optimizations. The case it is currently looking for is
2897/// the following one. Known intrinsics or library functions that may be
2898/// tail-called are taken into account as well.
2899/// @code
2900/// bb0:
2901/// %tmp0 = tail call i32 @f0()
2902/// br label %return
2903/// bb1:
2904/// %tmp1 = tail call i32 @f1()
2905/// br label %return
2906/// bb2:
2907/// %tmp2 = tail call i32 @f2()
2908/// br label %return
2909/// return:
2910/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2911/// ret i32 %retval
2912/// @endcode
2913///
2914/// =>
2915///
2916/// @code
2917/// bb0:
2918/// %tmp0 = tail call i32 @f0()
2919/// ret i32 %tmp0
2920/// bb1:
2921/// %tmp1 = tail call i32 @f1()
2922/// ret i32 %tmp1
2923/// bb2:
2924/// %tmp2 = tail call i32 @f2()
2925/// ret i32 %tmp2
2926/// @endcode
2927bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2928 ModifyDT &ModifiedDT) {
2929 if (!BB->getTerminator())
2930 return false;
2931
2932 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2933 if (!RetI)
2934 return false;
2935
2936 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2937
2938 PHINode *PN = nullptr;
2939 ExtractValueInst *EVI = nullptr;
2940 BitCastInst *BCI = nullptr;
2941 Value *V = RetI->getReturnValue();
2942 if (V) {
2943 BCI = dyn_cast<BitCastInst>(V);
2944 if (BCI)
2945 V = BCI->getOperand(0);
2946
2948 if (EVI) {
2949 V = EVI->getOperand(0);
2950 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2951 return false;
2952 }
2953
2954 PN = dyn_cast<PHINode>(V);
2955 }
2956
2957 if (PN && PN->getParent() != BB)
2958 return false;
2959
2960 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2961 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2962 if (BC && BC->hasOneUse())
2963 Inst = BC->user_back();
2964
2965 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2966 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2967 return false;
2968 };
2969
2970 SmallVector<const IntrinsicInst *, 4> FakeUses;
2971
2972 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2973 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2974 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2975 // Record the instruction so it can be preserved when the exit block is
2976 // removed. Do not preserve the fake use that uses the result of the
2977 // PHI instruction.
2978 // Do not copy fake uses that use the result of a PHI node.
2979 // FIXME: If we do want to copy the fake use into the return blocks, we
2980 // have to figure out which of the PHI node operands to use for each
2981 // copy.
2982 if (!isa<PHINode>(II->getOperand(0))) {
2983 FakeUses.push_back(II);
2984 }
2985 return true;
2986 }
2987
2988 return false;
2989 };
2990
2991 // Make sure there are no instructions between the first instruction
2992 // and return.
2993 BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
2994 // Skip over pseudo-probes and the bitcast.
2995 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
2996 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
2997 BI = std::next(BI);
2998 if (&*BI != RetI)
2999 return false;
3000
3001 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3002 /// call.
3003 const Function *F = BB->getParent();
3004 SmallVector<BasicBlock *, 4> TailCallBBs;
3005 // Record the call instructions so we can insert any fake uses
3006 // that need to be preserved before them.
3007 SmallVector<CallInst *, 4> CallInsts;
3008 if (PN) {
3009 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3010 // Look through bitcasts.
3011 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3012 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3013 BasicBlock *PredBB = PN->getIncomingBlock(I);
3014 // Make sure the phi value is indeed produced by the tail call.
3015 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3016 TLI->mayBeEmittedAsTailCall(CI) &&
3017 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3018 TailCallBBs.push_back(PredBB);
3019 CallInsts.push_back(CI);
3020 } else {
3021 // Consider the cases in which the phi value is indirectly produced by
3022 // the tail call, for example when encountering memset(), memmove(),
3023 // strcpy(), whose return value may have been optimized out. In such
3024 // cases, the value needs to be the first function argument.
3025 //
3026 // bb0:
3027 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3028 // br label %return
3029 // return:
3030 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3031 if (PredBB && PredBB->getSingleSuccessor() == BB)
3032 CI = dyn_cast_or_null<CallInst>(
3033 PredBB->getTerminator()->getPrevNode());
3034
3035 if (CI && CI->use_empty() &&
3036 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3037 IncomingVal == CI->getArgOperand(0) &&
3038 TLI->mayBeEmittedAsTailCall(CI) &&
3039 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3040 TailCallBBs.push_back(PredBB);
3041 CallInsts.push_back(CI);
3042 }
3043 }
3044 }
3045 } else {
3046 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3047 for (BasicBlock *Pred : predecessors(BB)) {
3048 if (!VisitedBBs.insert(Pred).second)
3049 continue;
3050 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3051 CallInst *CI = dyn_cast<CallInst>(I);
3052 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
3053 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3054 // Either we return void or the return value must be the first
3055 // argument of a known intrinsic or library function.
3056 if (!V || isa<UndefValue>(V) ||
3057 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3058 V == CI->getArgOperand(0))) {
3059 TailCallBBs.push_back(Pred);
3060 CallInsts.push_back(CI);
3061 }
3062 }
3063 }
3064 }
3065 }
3066
3067 bool Changed = false;
3068 for (auto const &TailCallBB : TailCallBBs) {
3069 // Make sure the call instruction is followed by an unconditional branch to
3070 // the return block.
3071 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3072 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3073 continue;
3074
3075 // Duplicate the return into TailCallBB.
3076 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3077 assert(!VerifyBFIUpdates ||
3078 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3079 BFI->setBlockFreq(BB,
3080 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3081 ModifiedDT = ModifyDT::ModifyBBDT;
3082 Changed = true;
3083 ++NumRetsDup;
3084 }
3085
3086 // If we eliminated all predecessors of the block, delete the block now.
3087 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3088 // Copy the fake uses found in the original return block to all blocks
3089 // that contain tail calls.
3090 for (auto *CI : CallInsts) {
3091 for (auto const *FakeUse : FakeUses) {
3092 auto *ClonedInst = FakeUse->clone();
3093 ClonedInst->insertBefore(CI->getIterator());
3094 }
3095 }
3096 BB->eraseFromParent();
3097 }
3098
3099 return Changed;
3100}
3101
3102//===----------------------------------------------------------------------===//
3103// Memory Optimization
3104//===----------------------------------------------------------------------===//
3105
3106namespace {
3107
3108/// This is an extended version of TargetLowering::AddrMode
3109/// which holds actual Value*'s for register values.
3110struct ExtAddrMode : public TargetLowering::AddrMode {
3111 Value *BaseReg = nullptr;
3112 Value *ScaledReg = nullptr;
3113 Value *OriginalValue = nullptr;
3114 bool InBounds = true;
3115
3116 enum FieldName {
3117 NoField = 0x00,
3118 BaseRegField = 0x01,
3119 BaseGVField = 0x02,
3120 BaseOffsField = 0x04,
3121 ScaledRegField = 0x08,
3122 ScaleField = 0x10,
3123 MultipleFields = 0xff
3124 };
3125
3126 ExtAddrMode() = default;
3127
3128 void print(raw_ostream &OS) const;
3129 void dump() const;
3130
3131 // Replace From in ExtAddrMode with To.
3132 // E.g., SExt insts may be promoted and deleted. We should replace them with
3133 // the promoted values.
3134 void replaceWith(Value *From, Value *To) {
3135 if (ScaledReg == From)
3136 ScaledReg = To;
3137 }
3138
3139 FieldName compare(const ExtAddrMode &other) {
3140 // First check that the types are the same on each field, as differing types
3141 // is something we can't cope with later on.
3142 if (BaseReg && other.BaseReg &&
3143 BaseReg->getType() != other.BaseReg->getType())
3144 return MultipleFields;
3145 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3146 return MultipleFields;
3147 if (ScaledReg && other.ScaledReg &&
3148 ScaledReg->getType() != other.ScaledReg->getType())
3149 return MultipleFields;
3150
3151 // Conservatively reject 'inbounds' mismatches.
3152 if (InBounds != other.InBounds)
3153 return MultipleFields;
3154
3155 // Check each field to see if it differs.
3156 unsigned Result = NoField;
3157 if (BaseReg != other.BaseReg)
3158 Result |= BaseRegField;
3159 if (BaseGV != other.BaseGV)
3160 Result |= BaseGVField;
3161 if (BaseOffs != other.BaseOffs)
3162 Result |= BaseOffsField;
3163 if (ScaledReg != other.ScaledReg)
3164 Result |= ScaledRegField;
3165 // Don't count 0 as being a different scale, because that actually means
3166 // unscaled (which will already be counted by having no ScaledReg).
3167 if (Scale && other.Scale && Scale != other.Scale)
3168 Result |= ScaleField;
3169
3170 if (llvm::popcount(Result) > 1)
3171 return MultipleFields;
3172 else
3173 return static_cast<FieldName>(Result);
3174 }
3175
3176 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3177 // with no offset.
3178 bool isTrivial() {
3179 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3180 // trivial if at most one of these terms is nonzero, except that BaseGV and
3181 // BaseReg both being zero actually means a null pointer value, which we
3182 // consider to be 'non-zero' here.
3183 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3184 }
3185
3186 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3187 switch (Field) {
3188 default:
3189 return nullptr;
3190 case BaseRegField:
3191 return BaseReg;
3192 case BaseGVField:
3193 return BaseGV;
3194 case ScaledRegField:
3195 return ScaledReg;
3196 case BaseOffsField:
3197 return ConstantInt::get(IntPtrTy, BaseOffs);
3198 }
3199 }
3200
3201 void SetCombinedField(FieldName Field, Value *V,
3202 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3203 switch (Field) {
3204 default:
3205 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3206 break;
3207 case ExtAddrMode::BaseRegField:
3208 BaseReg = V;
3209 break;
3210 case ExtAddrMode::BaseGVField:
3211 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3212 // in the BaseReg field.
3213 assert(BaseReg == nullptr);
3214 BaseReg = V;
3215 BaseGV = nullptr;
3216 break;
3217 case ExtAddrMode::ScaledRegField:
3218 ScaledReg = V;
3219 // If we have a mix of scaled and unscaled addrmodes then we want scale
3220 // to be the scale and not zero.
3221 if (!Scale)
3222 for (const ExtAddrMode &AM : AddrModes)
3223 if (AM.Scale) {
3224 Scale = AM.Scale;
3225 break;
3226 }
3227 break;
3228 case ExtAddrMode::BaseOffsField:
3229 // The offset is no longer a constant, so it goes in ScaledReg with a
3230 // scale of 1.
3231 assert(ScaledReg == nullptr);
3232 ScaledReg = V;
3233 Scale = 1;
3234 BaseOffs = 0;
3235 break;
3236 }
3237 }
3238};
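As a rough illustration of how the FieldName result of compare() is meant to be consumed, here is a hedged sketch (AM1 and AM2 are invented names standing for two addressing modes collected elsewhere in this pass; this is not code from the file):

  // Sketch only: assumes two ExtAddrMode values AM1 and AM2 are in scope.
  ExtAddrMode::FieldName Diff = AM1.compare(AM2);
  if (Diff == ExtAddrMode::MultipleFields) {
    // They differ in more than one field (or in types/inbounds): not mergeable.
  } else if (Diff != ExtAddrMode::NoField) {
    // Exactly one field differs; SetCombinedField() can later replace that
    // field with a common PHI/select value.
  }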
3239
3240#ifndef NDEBUG
3241static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3242 AM.print(OS);
3243 return OS;
3244}
3245#endif
3246
3247#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3248void ExtAddrMode::print(raw_ostream &OS) const {
3249 bool NeedPlus = false;
3250 OS << "[";
3251 if (InBounds)
3252 OS << "inbounds ";
3253 if (BaseGV) {
3254 OS << "GV:";
3255 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3256 NeedPlus = true;
3257 }
3258
3259 if (BaseOffs) {
3260 OS << (NeedPlus ? " + " : "") << BaseOffs;
3261 NeedPlus = true;
3262 }
3263
3264 if (BaseReg) {
3265 OS << (NeedPlus ? " + " : "") << "Base:";
3266 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3267 NeedPlus = true;
3268 }
3269 if (Scale) {
3270 OS << (NeedPlus ? " + " : "") << Scale << "*";
3271 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3272 }
3273
3274 OS << ']';
3275}
3276
3277LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3278 print(dbgs());
3279 dbgs() << '\n';
3280}
3281#endif
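For reference, a hedged sketch of the debug output produced by ExtAddrMode::print above (the addressing mode and value names are invented for the example):

  // Sketch only; assumes an ExtAddrMode AM with BaseGV = @g, BaseOffs = 16,
  // BaseReg = %base, Scale = 4 and ScaledReg = %idx.
  LLVM_DEBUG(dbgs() << AM << '\n');
  // Expected form of the output: [inbounds GV:@g + 16 + Base:%base + 4*%idx]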
3282
3283} // end anonymous namespace
3284
3285namespace {
3286
3287/// This class provides transaction based operation on the IR.
3288/// Every change made through this class is recorded in the internal state and
3289/// can be undone (rollback) until commit is called.
3290/// CGP does not check if instructions could be speculatively executed when
3291/// moved. Preserving the original location would pessimize the debugging
3292/// experience, as well as negatively impact the quality of sample PGO.
3293class TypePromotionTransaction {
3294 /// This represents the common interface of the individual transaction.
3295 /// Each class implements the logic for doing one specific modification on
3296 /// the IR via the TypePromotionTransaction.
3297 class TypePromotionAction {
3298 protected:
3299 /// The Instruction modified.
3300 Instruction *Inst;
3301
3302 public:
3303 /// Constructor of the action.
3304 /// The constructor performs the related action on the IR.
3305 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3306
3307 virtual ~TypePromotionAction() = default;
3308
3309 /// Undo the modification done by this action.
3310 /// When this method is called, the IR must be in the same state as it was
3311 /// before this action was applied.
3312 /// \pre Undoing the action works if and only if the IR is in the exact same
3313 /// state as it was directly after this action was applied.
3314 virtual void undo() = 0;
3315
3316 /// Commit every change made by this action.
3317 /// When the results of the action on the IR are to be kept, it is important
3318 /// to call this function; otherwise hidden information may be kept forever.
3319 virtual void commit() {
3320 // Nothing to be done, this action is not doing anything.
3321 }
3322 };
3323
3324 /// Utility to remember the position of an instruction.
3325 class InsertionHandler {
3326 /// Position of an instruction.
3327 /// Either an instruction:
3328 /// - Is the first in a basic block: BB is used.
3329 /// - Has a previous instruction: PrevInst is used.
3330 struct {
3331 BasicBlock::iterator PrevInst;
3332 BasicBlock *BB;
3333 } Point;
3334 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3335
3336 /// Remember whether or not the instruction had a previous instruction.
3337 bool HasPrevInstruction;
3338
3339 public:
3340 /// Record the position of \p Inst.
3341 InsertionHandler(Instruction *Inst) {
3342 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3343 BasicBlock *BB = Inst->getParent();
3344
3345 // Record where we would have to re-insert the instruction in the sequence
3346 // of DbgRecords, if we ended up reinserting.
3347 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3348
3349 if (HasPrevInstruction) {
3350 Point.PrevInst = std::prev(Inst->getIterator());
3351 } else {
3352 Point.BB = BB;
3353 }
3354 }
3355
3356 /// Insert \p Inst at the recorded position.
3357 void insert(Instruction *Inst) {
3358 if (HasPrevInstruction) {
3359 if (Inst->getParent())
3360 Inst->removeFromParent();
3361 Inst->insertAfter(Point.PrevInst);
3362 } else {
3363 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3364 if (Inst->getParent())
3365 Inst->moveBefore(*Point.BB, Position);
3366 else
3367 Inst->insertBefore(*Point.BB, Position);
3368 }
3369
3370 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3371 }
3372 };
3373
3374 /// Move an instruction before another.
3375 class InstructionMoveBefore : public TypePromotionAction {
3376 /// Original position of the instruction.
3377 InsertionHandler Position;
3378
3379 public:
3380 /// Move \p Inst before \p Before.
3381 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3382 : TypePromotionAction(Inst), Position(Inst) {
3383 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3384 << "\n");
3385 Inst->moveBefore(Before);
3386 }
3387
3388 /// Move the instruction back to its original position.
3389 void undo() override {
3390 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3391 Position.insert(Inst);
3392 }
3393 };
3394
3395 /// Set the operand of an instruction with a new value.
3396 class OperandSetter : public TypePromotionAction {
3397 /// Original operand of the instruction.
3398 Value *Origin;
3399
3400 /// Index of the operand modified in the instruction.
3401 unsigned Idx;
3402
3403 public:
3404 /// Set \p Idx operand of \p Inst with \p NewVal.
3405 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3406 : TypePromotionAction(Inst), Idx(Idx) {
3407 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3408 << "for:" << *Inst << "\n"
3409 << "with:" << *NewVal << "\n");
3410 Origin = Inst->getOperand(Idx);
3411 Inst->setOperand(Idx, NewVal);
3412 }
3413
3414 /// Restore the original value of the instruction.
3415 void undo() override {
3416 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3417 << "for: " << *Inst << "\n"
3418 << "with: " << *Origin << "\n");
3419 Inst->setOperand(Idx, Origin);
3420 }
3421 };
3422
3423 /// Hide the operands of an instruction.
3424 /// Act as if this instruction were not using any of its operands.
3425 class OperandsHider : public TypePromotionAction {
3426 /// The list of original operands.
3427 SmallVector<Value *, 4> OriginalValues;
3428
3429 public:
3430 /// Remove \p Inst from the uses of the operands of \p Inst.
3431 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3432 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3433 unsigned NumOpnds = Inst->getNumOperands();
3434 OriginalValues.reserve(NumOpnds);
3435 for (unsigned It = 0; It < NumOpnds; ++It) {
3436 // Save the current operand.
3437 Value *Val = Inst->getOperand(It);
3438 OriginalValues.push_back(Val);
3439 // Set a dummy one.
3440 // We could use OperandSetter here, but that would imply an overhead
3441 // that we are not willing to pay.
3442 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3443 }
3444 }
3445
3446 /// Restore the original list of uses.
3447 void undo() override {
3448 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3449 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3450 Inst->setOperand(It, OriginalValues[It]);
3451 }
3452 };
3453
3454 /// Build a truncate instruction.
3455 class TruncBuilder : public TypePromotionAction {
3456 Value *Val;
3457
3458 public:
3459 /// Build a truncate instruction of \p Opnd producing a \p Ty
3460 /// result.
3461 /// trunc Opnd to Ty.
3462 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3463 IRBuilder<> Builder(Opnd);
3464 Builder.SetCurrentDebugLocation(DebugLoc());
3465 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3466 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3467 }
3468
3469 /// Get the built value.
3470 Value *getBuiltValue() { return Val; }
3471
3472 /// Remove the built instruction.
3473 void undo() override {
3474 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3475 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3476 IVal->eraseFromParent();
3477 }
3478 };
3479
3480 /// Build a sign extension instruction.
3481 class SExtBuilder : public TypePromotionAction {
3482 Value *Val;
3483
3484 public:
3485 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3486 /// result.
3487 /// sext Opnd to Ty.
3488 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3489 : TypePromotionAction(InsertPt) {
3490 IRBuilder<> Builder(InsertPt);
3491 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3492 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3493 }
3494
3495 /// Get the built value.
3496 Value *getBuiltValue() { return Val; }
3497
3498 /// Remove the built instruction.
3499 void undo() override {
3500 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3501 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3502 IVal->eraseFromParent();
3503 }
3504 };
3505
3506 /// Build a zero extension instruction.
3507 class ZExtBuilder : public TypePromotionAction {
3508 Value *Val;
3509
3510 public:
3511 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3512 /// result.
3513 /// zext Opnd to Ty.
3514 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3515 : TypePromotionAction(InsertPt) {
3516 IRBuilder<> Builder(InsertPt);
3517 Builder.SetCurrentDebugLocation(DebugLoc());
3518 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3519 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3520 }
3521
3522 /// Get the built value.
3523 Value *getBuiltValue() { return Val; }
3524
3525 /// Remove the built instruction.
3526 void undo() override {
3527 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3528 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3529 IVal->eraseFromParent();
3530 }
3531 };
3532
3533 /// Mutate an instruction to another type.
3534 class TypeMutator : public TypePromotionAction {
3535 /// Record the original type.
3536 Type *OrigTy;
3537
3538 public:
3539 /// Mutate the type of \p Inst into \p NewTy.
3540 TypeMutator(Instruction *Inst, Type *NewTy)
3541 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3542 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3543 << "\n");
3544 Inst->mutateType(NewTy);
3545 }
3546
3547 /// Mutate the instruction back to its original type.
3548 void undo() override {
3549 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3550 << "\n");
3551 Inst->mutateType(OrigTy);
3552 }
3553 };
3554
3555 /// Replace the uses of an instruction by another instruction.
3556 class UsesReplacer : public TypePromotionAction {
3557 /// Helper structure to keep track of the replaced uses.
3558 struct InstructionAndIdx {
3559 /// The instruction using the instruction.
3560 Instruction *Inst;
3561
3562 /// The operand index at which the replaced instruction is used by Inst.
3563 unsigned Idx;
3564
3565 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3566 : Inst(Inst), Idx(Idx) {}
3567 };
3568
3569 /// Keep track of the original uses (pair Instruction, Index).
3570 SmallVector<InstructionAndIdx, 4> OriginalUses;
3571 /// Keep track of the debug users.
3572 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3573
3574 /// Keep track of the new value so that we can undo it by replacing
3575 /// instances of the new value with the original value.
3576 Value *New;
3577
3579
3580 public:
3581 /// Replace all the uses of \p Inst by \p New.
3582 UsesReplacer(Instruction *Inst, Value *New)
3583 : TypePromotionAction(Inst), New(New) {
3584 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3585 << "\n");
3586 // Record the original uses.
3587 for (Use &U : Inst->uses()) {
3588 Instruction *UserI = cast<Instruction>(U.getUser());
3589 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3590 }
3591 // Record the debug uses separately. They are not in the instruction's
3592 // use list, but they are replaced by RAUW.
3593 findDbgValues(Inst, DbgVariableRecords);
3594
3595 // Now, we can replace the uses.
3596 Inst->replaceAllUsesWith(New);
3597 }
3598
3599 /// Reassign the original uses of Inst to Inst.
3600 void undo() override {
3601 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3602 for (InstructionAndIdx &Use : OriginalUses)
3603 Use.Inst->setOperand(Use.Idx, Inst);
3604 // RAUW has replaced all original uses with references to the new value,
3605 // including the debug uses. Since we are undoing the replacements,
3606 // the original debug uses must also be reinstated to maintain the
3607 // correctness and utility of debug value records.
3608 for (DbgVariableRecord *DVR : DbgVariableRecords)
3609 DVR->replaceVariableLocationOp(New, Inst);
3610 }
3611 };
3612
3613 /// Remove an instruction from the IR.
3614 class InstructionRemover : public TypePromotionAction {
3615 /// Original position of the instruction.
3616 InsertionHandler Inserter;
3617
3618 /// Helper structure to hide all the link to the instruction. In other
3619 /// words, this helps to do as if the instruction was removed.
3620 OperandsHider Hider;
3621
3622 /// Keep track of the uses replaced, if any.
3623 UsesReplacer *Replacer = nullptr;
3624
3625 /// Keep track of instructions removed.
3626 SetOfInstrs &RemovedInsts;
3627
3628 public:
3629 /// Remove all references to \p Inst and optionally replace all its
3630 /// uses with \p New.
3631 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3632 /// \pre If !Inst->use_empty(), then New != nullptr
3633 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3634 Value *New = nullptr)
3635 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3636 RemovedInsts(RemovedInsts) {
3637 if (New)
3638 Replacer = new UsesReplacer(Inst, New);
3639 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3640 RemovedInsts.insert(Inst);
3641 /// The instructions removed here will be freed after completing
3642 /// optimizeBlock() for all blocks as we need to keep track of the
3643 /// removed instructions during promotion.
3644 Inst->removeFromParent();
3645 }
3646
3647 ~InstructionRemover() override { delete Replacer; }
3648
3649 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3650 InstructionRemover(const InstructionRemover &other) = delete;
3651
3652 /// Resurrect the instruction and reassign it to the proper uses if a
3653 /// new value was provided when building this action.
3654 void undo() override {
3655 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3656 Inserter.insert(Inst);
3657 if (Replacer)
3658 Replacer->undo();
3659 Hider.undo();
3660 RemovedInsts.erase(Inst);
3661 }
3662 };
3663
3664public:
3665 /// Restoration point.
3666 /// The restoration point is a pointer to an action instead of an iterator
3667 /// because the iterator may be invalidated but not the pointer.
3668 using ConstRestorationPt = const TypePromotionAction *;
3669
3670 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3671 : RemovedInsts(RemovedInsts) {}
3672
3673 /// Commit every change made in this transaction. Return true if any change
3674 /// happened.
3675 bool commit();
3676
3677 /// Undo all the changes made after the given point.
3678 void rollback(ConstRestorationPt Point);
3679
3680 /// Get the current restoration point.
3681 ConstRestorationPt getRestorationPoint() const;
3682
3683 /// \name API for IR modification with state keeping to support rollback.
3684 /// @{
3685 /// Same as Instruction::setOperand.
3686 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3687
3688 /// Same as Instruction::eraseFromParent.
3689 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3690
3691 /// Same as Value::replaceAllUsesWith.
3692 void replaceAllUsesWith(Instruction *Inst, Value *New);
3693
3694 /// Same as Value::mutateType.
3695 void mutateType(Instruction *Inst, Type *NewTy);
3696
3697 /// Same as IRBuilder::createTrunc.
3698 Value *createTrunc(Instruction *Opnd, Type *Ty);
3699
3700 /// Same as IRBuilder::createSExt.
3701 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3702
3703 /// Same as IRBuilder::createZExt.
3704 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3705
3706private:
3707 /// The ordered list of actions made so far.
3708 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3709
3710 using CommitPt =
3711 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3712
3713 SetOfInstrs &RemovedInsts;
3714};
3715
3716} // end anonymous namespace
3717
3718void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3719 Value *NewVal) {
3720 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3721 Inst, Idx, NewVal));
3722}
3723
3724void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3725 Value *NewVal) {
3726 Actions.push_back(
3727 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3728 Inst, RemovedInsts, NewVal));
3729}
3730
3731void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3732 Value *New) {
3733 Actions.push_back(
3734 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3735}
3736
3737void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3738 Actions.push_back(
3739 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3740}
3741
3742Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3743 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3744 Value *Val = Ptr->getBuiltValue();
3745 Actions.push_back(std::move(Ptr));
3746 return Val;
3747}
3748
3749Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3750 Type *Ty) {
3751 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3752 Value *Val = Ptr->getBuiltValue();
3753 Actions.push_back(std::move(Ptr));
3754 return Val;
3755}
3756
3757Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3758 Type *Ty) {
3759 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3760 Value *Val = Ptr->getBuiltValue();
3761 Actions.push_back(std::move(Ptr));
3762 return Val;
3763}
3764
3765TypePromotionTransaction::ConstRestorationPt
3766TypePromotionTransaction::getRestorationPoint() const {
3767 return !Actions.empty() ? Actions.back().get() : nullptr;
3768}
3769
3770bool TypePromotionTransaction::commit() {
3771 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3772 Action->commit();
3773 bool Modified = !Actions.empty();
3774 Actions.clear();
3775 return Modified;
3776}
3777
3778void TypePromotionTransaction::rollback(
3779 TypePromotionTransaction::ConstRestorationPt Point) {
3780 while (!Actions.empty() && Point != Actions.back().get()) {
3781 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3782 Curr->undo();
3783 }
3784}
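Taken together, the transaction is driven in a speculate/validate pattern. A hedged usage sketch (RemovedInsts, Inst, NewTy, PromotedOperand and the profitability flag are invented placeholders, not code from this file):

  TypePromotionTransaction TPT(RemovedInsts);
  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
      TPT.getRestorationPoint();
  TPT.mutateType(Inst, NewTy);               // recorded, reversible
  TPT.setOperand(Inst, 0, PromotedOperand);  // recorded, reversible
  if (PromotionIsProfitable)
    TPT.commit();                  // keep everything recorded so far
  else
    TPT.rollback(LastKnownGood);   // undo all actions made after the point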
3785
3786namespace {
3787
3788/// A helper class for matching addressing modes.
3789///
3790/// This encapsulates the logic for matching the target-legal addressing modes.
3791class AddressingModeMatcher {
3792 SmallVectorImpl<Instruction *> &AddrModeInsts;
3793 const TargetLowering &TLI;
3794 const TargetRegisterInfo &TRI;
3795 const DataLayout &DL;
3796 const LoopInfo &LI;
3797 const std::function<const DominatorTree &()> getDTFn;
3798
3799 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3800 /// the memory instruction that we're computing this address for.
3801 Type *AccessTy;
3802 unsigned AddrSpace;
3803 Instruction *MemoryInst;
3804
3805 /// This is the addressing mode that we're building up. This is
3806 /// part of the return value of this addressing mode matching stuff.
3807 ExtAddrMode &AddrMode;
3808
3809 /// The instructions inserted by other CodeGenPrepare optimizations.
3810 const SetOfInstrs &InsertedInsts;
3811
3812 /// A map from the instructions to their type before promotion.
3813 InstrToOrigTy &PromotedInsts;
3814
3815 /// The ongoing transaction where every action should be registered.
3816 TypePromotionTransaction &TPT;
3817
3818 // A GEP whose offset is too large to be folded into the addressing mode.
3819 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3820
3821 /// This is set to true when we should not do profitability checks.
3822 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3823 bool IgnoreProfitability;
3824
3825 /// True if we are optimizing for size.
3826 bool OptSize = false;
3827
3828 ProfileSummaryInfo *PSI;
3829 BlockFrequencyInfo *BFI;
3830
3831 AddressingModeMatcher(
3832 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3833 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3834 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3835 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3836 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3837 TypePromotionTransaction &TPT,
3838 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3839 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3840 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3841 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3842 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3843 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3844 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3845 IgnoreProfitability = false;
3846 }
3847
3848public:
3849 /// Find the maximal addressing mode that a load/store of V can fold,
3850 /// given an access type of AccessTy. This returns a list of involved
3851 /// instructions in AddrModeInsts.
3852 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3853 /// optimizations.
3854 /// \p PromotedInsts maps the instructions to their type before promotion.
3855 /// \p TPT The ongoing transaction where every action should be registered.
3856 static ExtAddrMode
3857 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3858 SmallVectorImpl<Instruction *> &AddrModeInsts,
3859 const TargetLowering &TLI, const LoopInfo &LI,
3860 const std::function<const DominatorTree &()> getDTFn,
3861 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3862 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3863 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3864 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3865 ExtAddrMode Result;
3866
3867 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3868 AccessTy, AS, MemoryInst, Result,
3869 InsertedInsts, PromotedInsts, TPT,
3870 LargeOffsetGEP, OptSize, PSI, BFI)
3871 .matchAddr(V, 0);
3872 (void)Success;
3873 assert(Success && "Couldn't select *anything*?");
3874 return Result;
3875 }
3876
3877private:
3878 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3879 bool matchAddr(Value *Addr, unsigned Depth);
3880 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3881 bool *MovedAway = nullptr);
3882 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3883 ExtAddrMode &AMBefore,
3884 ExtAddrMode &AMAfter);
3885 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3886 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3887 Value *PromotedOperand) const;
3888};
3889
3890class PhiNodeSet;
3891
3892/// An iterator for PhiNodeSet.
3893class PhiNodeSetIterator {
3894 PhiNodeSet *const Set;
3895 size_t CurrentIndex = 0;
3896
3897public:
3898 /// The constructor. Start should point to either a valid element, or be equal
3899 /// to the size of the underlying SmallVector of the PhiNodeSet.
3900 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3901 PHINode *operator*() const;
3902 PhiNodeSetIterator &operator++();
3903 bool operator==(const PhiNodeSetIterator &RHS) const;
3904 bool operator!=(const PhiNodeSetIterator &RHS) const;
3905};
3906
3907/// Keeps a set of PHINodes.
3908///
3909/// This is a minimal set implementation for a specific use case:
3910/// It is very fast when there are very few elements, but also provides good
3911/// performance when there are many. It is similar to SmallPtrSet, but also
3912/// provides iteration by insertion order, which is deterministic and stable
3913 /// across runs. It is also similar to SmallSetVector, but provides removal
3914 /// of elements in O(1) time. This is achieved by not actually removing the
3915 /// element from the underlying vector, so it comes at the cost of using more
3916 /// memory, but that is fine, since PhiNodeSets are used as short-lived objects.
3917class PhiNodeSet {
3918 friend class PhiNodeSetIterator;
3919
3920 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3921 using iterator = PhiNodeSetIterator;
3922
3923 /// Keeps the elements in the order of their insertion in the underlying
3924 /// vector. To achieve constant time removal, it never deletes any element.
3925 SmallVector<PHINode *, 32> NodeList;
3926
3927 /// Keeps the elements in the underlying set implementation. This (and not the
3928 /// NodeList defined above) is the source of truth on whether an element
3929 /// is actually in the collection.
3930 MapType NodeMap;
3931
3932 /// Points to the first valid (not deleted) element when the set is not empty
3933 /// and the value is not zero. Equals the size of the underlying vector
3934 /// when the set is empty. When the value is 0, as in the beginning, the
3935 /// first element may or may not be valid.
3936 size_t FirstValidElement = 0;
3937
3938public:
3939 /// Inserts a new element to the collection.
3940 /// \returns true if the element is actually added, i.e. was not in the
3941 /// collection before the operation.
3942 bool insert(PHINode *Ptr) {
3943 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3944 NodeList.push_back(Ptr);
3945 return true;
3946 }
3947 return false;
3948 }
3949
3950 /// Removes the element from the collection.
3951 /// \returns whether the element is actually removed, i.e. was in the
3952 /// collection before the operation.
3953 bool erase(PHINode *Ptr) {
3954 if (NodeMap.erase(Ptr)) {
3955 SkipRemovedElements(FirstValidElement);
3956 return true;
3957 }
3958 return false;
3959 }
3960
3961 /// Removes all elements and clears the collection.
3962 void clear() {
3963 NodeMap.clear();
3964 NodeList.clear();
3965 FirstValidElement = 0;
3966 }
3967
3968 /// \returns an iterator that will iterate the elements in the order of
3969 /// insertion.
3970 iterator begin() {
3971 if (FirstValidElement == 0)
3972 SkipRemovedElements(FirstValidElement);
3973 return PhiNodeSetIterator(this, FirstValidElement);
3974 }
3975
3976 /// \returns an iterator that points to the end of the collection.
3977 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3978
3979 /// Returns the number of elements in the collection.
3980 size_t size() const { return NodeMap.size(); }
3981
3982 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3983 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3984
3985private:
3986 /// Updates the CurrentIndex so that it will point to a valid element.
3987 ///
3988 /// If the element of NodeList at CurrentIndex is valid, it does not
3989 /// change it. If there are no more valid elements, it updates CurrentIndex
3990 /// to point to the end of the NodeList.
3991 void SkipRemovedElements(size_t &CurrentIndex) {
3992 while (CurrentIndex < NodeList.size()) {
3993 auto it = NodeMap.find(NodeList[CurrentIndex]);
3994 // If the element has been deleted and added again later, NodeMap will
3995 // point to a different index, so CurrentIndex will still be invalid.
3996 if (it != NodeMap.end() && it->second == CurrentIndex)
3997 break;
3998 ++CurrentIndex;
3999 }
4000 }
4001};
4002
4003PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4004 : Set(Set), CurrentIndex(Start) {}
4005
4006PHINode *PhiNodeSetIterator::operator*() const {
4007 assert(CurrentIndex < Set->NodeList.size() &&
4008 "PhiNodeSet access out of range");
4009 return Set->NodeList[CurrentIndex];
4010}
4011
4012PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4013 assert(CurrentIndex < Set->NodeList.size() &&
4014 "PhiNodeSet access out of range");
4015 ++CurrentIndex;
4016 Set->SkipRemovedElements(CurrentIndex);
4017 return *this;
4018}
4019
4020bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4021 return CurrentIndex == RHS.CurrentIndex;
4022}
4023
4024bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4025 return !((*this) == RHS);
4026}
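The trade-off PhiNodeSet makes (tombstoning in the vector, with the map as the source of truth) can be shown with a small standalone sketch using standard containers; this is only an illustration of the idea, not code used by the pass:

  #include <cstddef>
  #include <unordered_map>
  #include <vector>

  struct MiniOrderedSet {
    std::vector<void *> List;                     // insertion order; never shrinks
    std::unordered_map<void *, std::size_t> Map;  // membership + index

    bool insert(void *P) {
      if (!Map.emplace(P, List.size()).second)
        return false;
      List.push_back(P);
      return true;
    }
    bool erase(void *P) { return Map.erase(P) != 0; } // O(1): leaves a hole
    template <typename Fn> void forEach(Fn F) const {
      for (std::size_t I = 0, E = List.size(); I != E; ++I) {
        auto It = Map.find(List[I]);
        if (It != Map.end() && It->second == I)       // skip stale slots
          F(List[I]);
      }
    }
  };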
4027
4028/// Keep track of simplification of Phi nodes.
4029 /// Accepts the set of all phi nodes and erases a phi node from this set
4030/// if it is simplified.
4031class SimplificationTracker {
4032 DenseMap<Value *, Value *> Storage;
4033 const SimplifyQuery &SQ;
4034 // Tracks newly created Phi nodes. The elements are iterated by insertion
4035 // order.
4036 PhiNodeSet AllPhiNodes;
4037 // Tracks newly created Select nodes.
4038 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4039
4040public:
4041 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
4042
4043 Value *Get(Value *V) {
4044 do {
4045 auto SV = Storage.find(V);
4046 if (SV == Storage.end())
4047 return V;
4048 V = SV->second;
4049 } while (true);
4050 }
4051
4052 Value *Simplify(Value *Val) {
4053 SmallVector<Value *, 32> WorkList;
4054 SmallPtrSet<Value *, 32> Visited;
4055 WorkList.push_back(Val);
4056 while (!WorkList.empty()) {
4057 auto *P = WorkList.pop_back_val();
4058 if (!Visited.insert(P).second)
4059 continue;
4060 if (auto *PI = dyn_cast<Instruction>(P))
4061 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
4062 for (auto *U : PI->users())
4063 WorkList.push_back(cast<Value>(U));
4064 Put(PI, V);
4065 PI->replaceAllUsesWith(V);
4066 if (auto *PHI = dyn_cast<PHINode>(PI))
4067 AllPhiNodes.erase(PHI);
4068 if (auto *Select = dyn_cast<SelectInst>(PI))
4069 AllSelectNodes.erase(Select);
4070 PI->eraseFromParent();
4071 }
4072 }
4073 return Get(Val);
4074 }
4075
4076 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4077
4078 void ReplacePhi(PHINode *From, PHINode *To) {
4079 Value *OldReplacement = Get(From);
4080 while (OldReplacement != From) {
4081 From = To;
4082 To = dyn_cast<PHINode>(OldReplacement);
4083 OldReplacement = Get(From);
4084 }
4085 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4086 Put(From, To);
4087 From->replaceAllUsesWith(To);
4088 AllPhiNodes.erase(From);
4089 From->eraseFromParent();
4090 }
4091
4092 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4093
4094 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4095
4096 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4097
4098 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4099
4100 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4101
4102 void destroyNewNodes(Type *CommonType) {
4103 // For safe erasing, replace the uses with dummy value first.
4104 auto *Dummy = PoisonValue::get(CommonType);
4105 for (auto *I : AllPhiNodes) {
4106 I->replaceAllUsesWith(Dummy);
4107 I->eraseFromParent();
4108 }
4109 AllPhiNodes.clear();
4110 for (auto *I : AllSelectNodes) {
4111 I->replaceAllUsesWith(Dummy);
4112 I->eraseFromParent();
4113 }
4114 AllSelectNodes.clear();
4115 }
4116};
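The Get()/Put() pair above behaves like a chain of replacements that Get() follows to the most recent value. A hedged sketch (A, B and C are invented Value pointers; not code from this file):

  // Sketch only: SQ, A, B and C are assumed to exist in the surrounding code.
  SimplificationTracker ST(SQ);
  ST.Put(A, B);              // A was simplified to B
  ST.Put(B, C);              // later, B itself was simplified to C
  Value *Latest = ST.Get(A); // follows A -> B -> C and yields C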
4117
4118/// A helper class for combining addressing modes.
4119class AddressingModeCombiner {
4120 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4121 typedef std::pair<PHINode *, PHINode *> PHIPair;
4122
4123private:
4124 /// The addressing modes we've collected.
4125 SmallVector<ExtAddrMode, 16> AddrModes;
4126
4127 /// The field in which the AddrModes differ, when we have more than one.
4128 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4129
4130 /// Are the AddrModes that we have all just equal to their original values?
4131 bool AllAddrModesTrivial = true;
4132
4133 /// Common Type for all different fields in addressing modes.
4134 Type *CommonType = nullptr;
4135
4136 /// SimplifyQuery for simplifyInstruction utility.
4137 const SimplifyQuery &SQ;
4138
4139 /// Original Address.
4140 Value *Original;
4141
4142 /// Common value among addresses
4143 Value *CommonValue = nullptr;
4144
4145public:
4146 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
4147 : SQ(_SQ), Original(OriginalValue) {}
4148
4149 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4150
4151 /// Get the combined AddrMode
4152 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4153
4154 /// Add a new AddrMode if it's compatible with the AddrModes we already
4155 /// have.
4156 /// \return True iff we succeeded in doing so.
4157 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4158 // Take note of whether we have any non-trivial AddrModes, as we need to
4159 // detect when all AddrModes are trivial, as then we would introduce a phi or
4160 // select which just duplicates what's already there.
4161 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4162
4163 // If this is the first addrmode then everything is fine.
4164 if (AddrModes.empty()) {
4165 AddrModes.emplace_back(NewAddrMode);
4166 return true;
4167 }
4168
4169 // Figure out how different this is from the other address modes, which we
4170 // can do just by comparing against the first one given that we only care
4171 // about the cumulative difference.
4172 ExtAddrMode::FieldName ThisDifferentField =
4173 AddrModes[0].compare(NewAddrMode);
4174 if (DifferentField == ExtAddrMode::NoField)
4175 DifferentField = ThisDifferentField;
4176 else if (DifferentField != ThisDifferentField)
4177 DifferentField = ExtAddrMode::MultipleFields;
4178
4179 // If NewAddrMode differs in more than one dimension we cannot handle it.
4180 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4181
4182 // If Scale Field is different then we reject.
4183 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4184
4185 // We must also reject the case where the base offset differs and the
4186 // scaled register is not null; we cannot handle this case because the merge
4187 // of the different offsets would have to be used as the ScaleReg.
4188 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4189 !NewAddrMode.ScaledReg);
4190
4191 // We must also reject the case where the GV differs and a BaseReg is
4192 // installed, because we want to use the base register as a merge of the GV values.
4193 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4194 !NewAddrMode.HasBaseReg);
4195
4196 // Even if NewAddrMode is the same, we still need to collect it, because the
4197 // original value is different. Later we will need all the original values
4198 // as anchors when finding the common Phi node.
4199 if (CanHandle)
4200 AddrModes.emplace_back(NewAddrMode);
4201 else
4202 AddrModes.clear();
4203
4204 return CanHandle;
4205 }
4206
4207 /// Combine the addressing modes we've collected into a single
4208 /// addressing mode.
4209 /// \return True iff we successfully combined them or we only had one so
4210 /// didn't need to combine them anyway.
4211 bool combineAddrModes() {
4212 // If we have no AddrModes then they can't be combined.
4213 if (AddrModes.size() == 0)
4214 return false;
4215
4216 // A single AddrMode can trivially be combined.
4217 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4218 return true;
4219
4220 // If the AddrModes we collected are all just equal to the value they are
4221 // derived from then combining them wouldn't do anything useful.
4222 if (AllAddrModesTrivial)
4223 return false;
4224
4225 if (!addrModeCombiningAllowed())
4226 return false;
4227
4228 // Build a map between <original value, basic block where we saw it> to
4229 // value of base register.
4230 // Bail out if there is no common type.
4231 FoldAddrToValueMapping Map;
4232 if (!initializeMap(Map))
4233 return false;
4234
4235 CommonValue = findCommon(Map);
4236 if (CommonValue)
4237 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4238 return CommonValue != nullptr;
4239 }
4240
4241private:
4242 /// `CommonValue` may be a placeholder inserted by us.
4243 /// If the placeholder is not used, we should remove this dead instruction.
4244 void eraseCommonValueIfDead() {
4245 if (CommonValue && CommonValue->use_empty())
4246 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4247 CommonInst->eraseFromParent();
4248 }
4249
4250 /// Initialize Map with anchor values. For each address seen,
4251 /// we record the value of the differing field in that address.
4252 /// At the same time we find a common type for the differing field, which we
4253 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
4254 /// Return false if no common type is found.
4255 bool initializeMap(FoldAddrToValueMapping &Map) {
4256 // Keep track of keys where the value is null. We will need to replace it
4257 // with constant null when we know the common type.
4258 SmallVector<Value *, 2> NullValue;
4259 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4260 for (auto &AM : AddrModes) {
4261 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4262 if (DV) {
4263 auto *Type = DV->getType();
4264 if (CommonType && CommonType != Type)
4265 return false;
4266 CommonType = Type;
4267 Map[AM.OriginalValue] = DV;
4268 } else {
4269 NullValue.push_back(AM.OriginalValue);
4270 }
4271 }
4272 assert(CommonType && "At least one non-null value must be!");
4273 for (auto *V : NullValue)
4274 Map[V] = Constant::getNullValue(CommonType);
4275 return true;
4276 }
4277
4278 /// We have a mapping from a value A to another value B, where B was a field
4279 /// in the addressing mode represented by A. We also have an original value C
4280 /// representing the address we start with. Traversing from C through phis and
4281 /// selects, we ended up with the A's in the map. This utility function tries to
4282 /// find a value V which is a field in addressing mode C such that, traversing
4283 /// through phi nodes and selects, we end up in the corresponding values B in the map.
4284 /// The utility will create new Phis/Selects if needed.
4285 // The simple example looks as follows:
4286 // BB1:
4287 // p1 = b1 + 40
4288 // br cond BB2, BB3
4289 // BB2:
4290 // p2 = b2 + 40
4291 // br BB3
4292 // BB3:
4293 // p = phi [p1, BB1], [p2, BB2]
4294 // v = load p
4295 // Map is
4296 // p1 -> b1
4297 // p2 -> b2
4298 // Request is
4299 // p -> ?
4300 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4301 Value *findCommon(FoldAddrToValueMapping &Map) {
4302 // Tracks the simplification of newly created phi nodes. The reason we use
4303 // this mapping is because we will add new created Phi nodes in AddrToBase.
4304 // Simplification of Phi nodes is recursive, so some Phi node may
4305 // be simplified after we added it to AddrToBase. In reality this
4306 // simplification is possible only if original phi/selects were not
4307 // simplified yet.
4308 // Using this mapping we can find the current value in AddrToBase.
4309 SimplificationTracker ST(SQ);
4310
4311 // First step, DFS to create PHI nodes for all intermediate blocks.
4312 // Also fill traverse order for the second step.
4313 SmallVector<Value *, 32> TraverseOrder;
4314 InsertPlaceholders(Map, TraverseOrder, ST);
4315
4316 // Second Step, fill new nodes by merged values and simplify if possible.
4317 FillPlaceholders(Map, TraverseOrder, ST);
4318
4319 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4320 ST.destroyNewNodes(CommonType);
4321 return nullptr;
4322 }
4323
4324 // Now we'd like to match new Phi nodes to existing ones.
4325 unsigned PhiNotMatchedCount = 0;
4326 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4327 ST.destroyNewNodes(CommonType);
4328 return nullptr;
4329 }
4330
4331 auto *Result = ST.Get(Map.find(Original)->second);
4332 if (Result) {
4333 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4334 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4335 }
4336 return Result;
4337 }
4338
4339 /// Try to match PHI node to Candidate.
4340 /// Matcher tracks the matched Phi nodes.
4341 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4342 SmallSetVector<PHIPair, 8> &Matcher,
4343 PhiNodeSet &PhiNodesToMatch) {
4344 SmallVector<PHIPair, 8> WorkList;
4345 Matcher.insert({PHI, Candidate});
4346 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4347 MatchedPHIs.insert(PHI);
4348 WorkList.push_back({PHI, Candidate});
4349 SmallSet<PHIPair, 8> Visited;
4350 while (!WorkList.empty()) {
4351 auto Item = WorkList.pop_back_val();
4352 if (!Visited.insert(Item).second)
4353 continue;
4354 // We iterate over all incoming values of the Phi to compare them.
4355 // If the values differ, both of them are Phis, the first one is a
4356 // Phi we added (subject to match), and both are in the same basic
4357 // block, then we can match our pair if the values match. So we state that
4358 // these values match and add them to the work list to verify that.
4359 for (auto *B : Item.first->blocks()) {
4360 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4361 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4362 if (FirstValue == SecondValue)
4363 continue;
4364
4365 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4366 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4367
4368 // If one of them is not a Phi, or
4369 // the first one is not a Phi node from the set we'd like to match, or
4370 // the Phi nodes are from different basic blocks, then
4371 // we will not be able to match.
4372 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4373 FirstPhi->getParent() != SecondPhi->getParent())
4374 return false;
4375
4376 // If we already matched them then continue.
4377 if (Matcher.count({FirstPhi, SecondPhi}))
4378 continue;
4379 // So the values are different and do not match. So we need them to
4380 // match. (But we register no more than one match per PHI node, so that
4381 // we won't later try to replace them twice.)
4382 if (MatchedPHIs.insert(FirstPhi).second)
4383 Matcher.insert({FirstPhi, SecondPhi});
4384 // But we must check it.
4385 WorkList.push_back({FirstPhi, SecondPhi});
4386 }
4387 }
4388 return true;
4389 }
4390
4391 /// For the given set of PHI nodes (in the SimplificationTracker) try
4392 /// to find their equivalents.
4393 /// Returns false if this matching fails and creation of new Phi is disabled.
4394 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4395 unsigned &PhiNotMatchedCount) {
4396 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4397 // order, so the replacements (ReplacePhi) are also done in a deterministic
4398 // order.
4399 SmallSetVector<PHIPair, 8> Matched;
4400 SmallPtrSet<PHINode *, 8> WillNotMatch;
4401 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4402 while (PhiNodesToMatch.size()) {
4403 PHINode *PHI = *PhiNodesToMatch.begin();
4404
4405 // Add ourselves; if no Phi node in the basic block matches, then we will not match.
4406 WillNotMatch.clear();
4407 WillNotMatch.insert(PHI);
4408
4409 // Traverse all Phis until we find an equivalent one or fail to do so.
4410 bool IsMatched = false;
4411 for (auto &P : PHI->getParent()->phis()) {
4412 // Skip new Phi nodes.
4413 if (PhiNodesToMatch.count(&P))
4414 continue;
4415 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4416 break;
4417 // If it does not match, collect all Phi nodes from the matcher.
4418 // If we end up with no match, then all these Phi nodes will not match
4419 // later.
4420 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4421 Matched.clear();
4422 }
4423 if (IsMatched) {
4424 // Replace all matched values and erase them.
4425 for (auto MV : Matched)
4426 ST.ReplacePhi(MV.first, MV.second);
4427 Matched.clear();
4428 continue;
4429 }
4430 // If we are not allowed to create new nodes then bail out.
4431 if (!AllowNewPhiNodes)
4432 return false;
4433 // Just remove all seen values in matcher. They will not match anything.
4434 PhiNotMatchedCount += WillNotMatch.size();
4435 for (auto *P : WillNotMatch)
4436 PhiNodesToMatch.erase(P);
4437 }
4438 return true;
4439 }
4440 /// Fill the placeholders with values from predecessors and simplify them.
4441 void FillPlaceholders(FoldAddrToValueMapping &Map,
4442 SmallVectorImpl<Value *> &TraverseOrder,
4443 SimplificationTracker &ST) {
4444 while (!TraverseOrder.empty()) {
4445 Value *Current = TraverseOrder.pop_back_val();
4446 assert(Map.contains(Current) && "No node to fill!!!");
4447 Value *V = Map[Current];
4448
4449 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4450 // CurrentValue also must be Select.
4451 auto *CurrentSelect = cast<SelectInst>(Current);
4452 auto *TrueValue = CurrentSelect->getTrueValue();
4453 assert(Map.contains(TrueValue) && "No True Value!");
4454 Select->setTrueValue(ST.Get(Map[TrueValue]));
4455 auto *FalseValue = CurrentSelect->getFalseValue();
4456 assert(Map.contains(FalseValue) && "No False Value!");
4457 Select->setFalseValue(ST.Get(Map[FalseValue]));
4458 } else {
4459 // Must be a Phi node then.
4460 auto *PHI = cast<PHINode>(V);
4461 // Fill the Phi node with values from predecessors.
4462 for (auto *B : predecessors(PHI->getParent())) {
4463 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4464 assert(Map.contains(PV) && "No predecessor Value!");
4465 PHI->addIncoming(ST.Get(Map[PV]), B);
4466 }
4467 }
4468 Map[Current] = ST.Simplify(V);
4469 }
4470 }
4471
4472 /// Starting from the original value, recursively iterates over the def-use
4473 /// chain up to known ending values represented in a map. For each traversed
4474 /// phi/select it inserts a placeholder Phi or Select.
4475 /// Reports all newly created Phi/Select nodes by adding them to the set.
4476 /// Also records the order in which the values have been traversed.
4477 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4478 SmallVectorImpl<Value *> &TraverseOrder,
4479 SimplificationTracker &ST) {
4480 SmallVector<Value *, 32> Worklist;
4481 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4482 "Address must be a Phi or Select node");
4483 auto *Dummy = PoisonValue::get(CommonType);
4484 Worklist.push_back(Original);
4485 while (!Worklist.empty()) {
4486 Value *Current = Worklist.pop_back_val();
4487 // If it is already visited or it is an ending value, then skip it.
4488 if (Map.contains(Current))
4489 continue;
4490 TraverseOrder.push_back(Current);
4491
4492 // CurrentValue must be a Phi node or select. All others must be covered
4493 // by anchors.
4494 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4495 // Is it OK to get metadata from OrigSelect?!
4496 // Create a Select placeholder with dummy value.
4497 SelectInst *Select =
4498 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4499 CurrentSelect->getName(),
4500 CurrentSelect->getIterator(), CurrentSelect);
4501 Map[Current] = Select;
4502 ST.insertNewSelect(Select);
4503 // We are interested in True and False values.
4504 Worklist.push_back(CurrentSelect->getTrueValue());
4505 Worklist.push_back(CurrentSelect->getFalseValue());
4506 } else {
4507 // It must be a Phi node then.
4508 PHINode *CurrentPhi = cast<PHINode>(Current);
4509 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4510 PHINode *PHI =
4511 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4512 Map[Current] = PHI;
4513 ST.insertNewPhi(PHI);
4514 append_range(Worklist, CurrentPhi->incoming_values());
4515 }
4516 }
4517 }
4518
4519 bool addrModeCombiningAllowed() {
4520 if (DisableComplexAddrModes)
4521 return false;
4522 switch (DifferentField) {
4523 default:
4524 return false;
4525 case ExtAddrMode::BaseRegField:
4526 return AddrSinkCombineBaseReg;
4527 case ExtAddrMode::BaseGVField:
4528 return AddrSinkCombineBaseGV;
4529 case ExtAddrMode::BaseOffsField:
4530 return AddrSinkCombineBaseOffs;
4531 case ExtAddrMode::ScaledRegField:
4532 return AddrSinkCombineScaledReg;
4533 }
4534 }
4535};
4536} // end anonymous namespace
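A hedged sketch of how the combiner above is intended to be driven (the SimplifyQuery SQ, the address value Addr, and the CollectedAddrModes vector are invented placeholders; the real driver appears later in this file):

  AddressingModeCombiner Combiner(SQ, Addr);
  for (ExtAddrMode &AM : CollectedAddrModes)  // one mode per way of reaching Addr
    if (!Combiner.addNewAddrMode(AM))         // incompatible modes: give up early
      break;
  if (Combiner.combineAddrModes()) {
    const ExtAddrMode &Merged = Combiner.getAddrMode();
    // Merged now carries the differing field as a common PHI/select value.
    (void)Merged;
  }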
4537
4538/// Try adding ScaleReg*Scale to the current addressing mode.
4539/// Return true and update AddrMode if this addr mode is legal for the target,
4540/// false if not.
4541bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4542 unsigned Depth) {
4543 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4544 // mode. Just process that directly.
4545 if (Scale == 1)
4546 return matchAddr(ScaleReg, Depth);
4547
4548 // If the scale is 0, it takes nothing to add this.
4549 if (Scale == 0)
4550 return true;
4551
4552 // If we already have a scale of this value, we can add to it, otherwise, we
4553 // need an available scale field.
4554 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4555 return false;
4556
4557 ExtAddrMode TestAddrMode = AddrMode;
4558
4559 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4560 // [A+B + A*7] -> [B+A*8].
4561 TestAddrMode.Scale += Scale;
4562 TestAddrMode.ScaledReg = ScaleReg;
4563
4564 // If the new address isn't legal, bail out.
4565 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4566 return false;
4567
4568 // It was legal, so commit it.
4569 AddrMode = TestAddrMode;
4570
4571 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4572 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4573 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4574 // go any further: we can reuse it and cannot eliminate it.
4575 ConstantInt *CI = nullptr;
4576 Value *AddLHS = nullptr;
4577 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4578 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4579 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4580 TestAddrMode.InBounds = false;
4581 TestAddrMode.ScaledReg = AddLHS;
4582 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4583
4584 // If this addressing mode is legal, commit it and remember that we folded
4585 // this instruction.
4586 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4587 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4588 AddrMode = TestAddrMode;
4589 return true;
4590 }
4591 // Restore status quo.
4592 TestAddrMode = AddrMode;
4593 }
4594
4595 // If this is an add recurrence with a constant step, return the increment
4596 // instruction and the canonicalized step.
4597 auto GetConstantStep =
4598 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4599 auto *PN = dyn_cast<PHINode>(V);
4600 if (!PN)
4601 return std::nullopt;
4602 auto IVInc = getIVIncrement(PN, &LI);
4603 if (!IVInc)
4604 return std::nullopt;
4605 // TODO: The result of the intrinsics above is two's complement. However, when
4606 // the IV inc is expressed as add or sub, iv.next is potentially a poison value.
4607 // If it has nuw or nsw flags, we need to make sure that these flags are
4608 // inferrable at the point of the memory instruction. Otherwise we are replacing
4609 // a well-defined two's-complement computation with poison. Currently, to avoid
4610 // potentially complex analysis needed to prove this, we reject such cases.
4611 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4612 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4613 return std::nullopt;
4614 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4615 return std::make_pair(IVInc->first, ConstantStep->getValue());
4616 return std::nullopt;
4617 };
4618
4619 // Try to account for the following special case:
4620 // 1. ScaleReg is an induction variable;
4621 // 2. We use it with non-zero offset;
4622 // 3. IV's increment is available at the point of memory instruction.
4623 //
4624 // In this case, we may reuse the IV increment instead of the IV Phi to
4625 // achieve the following advantages:
4626 // 1. If the IV step matches the offset, we will have no need for the offset;
4627 // 2. Even if they don't match, we will reduce the overlap of the live IV
4628 // and the IV increment, which will potentially lead to better register
4629 // assignment.
4630 if (AddrMode.BaseOffs) {
4631 if (auto IVStep = GetConstantStep(ScaleReg)) {
4632 Instruction *IVInc = IVStep->first;
4633 // The following assert is important to ensure a lack of infinite loops.
4634 // This transform is (intentionally) the inverse of the one just above.
4635 // If they don't agree on the definition of an increment, we'd alternate
4636 // back and forth indefinitely.
4637 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4638 APInt Step = IVStep->second;
4639 APInt Offset = Step * AddrMode.Scale;
4640 if (Offset.isSignedIntN(64)) {
4641 TestAddrMode.InBounds = false;
4642 TestAddrMode.ScaledReg = IVInc;
4643 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4644 // If this addressing mode is legal, commit it.
4645 // (Note that we defer the (expensive) domtree base legality check
4646 // to the very last possible point.)
4647 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4648 getDTFn().dominates(IVInc, MemoryInst)) {
4649 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4650 AddrMode = TestAddrMode;
4651 return true;
4652 }
4653 // Restore status quo.
4654 TestAddrMode = AddrMode;
4655 }
4656 }
4657 }
4658
4659 // Otherwise, just return what we have.
4660 return true;
4661}
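To make the X + C folding above concrete, here is a small hedged arithmetic sketch (plain integers standing in for the AddrMode fields; not the pass's data structures):

  // Address being matched:  Base + (X + C) * Scale
  // Folded form:            Base + C*Scale  +  X * Scale
  int64_t Scale = 4, C = 3, BaseOffs = 16;
  int64_t NewBaseOffs = BaseOffs + C * Scale; // 16 + 3*4 = 28
  // Afterwards: BaseOffs = 28, ScaledReg = X, Scale = 4 (and InBounds is dropped).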
4662
4663/// This is a little filter, which returns true if an addressing computation
4664/// involving I might be folded into a load/store accessing it.
4665/// This doesn't need to be perfect, but needs to accept at least
4666/// the set of instructions that MatchOperationAddr can.
4667static bool MightBeFoldableInst(Instruction *I) {
4668 switch (I->getOpcode()) {
4669 case Instruction::BitCast:
4670 case Instruction::AddrSpaceCast:
4671 // Don't touch identity bitcasts.
4672 if (I->getType() == I->getOperand(0)->getType())
4673 return false;
4674 return I->getType()->isIntOrPtrTy();
4675 case Instruction::PtrToInt:
4676 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4677 return true;
4678 case Instruction::IntToPtr:
4679 // We know the input is intptr_t, so this is foldable.
4680 return true;
4681 case Instruction::Add:
4682 return true;
4683 case Instruction::Mul:
4684 case Instruction::Shl:
4685 // Can only handle X*C and X << C.
4686 return isa<ConstantInt>(I->getOperand(1));
4687 case Instruction::GetElementPtr:
4688 return true;
4689 default:
4690 return false;
4691 }
4692}
4693
4694/// Check whether or not \p Val is a legal instruction for \p TLI.
4695/// \note \p Val is assumed to be the product of some type promotion.
4696/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4697/// to be legal, as the non-promoted value would have had the same state.
4698static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4699 const DataLayout &DL, Value *Val) {
4700 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4701 if (!PromotedInst)
4702 return false;
4703 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4704 // If the ISDOpcode is undefined, it was undefined before the promotion.
4705 if (!ISDOpcode)
4706 return true;
4707 // Otherwise, check if the promoted instruction is legal or not.
4708 return TLI.isOperationLegalOrCustom(
4709 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4710}
4711
4712namespace {
4713
4714 /// Helper class to perform type promotion.
4715class TypePromotionHelper {
4716 /// Utility function to add a promoted instruction \p ExtOpnd to
4717 /// \p PromotedInsts and record the type of extension we have seen.
4718 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4719 Instruction *ExtOpnd, bool IsSExt) {
4720 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4721 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4722 if (!Inserted) {
4723 // If the new extension is same as original, the information in
4724 // PromotedInsts[ExtOpnd] is still correct.
4725 if (It->second.getInt() == ExtTy)
4726 return;
4727
4728 // Now the new extension is different from old extension, we make
4729 // the type information invalid by setting extension type to
4730 // BothExtension.
4731 ExtTy = BothExtension;
4732 }
4733 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4734 }
4735
4736 /// Utility function to query the original type of instruction \p Opnd
4737 /// with a matched extension type. If the extension doesn't match, we
4738 /// cannot use the information we had on the original type.
4739 /// BothExtension doesn't match any extension type.
4740 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4741 Instruction *Opnd, bool IsSExt) {
4742 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4743 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4744 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4745 return It->second.getPointer();
4746 return nullptr;
4747 }
4748
4749 /// Utility function to check whether or not a sign or zero extension
4750 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4751 /// either using the operands of \p Inst or promoting \p Inst.
4752 /// The type of the extension is defined by \p IsSExt.
4753 /// In other words, check if:
4754 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4755 /// #1 Promotion applies:
4756 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4757 /// #2 Operand reuses:
4758 /// ext opnd1 to ConsideredExtType.
4759 /// \p PromotedInsts maps the instructions to their type before promotion.
4760 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4761 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4762
4763 /// Utility function to determine if \p OpIdx should be promoted when
4764 /// promoting \p Inst.
4765 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4766 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4767 }
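  // Illustrative sketch (editor's addition, not from the original source): for
  //   %s = select i1 %c, i16 %a, i16 %b
  // operand 0 is the i1 condition, which must keep its type, so only the two
  // value operands are widened when the select itself is promoted.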
4768
4769 /// Utility function to promote the operand of \p Ext when this
4770 /// operand is a promotable trunc or sext or zext.
4771 /// \p PromotedInsts maps the instructions to their type before promotion.
4772 /// \p CreatedInstsCost[out] contains the cost of all instructions
4773 /// created to promote the operand of Ext.
4774 /// Newly added extensions are inserted in \p Exts.
4775 /// Newly added truncates are inserted in \p Truncs.
4776 /// Should never be called directly.
4777 /// \return The promoted value which is used instead of Ext.
4778 static Value *promoteOperandForTruncAndAnyExt(
4779 Instruction *Ext, TypePromotionTransaction &TPT,
4780 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4781 SmallVectorImpl<Instruction *> *Exts,
4782 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4783
4784 /// Utility function to promote the operand of \p Ext when this
4785 /// operand is promotable and is not a supported trunc or sext.
4786 /// \p PromotedInsts maps the instructions to their type before promotion.
4787 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4788 /// created to promote the operand of Ext.
4789 /// Newly added extensions are inserted in \p Exts.
4790 /// Newly added truncates are inserted in \p Truncs.
4791 /// Should never be called directly.
4792 /// \return The promoted value which is used instead of Ext.
4793 static Value *promoteOperandForOther(Instruction *Ext,
4794 TypePromotionTransaction &TPT,
4795 InstrToOrigTy &PromotedInsts,
4796 unsigned &CreatedInstsCost,
4797 SmallVectorImpl<Instruction *> *Exts,
4798 SmallVectorImpl<Instruction *> *Truncs,
4799 const TargetLowering &TLI, bool IsSExt);
4800
4801 /// \see promoteOperandForOther.
4802 static Value *signExtendOperandForOther(
4803 Instruction *Ext, TypePromotionTransaction &TPT,
4804 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4805 SmallVectorImpl<Instruction *> *Exts,
4806 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4807 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4808 Exts, Truncs, TLI, true);
4809 }
4810
4811 /// \see promoteOperandForOther.
4812 static Value *zeroExtendOperandForOther(
4813 Instruction *Ext, TypePromotionTransaction &TPT,
4814 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4815 SmallVectorImpl<Instruction *> *Exts,
4816 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4817 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4818 Exts, Truncs, TLI, false);
4819 }
4820
4821public:
4822 /// Type for the utility function that promotes the operand of Ext.
4823 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4824 InstrToOrigTy &PromotedInsts,
4825 unsigned &CreatedInstsCost,
4826 SmallVectorImpl<Instruction *> *Exts,
4827 SmallVectorImpl<Instruction *> *Truncs,
4828 const TargetLowering &TLI);
4829
4830 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4831 /// action to promote the operand of \p Ext instead of using Ext.
4832 /// \return NULL if no promotable action is possible with the current
4833 /// sign extension.
4834 /// \p InsertedInsts keeps track of all the instructions inserted by the
4835 /// other CodeGenPrepare optimizations. This information is important
4836 /// because we do not want to promote these instructions as CodeGenPrepare
4837 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4838 /// \p PromotedInsts maps the instructions to their type before promotion.
4839 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4840 const TargetLowering &TLI,
4841 const InstrToOrigTy &PromotedInsts);
4842};
4843
4844} // end anonymous namespace
4845
4846bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4847 Type *ConsideredExtType,
4848 const InstrToOrigTy &PromotedInsts,
4849 bool IsSExt) {
4850 // The promotion helper does not know how to deal with vector types yet.
4851 // To be able to fix that, we would need to fix the places where we
4852 // statically extend, e.g., constants and such.
4853 if (Inst->getType()->isVectorTy())
4854 return false;
4855
4856 // We can always get through zext.
4857 if (isa<ZExtInst>(Inst))
4858 return true;
4859
4860 // sext(sext) is ok too.
4861 if (IsSExt && isa<SExtInst>(Inst))
4862 return true;
4863
4864 // We can get through a binary operator if it is legal. In other words, the
4865 // binary operator must have a nuw or nsw flag.
4866 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4867 if (isa<OverflowingBinaryOperator>(BinOp) &&
4868 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4869 (IsSExt && BinOp->hasNoSignedWrap())))
4870 return true;
4871
4872 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4873 if ((Inst->getOpcode() == Instruction::And ||
4874 Inst->getOpcode() == Instruction::Or))
4875 return true;
4876
4877 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4878 if (Inst->getOpcode() == Instruction::Xor) {
4879 // Make sure it is not a NOT.
4880 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4881 if (!Cst->getValue().isAllOnes())
4882 return true;
4883 }
4884
4885 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4886 // It may change a poisoned value into a regular value, like
4887 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4888 // poisoned value regular value
4889 // It should be OK since undef covers valid value.
4890 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4891 return true;
4892
4893 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4894 // It may change a poisoned value into a regular value, like
4895 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4896 // poisoned value regular value
4897 // It should be OK since undef covers valid value.
4898 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4899 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4900 if (ExtInst->hasOneUse()) {
4901 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4902 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4903 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4904 if (Cst &&
4905 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4906 return true;
4907 }
4908 }
4909 }
4910
4911 // Check if we can do the following simplification.
4912 // ext(trunc(opnd)) --> ext(opnd)
4913 if (!isa<TruncInst>(Inst))
4914 return false;
4915
4916 Value *OpndVal = Inst->getOperand(0);
4917 // Check if we can use this operand in the extension.
4918 // If the type is larger than the result type of the extension, we cannot.
4919 if (!OpndVal->getType()->isIntegerTy() ||
4920 OpndVal->getType()->getIntegerBitWidth() >
4921 ConsideredExtType->getIntegerBitWidth())
4922 return false;
4923
4924 // If the operand of the truncate is not an instruction, we will not have
4925 // any information on the dropped bits.
4926 // (Actually we could for constants, but it is not worth the extra logic.)
4927 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4928 if (!Opnd)
4929 return false;
4930
4931 // Check if the source of the truncate is narrow enough.
4932 // I.e., check that the trunc just drops extended bits of the same kind as
4933 // the extension.
4934 // #1 get the type of the operand and check the kind of the extended bits.
4935 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4936 if (OpndType)
4937 ;
4938 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4939 OpndType = Opnd->getOperand(0)->getType();
4940 else
4941 return false;
4942
4943 // #2 check that the truncate just drops extended bits.
4944 return Inst->getType()->getIntegerBitWidth() >=
4945 OpndType->getIntegerBitWidth();
4946}
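// Illustrative sketch (editor's addition, not from the original source) of the
// trunc case above, with made-up value names:
//   %w = sext i16 %x to i32
//   %t = trunc i32 %w to i16        ; trunc only drops bits added by the sext
//   %e = sext i16 %t to i64
// The dropped bits are known (from the visible sext, or from PromotedInsts via
// getOrigType) to be sign extension bits of the matching kind, so canGetThrough
// returns true and the outer sext can later extend the pre-trunc value directly.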
4947
4948TypePromotionHelper::Action TypePromotionHelper::getAction(
4949 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4950 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4951 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4952 "Unexpected instruction type");
4953 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4954 Type *ExtTy = Ext->getType();
4955 bool IsSExt = isa<SExtInst>(Ext);
4956 // If the operand of the extension is not an instruction, we cannot
4957 // get through.
4958 // If it is, check whether we can get through it.
4959 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4960 return nullptr;
4961
4962 // Do not promote if the operand has been added by codegenprepare.
4963 // Otherwise, it means we are undoing an optimization that is likely to be
4964 // redone, thus causing potential infinite loop.
4965 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4966 return nullptr;
4967
4968 // SExt or Trunc instructions.
4969 // Return the related handler.
4970 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4971 isa<ZExtInst>(ExtOpnd))
4972 return promoteOperandForTruncAndAnyExt;
4973
4974 // Regular instruction.
4975 // Abort early if we will have to insert non-free instructions.
4976 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4977 return nullptr;
4978 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4979}
4980
4981Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4982 Instruction *SExt, TypePromotionTransaction &TPT,
4983 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4984 SmallVectorImpl<Instruction *> *Exts,
4985 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4986 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4987 // get through it and this method should not be called.
4988 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4989 Value *ExtVal = SExt;
4990 bool HasMergedNonFreeExt = false;
4991 if (isa<ZExtInst>(SExtOpnd)) {
4992 // Replace s|zext(zext(opnd))
4993 // => zext(opnd).
4994 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4995 Value *ZExt =
4996 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4997 TPT.replaceAllUsesWith(SExt, ZExt);
4998 TPT.eraseInstruction(SExt);
4999 ExtVal = ZExt;
5000 } else {
5001 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
5002 // => z|sext(opnd).
5003 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
5004 }
5005 CreatedInstsCost = 0;
5006
5007 // Remove dead code.
5008 if (SExtOpnd->use_empty())
5009 TPT.eraseInstruction(SExtOpnd);
5010
5011 // Check if the extension is still needed.
5012 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5013 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5014 if (ExtInst) {
5015 if (Exts)
5016 Exts->push_back(ExtInst);
5017 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5018 }
5019 return ExtVal;
5020 }
5021
5022 // At this point we have: ext ty opnd to ty.
5023 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5024 Value *NextVal = ExtInst->getOperand(0);
5025 TPT.eraseInstruction(ExtInst, NextVal);
5026 return NextVal;
5027}
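// Illustrative sketch (editor's addition, not from the original source) of the
// rewrite performed above, assuming %x is an i8 value:
//   %z = zext i8 %x to i16
//   %e = sext i16 %z to i32
// becomes a single
//   %e = zext i8 %x to i32
// and, when the remaining extension turns out to be a same-type no-op, it is
// erased and its uses are rewired to its operand instead.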
5028
5029Value *TypePromotionHelper::promoteOperandForOther(
5030 Instruction *Ext, TypePromotionTransaction &TPT,
5031 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5032 SmallVectorImpl<Instruction *> *Exts,
5033 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5034 bool IsSExt) {
5035 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5036 // get through it and this method should not be called.
5037 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5038 CreatedInstsCost = 0;
5039 if (!ExtOpnd->hasOneUse()) {
5040 // ExtOpnd will be promoted.
5041 // All its uses, but Ext, will need to use a truncated value of the
5042 // promoted version.
5043 // Create the truncate now.
5044 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5045 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5046 // Insert it just after the definition.
5047 ITrunc->moveAfter(ExtOpnd);
5048 if (Truncs)
5049 Truncs->push_back(ITrunc);
5050 }
5051
5052 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5053 // Restore the operand of Ext (which has been replaced by the previous call
5054 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5055 TPT.setOperand(Ext, 0, ExtOpnd);
5056 }
5057
5058 // Get through the Instruction:
5059 // 1. Update its type.
5060 // 2. Replace the uses of Ext by Inst.
5061 // 3. Extend each operand that needs to be extended.
5062
5063 // Remember the original type of the instruction before promotion.
5064 // This is useful to know that the high bits are sign extended bits.
5065 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5066 // Step #1.
5067 TPT.mutateType(ExtOpnd, Ext->getType());
5068 // Step #2.
5069 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5070 // Step #3.
5071 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5072 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5073 ++OpIdx) {
5074 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5075 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5076 !shouldExtOperand(ExtOpnd, OpIdx)) {
5077 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5078 continue;
5079 }
5080 // Check if we can statically extend the operand.
5081 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5082 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5083 LLVM_DEBUG(dbgs() << "Statically extend\n");
5084 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5085 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5086 : Cst->getValue().zext(BitWidth);
5087 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5088 continue;
5089 }
5090 // UndefValues are typed, so we have to statically extend them.
5091 if (isa<UndefValue>(Opnd)) {
5092 LLVM_DEBUG(dbgs() << "Statically extend\n");
5093 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5094 continue;
5095 }
5096
5097 // Otherwise we have to explicitly sign extend the operand.
5098 Value *ValForExtOpnd = IsSExt
5099 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5100 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5101 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5102 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5103 if (!InstForExtOpnd)
5104 continue;
5105
5106 if (Exts)
5107 Exts->push_back(InstForExtOpnd);
5108
5109 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5110 }
5111 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5112 TPT.eraseInstruction(Ext);
5113 return ExtOpnd;
5114}
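// Illustrative sketch (editor's addition, not from the original source) of a
// full promotion. Assuming %x is i32 and the add has another (non-ext) use:
//   %a = add nsw i32 %x, 1
//   %e = sext i32 %a to i64
//   use(%a)
// is rewritten into
//   %px = sext i32 %x to i64
//   %pa = add nsw i64 %px, 1        ; ExtOpnd mutated to the wider type
//   %t  = trunc i64 %pa to i32      ; created for the other use of %a
//   use(%t)
// and the original sext %e disappears, its uses now taking %pa.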
5115
5116/// Check whether or not promoting an instruction to a wider type is profitable.
5117/// \p NewCost gives the cost of extension instructions created by the
5118/// promotion.
5119/// \p OldCost gives the cost of extension instructions before the promotion
5120/// plus the number of instructions that have been
5121 /// matched in the addressing mode by the promotion.
5122/// \p PromotedOperand is the value that has been promoted.
5123/// \return True if the promotion is profitable, false otherwise.
5124bool AddressingModeMatcher::isPromotionProfitable(
5125 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5126 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5127 << '\n');
5128 // The cost of the new extensions is greater than the cost of the
5129 // old extension plus what we folded.
5130 // This is not profitable.
5131 if (NewCost > OldCost)
5132 return false;
5133 if (NewCost < OldCost)
5134 return true;
5135 // The promotion is neutral but it may help folding the sign extension in
5136 // loads for instance.
5137 // Check that we did not create an illegal instruction.
5138 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5139}
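// Illustrative sketch (editor's addition, not from the original source): if the
// promotion creates one non-free extension (NewCost = 1) while the original
// extension cost 1 and one extra instruction was folded into the addressing
// mode (OldCost = 1 + 1 = 2), the promotion pays off; with equal costs it is
// only kept when the promoted instruction remains legal for the target.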
5140
5141/// Given an instruction or constant expr, see if we can fold the operation
5142/// into the addressing mode. If so, update the addressing mode and return
5143/// true, otherwise return false without modifying AddrMode.
5144/// If \p MovedAway is not NULL, it contains the information of whether or
5145/// not AddrInst has to be folded into the addressing mode on success.
5146 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
5147/// because it has been moved away.
5148/// Thus AddrInst must not be added in the matched instructions.
5149/// This state can happen when AddrInst is a sext, since it may be moved away.
5150/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5151/// not be referenced anymore.
5152bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5153 unsigned Depth,
5154 bool *MovedAway) {
5155 // Avoid exponential behavior on extremely deep expression trees.
5156 if (Depth >= 5)
5157 return false;
5158
5159 // By default, all matched instructions stay in place.
5160 if (MovedAway)
5161 *MovedAway = false;
5162
5163 switch (Opcode) {
5164 case Instruction::PtrToInt:
5165 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5166 return matchAddr(AddrInst->getOperand(0), Depth);
5167 case Instruction::IntToPtr: {
5168 auto AS = AddrInst->getType()->getPointerAddressSpace();
5169 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5170 // This inttoptr is a no-op if the integer type is pointer sized.
5171 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5172 return matchAddr(AddrInst->getOperand(0), Depth);
5173 return false;
5174 }
5175 case Instruction::BitCast:
5176 // BitCast is always a noop, and we can handle it as long as it is
5177 // int->int or pointer->pointer (we don't want int<->fp or something).
5178 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5179 // Don't touch identity bitcasts. These were probably put here by LSR,
5180 // and we don't want to mess around with them. Assume it knows what it
5181 // is doing.
5182 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5183 return matchAddr(AddrInst->getOperand(0), Depth);
5184 return false;
5185 case Instruction::AddrSpaceCast: {
5186 unsigned SrcAS =
5187 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5188 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5189 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5190 return matchAddr(AddrInst->getOperand(0), Depth);
5191 return false;
5192 }
5193 case Instruction::Add: {
5194 // Check to see if we can merge in one operand, then the other. If so, we
5195 // win.
5196 ExtAddrMode BackupAddrMode = AddrMode;
5197 unsigned OldSize = AddrModeInsts.size();
5198 // Start a transaction at this point.
5199 // The LHS may match but not the RHS.
5200 // Therefore, we need a higher level restoration point to undo partially
5201 // matched operation.
5202 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5203 TPT.getRestorationPoint();
5204
5205 // Try to match an integer constant second to increase its chance of ending
5206 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5207 int First = 0, Second = 1;
5208 if (isa<ConstantInt>(AddrInst->getOperand(First))
5209 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5210 std::swap(First, Second);
5211 AddrMode.InBounds = false;
5212 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5213 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5214 return true;
5215
5216 // Restore the old addr mode info.
5217 AddrMode = BackupAddrMode;
5218 AddrModeInsts.resize(OldSize);
5219 TPT.rollback(LastKnownGood);
5220
5221 // Otherwise this was over-aggressive. Try merging operands in the opposite
5222 // order.
5223 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5224 matchAddr(AddrInst->getOperand(First), Depth + 1))
5225 return true;
5226
5227 // Otherwise we definitely can't merge the ADD in.
5228 AddrMode = BackupAddrMode;
5229 AddrModeInsts.resize(OldSize);
5230 TPT.rollback(LastKnownGood);
5231 break;
5232 }
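  // Illustrative sketch (editor's addition, not from the original source): for
  // an address such as
  //   %a = add i64 %base, 24
  // the constant is matched second, so 24 lands in AddrMode.BaseOffs while
  // %base stays available as the base register, giving [%base + 24] on targets
  // where that immediate form is legal.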
5233 // case Instruction::Or:
5234 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5235 // break;
5236 case Instruction::Mul:
5237 case Instruction::Shl: {
5238 // Can only handle X*C and X << C.
5239 AddrMode.InBounds = false;
5240 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5241 if (!RHS || RHS->getBitWidth() > 64)
5242 return false;
5243 int64_t Scale = Opcode == Instruction::Shl
5244 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5245 : RHS->getSExtValue();
5246
5247 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5248 }
5249 case Instruction::GetElementPtr: {
5250 // Scan the GEP. We check that it contains constant offsets and at most
5251 // one variable offset.
5252 int VariableOperand = -1;
5253 unsigned VariableScale = 0;
5254
5255 int64_t ConstantOffset = 0;
5256 gep_type_iterator GTI = gep_type_begin(AddrInst);
5257 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5258 if (StructType *STy = GTI.getStructTypeOrNull()) {
5259 const StructLayout *SL = DL.getStructLayout(STy);
5260 unsigned Idx =
5261 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5262 ConstantOffset += SL->getElementOffset(Idx);
5263 } else {
5264 TypeSize TS = GTI.getSequentialElementStride(DL);
5265 if (TS.isNonZero()) {
5266 // The optimisations below currently only work for fixed offsets.
5267 if (TS.isScalable())
5268 return false;
5269 int64_t TypeSize = TS.getFixedValue();
5270 if (ConstantInt *CI =
5271 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5272 const APInt &CVal = CI->getValue();
5273 if (CVal.getSignificantBits() <= 64) {
5274 ConstantOffset += CVal.getSExtValue() * TypeSize;
5275 continue;
5276 }
5277 }
5278 // We only allow one variable index at the moment.
5279 if (VariableOperand != -1)
5280 return false;
5281
5282 // Remember the variable index.
5283 VariableOperand = i;
5284 VariableScale = TypeSize;
5285 }
5286 }
5287 }
5288
5289 // A common case is for the GEP to only do a constant offset. In this case,
5290 // just add it to the disp field and check validity.
5291 if (VariableOperand == -1) {
5292 AddrMode.BaseOffs += ConstantOffset;
5293 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5294 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5295 AddrMode.InBounds = false;
5296 return true;
5297 }
5298 AddrMode.BaseOffs -= ConstantOffset;
5299
5300 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5301 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5302 ConstantOffset > 0) {
5303 // Record GEPs with non-zero offsets as candidates for splitting in
5304 // the event that the offset cannot fit into the r+i addressing mode.
5305 // Simple and common case that only one GEP is used in calculating the
5306 // address for the memory access.
5307 Value *Base = AddrInst->getOperand(0);
5308 auto *BaseI = dyn_cast<Instruction>(Base);
5309 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5310 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5311 (BaseI && !isa<CastInst>(BaseI) &&
5312 !isa<GetElementPtrInst>(BaseI))) {
5313 // Make sure the parent block allows inserting non-PHI instructions
5314 // before the terminator.
5315 BasicBlock *Parent = BaseI ? BaseI->getParent()
5316 : &GEP->getFunction()->getEntryBlock();
5317 if (!Parent->getTerminator()->isEHPad())
5318 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5319 }
5320 }
5321
5322 return false;
5323 }
5324
5325 // Save the valid addressing mode in case we can't match.
5326 ExtAddrMode BackupAddrMode = AddrMode;
5327 unsigned OldSize = AddrModeInsts.size();
5328
5329 // See if the scale and offset amount is valid for this target.
5330 AddrMode.BaseOffs += ConstantOffset;
5331 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5332 AddrMode.InBounds = false;
5333
5334 // Match the base operand of the GEP.
5335 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5336 // If it couldn't be matched, just stuff the value in a register.
5337 if (AddrMode.HasBaseReg) {
5338 AddrMode = BackupAddrMode;
5339 AddrModeInsts.resize(OldSize);
5340 return false;
5341 }
5342 AddrMode.HasBaseReg = true;
5343 AddrMode.BaseReg = AddrInst->getOperand(0);
5344 }
5345
5346 // Match the remaining variable portion of the GEP.
5347 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5348 Depth)) {
5349 // If it couldn't be matched, try stuffing the base into a register
5350 // instead of matching it, and retrying the match of the scale.
5351 AddrMode = BackupAddrMode;
5352 AddrModeInsts.resize(OldSize);
5353 if (AddrMode.HasBaseReg)
5354 return false;
5355 AddrMode.HasBaseReg = true;
5356 AddrMode.BaseReg = AddrInst->getOperand(0);
5357 AddrMode.BaseOffs += ConstantOffset;
5358 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5359 VariableScale, Depth)) {
5360 // If even that didn't work, bail.
5361 AddrMode = BackupAddrMode;
5362 AddrModeInsts.resize(OldSize);
5363 return false;
5364 }
5365 }
5366
5367 return true;
5368 }
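  // Illustrative sketch (editor's addition, not from the original source) of
  // the GEP handling above. For a hypothetical
  //   %p = getelementptr inbounds {i32, i32}, ptr %base, i64 %i, i32 1
  // the struct field contributes a constant 4 to ConstantOffset, while %i is
  // the single variable operand with a stride of 8 (the struct size), so the
  // matcher tries BaseReg = %base, Scale = 8, ScaledReg = %i, BaseOffs = 4.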
5369 case Instruction::SExt:
5370 case Instruction::ZExt: {
5371 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5372 if (!Ext)
5373 return false;
5374
5375 // Try to move this ext out of the way of the addressing mode.
5376 // Ask for a method for doing so.
5377 TypePromotionHelper::Action TPH =
5378 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5379 if (!TPH)
5380 return false;
5381
5382 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5383 TPT.getRestorationPoint();
5384 unsigned CreatedInstsCost = 0;
5385 unsigned ExtCost = !TLI.isExtFree(Ext);
5386 Value *PromotedOperand =
5387 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5388 // SExt has been moved away.
5389 // Thus either it will be rematched later in the recursive calls or it is
5390 // gone. Anyway, we must not fold it into the addressing mode at this point.
5391 // E.g.,
5392 // op = add opnd, 1
5393 // idx = ext op
5394 // addr = gep base, idx
5395 // is now:
5396 // promotedOpnd = ext opnd <- no match here
5397 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5398 // addr = gep base, op <- match
5399 if (MovedAway)
5400 *MovedAway = true;
5401
5402 assert(PromotedOperand &&
5403 "TypePromotionHelper should have filtered out those cases");
5404
5405 ExtAddrMode BackupAddrMode = AddrMode;
5406 unsigned OldSize = AddrModeInsts.size();
5407
5408 if (!matchAddr(PromotedOperand, Depth) ||
5409 // The total of the new cost is equal to the cost of the created
5410 // instructions.
5411 // The total of the old cost is equal to the cost of the extension plus
5412 // what we have saved in the addressing mode.
5413 !isPromotionProfitable(CreatedInstsCost,
5414 ExtCost + (AddrModeInsts.size() - OldSize),
5415 PromotedOperand)) {
5416 AddrMode = BackupAddrMode;
5417 AddrModeInsts.resize(OldSize);
5418 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5419 TPT.rollback(LastKnownGood);
5420 return false;
5421 }
5422
5423 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5424 AddrMode.replaceWith(Ext, PromotedOperand);
5425 return true;
5426 }
5427 case Instruction::Call:
5428 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5429 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5430 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5431 if (TLI.addressingModeSupportsTLS(GV))
5432 return matchAddr(AddrInst->getOperand(0), Depth);
5433 }
5434 }
5435 break;
5436 }
5437 return false;
5438}
5439
5440/// If we can, try to add the value of 'Addr' into the current addressing mode.
5441/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5442/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5443/// for the target.
5444///
5445bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5446 // Start a transaction at this point that we will rollback if the matching
5447 // fails.
5448 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5449 TPT.getRestorationPoint();
5450 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5451 if (CI->getValue().isSignedIntN(64)) {
5452 // Check if the addition would result in a signed overflow.
5453 int64_t Result;
5454 bool Overflow =
5455 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5456 if (!Overflow) {
5457 // Fold in immediates if legal for the target.
5458 AddrMode.BaseOffs = Result;
5459 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5460 return true;
5461 AddrMode.BaseOffs -= CI->getSExtValue();
5462 }
5463 }
5464 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5465 // If this is a global variable, try to fold it into the addressing mode.
5466 if (!AddrMode.BaseGV) {
5467 AddrMode.BaseGV = GV;
5468 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5469 return true;
5470 AddrMode.BaseGV = nullptr;
5471 }
5472 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5473 ExtAddrMode BackupAddrMode = AddrMode;
5474 unsigned OldSize = AddrModeInsts.size();
5475
5476 // Check to see if it is possible to fold this operation.
5477 bool MovedAway = false;
5478 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5479 // This instruction may have been moved away. If so, there is nothing
5480 // to check here.
5481 if (MovedAway)
5482 return true;
5483 // Okay, it's possible to fold this. Check to see if it is actually
5484 // *profitable* to do so. We use a simple cost model to avoid increasing
5485 // register pressure too much.
5486 if (I->hasOneUse() ||
5487 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5488 AddrModeInsts.push_back(I);
5489 return true;
5490 }
5491
5492 // It isn't profitable to do this, roll back.
5493 AddrMode = BackupAddrMode;
5494 AddrModeInsts.resize(OldSize);
5495 TPT.rollback(LastKnownGood);
5496 }
5497 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5498 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5499 return true;
5500 TPT.rollback(LastKnownGood);
5501 } else if (isa<ConstantPointerNull>(Addr)) {
5502 // Null pointer gets folded without affecting the addressing mode.
5503 return true;
5504 }
5505
5506 // Worst case, the target should support [reg] addressing modes. :)
5507 if (!AddrMode.HasBaseReg) {
5508 AddrMode.HasBaseReg = true;
5509 AddrMode.BaseReg = Addr;
5510 // Still check for legality in case the target supports [imm] but not [i+r].
5511 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5512 return true;
5513 AddrMode.HasBaseReg = false;
5514 AddrMode.BaseReg = nullptr;
5515 }
5516
5517 // If the base register is already taken, see if we can do [r+r].
5518 if (AddrMode.Scale == 0) {
5519 AddrMode.Scale = 1;
5520 AddrMode.ScaledReg = Addr;
5521 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5522 return true;
5523 AddrMode.Scale = 0;
5524 AddrMode.ScaledReg = nullptr;
5525 }
5526 // Couldn't match.
5527 TPT.rollback(LastKnownGood);
5528 return false;
5529}
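// Illustrative sketch (editor's addition, not from the original source):
// matching a constant address component such as 40 folds it into BaseOffs when
// [base + 40] is legal; matching a plain SSA value first tries it as the base
// register and, if a base is already taken, retries it as a scaled register
// with Scale = 1, i.e. the [r + r] form.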
5530
5531/// Check to see if all uses of OpVal by the specified inline asm call are due
5532/// to memory operands. If so, return true, otherwise return false.
5533static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5534 const TargetLowering &TLI,
5535 const TargetRegisterInfo &TRI) {
5536 const Function *F = CI->getFunction();
5537 TargetLowering::AsmOperandInfoVector TargetConstraints =
5538 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5539
5540 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5541 // Compute the constraint code and ConstraintType to use.
5542 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5543
5544 // If this asm operand is our Value*, and if it isn't an indirect memory
5545 // operand, we can't fold it! TODO: Also handle C_Address?
5546 if (OpInfo.CallOperandVal == OpVal &&
5547 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5548 !OpInfo.isIndirect))
5549 return false;
5550 }
5551
5552 return true;
5553}
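// Illustrative sketch (editor's addition, not from the original source): for a
// hypothetical call such as
//   call void asm sideeffect "", "*m"(ptr elementtype(i32) %p)
// the "m" constraint is an indirect memory operand, so folding the address
// computation of %p into the asm is allowed; a register constraint like "r"
// on the same value would make this helper return false.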
5554
5555/// Recursively walk all the uses of I until we find a memory use.
5556/// If we find an obviously non-foldable instruction, return true.
5557/// Add accessed addresses and types to MemoryUses.
5558static bool FindAllMemoryUses(
5559 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5560 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5561 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5562 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5563 // If we already considered this instruction, we're done.
5564 if (!ConsideredInsts.insert(I).second)
5565 return false;
5566
5567 // If this is an obviously unfoldable instruction, bail out.
5568 if (!MightBeFoldableInst(I))
5569 return true;
5570
5571 // Loop over all the uses, recursively processing them.
5572 for (Use &U : I->uses()) {
5573 // Conservatively return true if we're seeing a large number or a deep chain
5574 // of users. This avoids excessive compilation times in pathological cases.
5575 if (SeenInsts++ >= MaxAddressUsersToScan)
5576 return true;
5577
5578 Instruction *UserI = cast<Instruction>(U.getUser());
5579 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5580 MemoryUses.push_back({&U, LI->getType()});
5581 continue;
5582 }
5583
5584 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5585 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5586 return true; // Storing addr, not into addr.
5587 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5588 continue;
5589 }
5590
5591 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5592 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5593 return true; // Storing addr, not into addr.
5594 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5595 continue;
5596 }
5597
5598 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5599 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5600 return true; // Storing addr, not into addr.
5601 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5602 continue;
5603 }
5604
5605 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
5606 SmallVector<Value *, 2> PtrOps;
5607 Type *AccessTy;
5608 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5609 return true;
5610
5611 if (!find(PtrOps, U.get()))
5612 return true;
5613
5614 MemoryUses.push_back({&U, AccessTy});
5615 continue;
5616 }
5617
5618 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5619 if (CI->hasFnAttr(Attribute::Cold)) {
5620 // If this is a cold call, we can sink the addressing calculation into
5621 // the cold path. See optimizeCallInst
5622 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5623 continue;
5624 }
5625
5626 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5627 if (!IA)
5628 return true;
5629
5630 // If this is a memory operand, we're cool, otherwise bail out.
5631 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5632 return true;
5633 continue;
5634 }
5635
5636 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5637 PSI, BFI, SeenInsts))
5638 return true;
5639 }
5640
5641 return false;
5642}
5643
5644static bool FindAllMemoryUses(
5645 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5646 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5647 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5648 unsigned SeenInsts = 0;
5649 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5650 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5651 PSI, BFI, SeenInsts);
5652}
5653
5654
5655/// Return true if Val is already known to be live at the use site that we're
5656/// folding it into. If so, there is no cost to include it in the addressing
5657/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5658/// instruction already.
5659bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5660 Value *KnownLive1,
5661 Value *KnownLive2) {
5662 // If Val is either of the known-live values, we know it is live!
5663 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5664 return true;
5665
5666 // All values other than instructions and arguments (e.g. constants) are live.
5667 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5668 return true;
5669
5670 // If Val is a constant sized alloca in the entry block, it is live; this is
5671 // true because it is just a reference to the stack/frame pointer, which is
5672 // live for the whole function.
5673 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5674 if (AI->isStaticAlloca())
5675 return true;
5676
5677 // Check to see if this value is already used in the memory instruction's
5678 // block. If so, it's already live into the block at the very least, so we
5679 // can reasonably fold it.
5680 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5681}
5682
5683/// It is possible for the addressing mode of the machine to fold the specified
5684/// instruction into a load or store that ultimately uses it.
5685/// However, the specified instruction has multiple uses.
5686/// Given this, it may actually increase register pressure to fold it
5687/// into the load. For example, consider this code:
5688///
5689/// X = ...
5690/// Y = X+1
5691/// use(Y) -> nonload/store
5692/// Z = Y+1
5693/// load Z
5694///
5695/// In this case, Y has multiple uses, and can be folded into the load of Z
5696/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5697/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5698/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5699/// number of computations either.
5700///
5701/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5702/// X was live across 'load Z' for other reasons, we actually *would* want to
5703/// fold the addressing mode in the Z case. This would make Y die earlier.
5704bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5705 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5706 if (IgnoreProfitability)
5707 return true;
5708
5709 // AMBefore is the addressing mode before this instruction was folded into it,
5710 // and AMAfter is the addressing mode after the instruction was folded. Get
5711 // the set of registers referenced by AMAfter and subtract out those
5712 // referenced by AMBefore: this is the set of values which folding in this
5713 // address extends the lifetime of.
5714 //
5715 // Note that there are only two potential values being referenced here,
5716 // BaseReg and ScaleReg (global addresses are always available, as are any
5717 // folded immediates).
5718 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5719
5720 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5721 // lifetime wasn't extended by adding this instruction.
5722 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5723 BaseReg = nullptr;
5724 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5725 ScaledReg = nullptr;
5726
5727 // If folding this instruction (and its subexprs) didn't extend any live
5728 // ranges, we're ok with it.
5729 if (!BaseReg && !ScaledReg)
5730 return true;
5731
5732 // If all uses of this instruction can have the address mode sunk into them,
5733 // we can remove the addressing mode and effectively trade one live register
5734 // for another (at worst.) In this context, folding an addressing mode into
5735 // the use is just a particularly nice way of sinking it.
5736 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5737 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5738 return false; // Has a non-memory, non-foldable use!
5739
5740 // Now that we know that all uses of this instruction are part of a chain of
5741 // computation involving only operations that could theoretically be folded
5742 // into a memory use, loop over each of these memory operation uses and see
5743 // if they could *actually* fold the instruction. The assumption is that
5744 // addressing modes are cheap and that duplicating the computation involved
5745 // many times is worthwhile, even on a fastpath. For sinking candidates
5746 // (i.e. cold call sites), this serves as a way to prevent excessive code
5747 // growth since most architectures have some reasonable small and fast way to
5748 // compute an effective address. (i.e LEA on x86)
5749 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5750 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5751 Value *Address = Pair.first->get();
5752 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5753 Type *AddressAccessTy = Pair.second;
5754 unsigned AS = Address->getType()->getPointerAddressSpace();
5755
5756 // Do a match against the root of this address, ignoring profitability. This
5757 // will tell us if the addressing mode for the memory operation will
5758 // *actually* cover the shared instruction.
5759 ExtAddrMode Result;
5760 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5761 0);
5762 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5763 TPT.getRestorationPoint();
5764 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5765 AddressAccessTy, AS, UserI, Result,
5766 InsertedInsts, PromotedInsts, TPT,
5767 LargeOffsetGEP, OptSize, PSI, BFI);
5768 Matcher.IgnoreProfitability = true;
5769 bool Success = Matcher.matchAddr(Address, 0);
5770 (void)Success;
5771 assert(Success && "Couldn't select *anything*?");
5772
5773 // The match was to check the profitability, the changes made are not
5774 // part of the original matcher. Therefore, they should be dropped
5775 // otherwise the original matcher will not present the right state.
5776 TPT.rollback(LastKnownGood);
5777
5778 // If the match didn't cover I, then it won't be shared by it.
5779 if (!is_contained(MatchedAddrModeInsts, I))
5780 return false;
5781
5782 MatchedAddrModeInsts.clear();
5783 }
5784
5785 return true;
5786}
5787
5788/// Return true if the specified values are defined in a
5789/// different basic block than BB.
5790static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5791 if (Instruction *I = dyn_cast<Instruction>(V))
5792 return I->getParent() != BB;
5793 return false;
5794}
5795
5796 // Find an insert position of Addr for MemoryInst. We can't guarantee that
5797 // MemoryInst is the first instruction that will use Addr, so we need to find
5798 // the first user of Addr in the current BB.
5799static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5800 Value *SunkAddr) {
5801 if (Addr->hasOneUse())
5802 return MemoryInst->getIterator();
5803
5804 // We already have a SunkAddr in the current BB, but we may need to insert a
5805 // cast instruction after it.
5806 if (SunkAddr) {
5807 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5808 return std::next(AddrInst->getIterator());
5809 }
5810
5811 // Find the first user of Addr in current BB.
5812 Instruction *Earliest = MemoryInst;
5813 for (User *U : Addr->users()) {
5814 Instruction *UserInst = dyn_cast<Instruction>(U);
5815 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5816 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5817 continue;
5818 if (UserInst->comesBefore(Earliest))
5819 Earliest = UserInst;
5820 }
5821 }
5822 return Earliest->getIterator();
5823}
5824
5825/// Sink addressing mode computation immediately before MemoryInst if doing so
5826/// can be done without increasing register pressure. The need for the
5827/// register pressure constraint means this can end up being an all or nothing
5828/// decision for all uses of the same addressing computation.
5829///
5830/// Load and Store Instructions often have addressing modes that can do
5831/// significant amounts of computation. As such, instruction selection will try
5832/// to get the load or store to do as much computation as possible for the
5833/// program. The problem is that isel can only see within a single block. As
5834/// such, we sink as much legal addressing mode work into the block as possible.
5835///
5836/// This method is used to optimize both load/store and inline asms with memory
5837/// operands. It's also used to sink addressing computations feeding into cold
5838/// call sites into their (cold) basic block.
5839///
5840/// The motivation for handling sinking into cold blocks is that doing so can
5841/// both enable other address mode sinking (by satisfying the register pressure
5842/// constraint above), and reduce register pressure globally (by removing the
5843/// addressing mode computation from the fast path entirely).
5844bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5845 Type *AccessTy, unsigned AddrSpace) {
5846 Value *Repl = Addr;
5847
5848 // Try to collapse single-value PHI nodes. This is necessary to undo
5849 // unprofitable PRE transformations.
5850 SmallVector<Value *, 8> worklist;
5851 SmallPtrSet<Value *, 16> Visited;
5852 worklist.push_back(Addr);
5853
5854 // Use a worklist to iteratively look through PHI and select nodes, and
5855 // ensure that the addressing mode obtained from the non-PHI/select roots of
5856 // the graph are compatible.
5857 bool PhiOrSelectSeen = false;
5858 SmallVector<Instruction *, 16> AddrModeInsts;
5859 const SimplifyQuery SQ(*DL, TLInfo);
5860 AddressingModeCombiner AddrModes(SQ, Addr);
5861 TypePromotionTransaction TPT(RemovedInsts);
5862 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5863 TPT.getRestorationPoint();
5864 while (!worklist.empty()) {
5865 Value *V = worklist.pop_back_val();
5866
5867 // We allow traversing cyclic Phi nodes.
5868 // In case of success after this loop we ensure that traversing through
5869 // Phi nodes ends up with all cases to compute address of the form
5870 // BaseGV + Base + Scale * Index + Offset
5871 // where Scale and Offset are constants and BaseGV, Base and Index
5872 // are exactly the same Values in all cases.
5873 // It means that BaseGV, Scale and Offset dominate our memory instruction
5874 // and have the same value as they had in address computation represented
5875 // as Phi. So we can safely sink address computation to memory instruction.
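 // Illustrative sketch (editor's addition, not from the original source): a
 // phi such as
 //   %a = phi ptr [ %g1, %bb1 ], [ %g2, %bb2 ]
 // is only collapsed when both %g1 and %g2 reduce to the same
 // BaseGV + Base + Scale * Index + Offset form, e.g. two GEPs with the same
 // base and the same constant offset produced on both incoming paths.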
5876 if (!Visited.insert(V).second)
5877 continue;
5878
5879 // For a PHI node, push all of its incoming values.
5880 if (PHINode *P = dyn_cast<PHINode>(V)) {
5881 append_range(worklist, P->incoming_values());
5882 PhiOrSelectSeen = true;
5883 continue;
5884 }
5885 // Similar for select.
5886 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5887 worklist.push_back(SI->getFalseValue());
5888 worklist.push_back(SI->getTrueValue());
5889 PhiOrSelectSeen = true;
5890 continue;
5891 }
5892
5893 // For non-PHIs, determine the addressing mode being computed. Note that
5894 // the result may differ depending on what other uses our candidate
5895 // addressing instructions might have.
5896 AddrModeInsts.clear();
5897 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5898 0);
5899 // Defer the query (and possible computation of) the dom tree to point of
5900 // actual use. It's expected that most address matches don't actually need
5901 // the domtree.
5902 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5903 Function *F = MemoryInst->getParent()->getParent();
5904 return this->getDT(*F);
5905 };
5906 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5907 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5908 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5909 BFI.get());
5910
5911 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5912 if (GEP && !NewGEPBases.count(GEP)) {
5913 // If splitting the underlying data structure can reduce the offset of a
5914 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5915 // previously split data structures.
5916 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5917 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5918 }
5919
5920 NewAddrMode.OriginalValue = V;
5921 if (!AddrModes.addNewAddrMode(NewAddrMode))
5922 break;
5923 }
5924
5925 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5926 // or we have multiple but either couldn't combine them or combining them
5927 // wouldn't do anything useful, bail out now.
5928 if (!AddrModes.combineAddrModes()) {
5929 TPT.rollback(LastKnownGood);
5930 return false;
5931 }
5932 bool Modified = TPT.commit();
5933
5934 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5935 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5936
5937 // If all the instructions matched are already in this BB, don't do anything.
5938 // If we saw a Phi node then it is definitely not local, and if we saw a
5939 // select then we want to push the address calculation past it even if it's
5940 // already in this BB.
5941 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5942 return IsNonLocalValue(V, MemoryInst->getParent());
5943 })) {
5944 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5945 << "\n");
5946 return Modified;
5947 }
5948
5949 // Now that we have determined the addressing expression we want to use and
5950 // know that we have to sink it into this block, check to see if we have
5951 // already done this for some other load/store instr in this block. If so, reuse
5952 // the computation. Before attempting reuse, check if the address is valid
5953 // as it may have been erased.
5954
5955 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5956
5957 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5958 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5959
5960 // The current BB may be optimized multiple times, we can't guarantee the
5961 // reuse of Addr happens later, call findInsertPos to find an appropriate
5962 // insert position.
5963 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5964
5965 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5966 if (!SunkAddr) {
5967 auto &DT = getDT(*MemoryInst->getFunction());
5968 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5969 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5970 return Modified;
5971 }
5972
5973 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5974
5975 if (SunkAddr) {
5976 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5977 << " for " << *MemoryInst << "\n");
5978 if (SunkAddr->getType() != Addr->getType()) {
5979 if (SunkAddr->getType()->getPointerAddressSpace() !=
5980 Addr->getType()->getPointerAddressSpace() &&
5981 !DL->isNonIntegralPointerType(Addr->getType())) {
5982 // There are two reasons the address spaces might not match: a no-op
5983 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5984 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5985 // TODO: allow bitcast between different address space pointers with the
5986 // same size.
5987 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5988 SunkAddr =
5989 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5990 } else
5991 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5992 }
5993 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5994 SubtargetInfo->addrSinkUsingGEPs())) {
5995 // By default, we use the GEP-based method when AA is used later. This
5996 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5997 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5998 << " for " << *MemoryInst << "\n");
5999 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
6000
6001 // First, find the pointer.
6002 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
6003 ResultPtr = AddrMode.BaseReg;
6004 AddrMode.BaseReg = nullptr;
6005 }
6006
6007 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
6008 // We can't add more than one pointer together, nor can we scale a
6009 // pointer (both of which seem meaningless).
6010 if (ResultPtr || AddrMode.Scale != 1)
6011 return Modified;
6012
6013 ResultPtr = AddrMode.ScaledReg;
6014 AddrMode.Scale = 0;
6015 }
6016
6017 // It is only safe to sign extend the BaseReg if we know that the math
6018 // required to create it did not overflow before we extend it. Since
6019 // the original IR value was tossed in favor of a constant back when
6020 // the AddrMode was created we need to bail out gracefully if widths
6021 // do not match instead of extending it.
6022 //
6023 // (See below for code to add the scale.)
6024 if (AddrMode.Scale) {
6025 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6026 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6027 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6028 return Modified;
6029 }
6030
6031 GlobalValue *BaseGV = AddrMode.BaseGV;
6032 if (BaseGV != nullptr) {
6033 if (ResultPtr)
6034 return Modified;
6035
6036 if (BaseGV->isThreadLocal()) {
6037 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6038 } else {
6039 ResultPtr = BaseGV;
6040 }
6041 }
6042
6043 // If the real base value actually came from an inttoptr, then the matcher
6044 // will look through it and provide only the integer value. In that case,
6045 // use it here.
6046 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6047 if (!ResultPtr && AddrMode.BaseReg) {
6048 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6049 "sunkaddr");
6050 AddrMode.BaseReg = nullptr;
6051 } else if (!ResultPtr && AddrMode.Scale == 1) {
6052 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6053 "sunkaddr");
6054 AddrMode.Scale = 0;
6055 }
6056 }
6057
6058 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6059 !AddrMode.BaseOffs) {
6060 SunkAddr = Constant::getNullValue(Addr->getType());
6061 } else if (!ResultPtr) {
6062 return Modified;
6063 } else {
6064 Type *I8PtrTy =
6065 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6066
6067 // Start with the base register. Do this first so that subsequent address
6068 // matching finds it last, which will prevent it from trying to match it
6069 // as the scaled value in case it happens to be a mul. That would be
6070 // problematic if we've sunk a different mul for the scale, because then
6071 // we'd end up sinking both muls.
6072 if (AddrMode.BaseReg) {
6073 Value *V = AddrMode.BaseReg;
6074 if (V->getType() != IntPtrTy)
6075 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6076
6077 ResultIndex = V;
6078 }
6079
6080 // Add the scale value.
6081 if (AddrMode.Scale) {
6082 Value *V = AddrMode.ScaledReg;
6083 if (V->getType() == IntPtrTy) {
6084 // done.
6085 } else {
6086 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6087 cast<IntegerType>(V->getType())->getBitWidth() &&
6088 "We can't transform if ScaledReg is too narrow");
6089 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6090 }
6091
6092 if (AddrMode.Scale != 1)
6093 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6094 "sunkaddr");
6095 if (ResultIndex)
6096 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6097 else
6098 ResultIndex = V;
6099 }
6100
6101 // Add in the Base Offset if present.
6102 if (AddrMode.BaseOffs) {
6103 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6104 if (ResultIndex) {
6105 // We need to add this separately from the scale above to help with
6106 // SDAG consecutive load/store merging.
6107 if (ResultPtr->getType() != I8PtrTy)
6108 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6109 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6110 AddrMode.InBounds);
6111 }
6112
6113 ResultIndex = V;
6114 }
6115
6116 if (!ResultIndex) {
6117 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6118 // We know that we have a pointer without any offsets. If this pointer
6119 // originates from a different basic block than the current one, we
6120 // must be able to recreate it in the current basic block.
6121 // We do not support the recreation of any instructions yet.
6122 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6123 return Modified;
6124 SunkAddr = ResultPtr;
6125 } else {
6126 if (ResultPtr->getType() != I8PtrTy)
6127 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6128 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6129 AddrMode.InBounds);
6130 }
6131
6132 if (SunkAddr->getType() != Addr->getType()) {
6133 if (SunkAddr->getType()->getPointerAddressSpace() !=
6134 Addr->getType()->getPointerAddressSpace() &&
6135 !DL->isNonIntegralPointerType(Addr->getType())) {
6136 // There are two reasons the address spaces might not match: a no-op
6137 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6138 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6139 // TODO: allow bitcast between different address space pointers with
6140 // the same size.
6141 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6142 SunkAddr =
6143 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6144 } else
6145 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6146 }
6147 }
6148 } else {
6149 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6150 // non-integral pointers, so in that case bail out now.
6151 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6152 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6153 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6154 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6155 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6156 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6157 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6158 (AddrMode.BaseGV &&
6159 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6160 return Modified;
6161
6162 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6163 << " for " << *MemoryInst << "\n");
6164 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6165 Value *Result = nullptr;
6166
6167 // Start with the base register. Do this first so that subsequent address
6168 // matching finds it last, which will prevent it from trying to match it
6169 // as the scaled value in case it happens to be a mul. That would be
6170 // problematic if we've sunk a different mul for the scale, because then
6171 // we'd end up sinking both muls.
6172 if (AddrMode.BaseReg) {
6173 Value *V = AddrMode.BaseReg;
6174 if (V->getType()->isPointerTy())
6175 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6176 if (V->getType() != IntPtrTy)
6177 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6178 Result = V;
6179 }
6180
6181 // Add the scale value.
6182 if (AddrMode.Scale) {
6183 Value *V = AddrMode.ScaledReg;
6184 if (V->getType() == IntPtrTy) {
6185 // done.
6186 } else if (V->getType()->isPointerTy()) {
6187 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6188 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6189 cast<IntegerType>(V->getType())->getBitWidth()) {
6190 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6191 } else {
6192 // It is only safe to sign extend the BaseReg if we know that the math
6193 // required to create it did not overflow before we extend it. Since
6194 // the original IR value was tossed in favor of a constant back when
6195 // the AddrMode was created we need to bail out gracefully if widths
6196 // do not match instead of extending it.
6197 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6198 if (I && (Result != AddrMode.BaseReg))
6199 I->eraseFromParent();
6200 return Modified;
6201 }
6202 if (AddrMode.Scale != 1)
6203 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6204 "sunkaddr");
6205 if (Result)
6206 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6207 else
6208 Result = V;
6209 }
6210
6211 // Add in the BaseGV if present.
6212 GlobalValue *BaseGV = AddrMode.BaseGV;
6213 if (BaseGV != nullptr) {
6214 Value *BaseGVPtr;
6215 if (BaseGV->isThreadLocal()) {
6216 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6217 } else {
6218 BaseGVPtr = BaseGV;
6219 }
6220 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6221 if (Result)
6222 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6223 else
6224 Result = V;
6225 }
6226
6227 // Add in the Base Offset if present.
6228 if (AddrMode.BaseOffs) {
6229 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6230 if (Result)
6231 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6232 else
6233 Result = V;
6234 }
6235
6236 if (!Result)
6237 SunkAddr = Constant::getNullValue(Addr->getType());
6238 else
6239 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6240 }
6241
6242 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6243 // Store the newly computed address into the cache. In the case we reused a
6244 // value, this should be idempotent.
6245 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6246
6247 // If we have no uses, recursively delete the value and all dead instructions
6248 // using it.
6249 if (Repl->use_empty()) {
6250 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6251 RecursivelyDeleteTriviallyDeadInstructions(
6252 Repl, TLInfo, nullptr,
6253 [&](Value *V) { removeAllAssertingVHReferences(V); });
6254 });
6255 }
6256 ++NumMemoryInsts;
6257 return true;
6258}
6259
6260/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6261/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6262/// only handle a 2 operand GEP in the same basic block or a splat constant
6263/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6264/// index.
6265///
6266/// If the existing GEP has a vector base pointer that is splat, we can look
6267/// through the splat to find the scalar pointer. If we can't find a scalar
6268/// pointer there's nothing we can do.
6269///
6270/// If we have a GEP with more than 2 indices where the middle indices are all
6271/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6272///
6273/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6274/// followed by a GEP with an all zeroes vector index. This will enable
6275/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6276/// zero index.
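///
/// For illustration only (schematic IR; the names and types are invented, not
/// taken from a test case): a gather whose GEP has a scalar base, zero middle
/// index, and a splat final index, such as
/// \code
/// %gep = getelementptr [8 x i32], ptr %base, i64 0, <4 x i64> <i64 5, i64 5, i64 5, i64 5>
/// \endcode
/// can be rewritten into a scalar GEP plus a vector GEP with an all-zeroes
/// index:
/// \code
/// %scalar = getelementptr [8 x i32], ptr %base, i64 0, i64 5
/// %gep = getelementptr i32, ptr %scalar, <4 x i64> zeroinitializer
/// \endcode
/// so SelectionDAGBuilder can treat %scalar as the uniform base.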
6277bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6278 Value *Ptr) {
6279 Value *NewAddr;
6280
6281 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6282 // Don't optimize GEPs that don't have indices.
6283 if (!GEP->hasIndices())
6284 return false;
6285
6286 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6287 // FIXME: We should support this by sinking the GEP.
6288 if (MemoryInst->getParent() != GEP->getParent())
6289 return false;
6290
6291 SmallVector<Value *, 2> Ops(GEP->operands());
6292
6293 bool RewriteGEP = false;
6294
6295 if (Ops[0]->getType()->isVectorTy()) {
6296 Ops[0] = getSplatValue(Ops[0]);
6297 if (!Ops[0])
6298 return false;
6299 RewriteGEP = true;
6300 }
6301
6302 unsigned FinalIndex = Ops.size() - 1;
6303
6304 // Ensure all but the last index is 0.
6305 // FIXME: This isn't strictly required. All that's required is that they are
6306 // all scalars or splats.
6307 for (unsigned i = 1; i < FinalIndex; ++i) {
6308 auto *C = dyn_cast<Constant>(Ops[i]);
6309 if (!C)
6310 return false;
6311 if (isa<VectorType>(C->getType()))
6312 C = C->getSplatValue();
6313 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6314 if (!CI || !CI->isZero())
6315 return false;
6316 // Scalarize the index if needed.
6317 Ops[i] = CI;
6318 }
6319
6320 // Try to scalarize the final index.
6321 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6322 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6323 auto *C = dyn_cast<ConstantInt>(V);
6324 // Don't scalarize all zeros vector.
6325 if (!C || !C->isZero()) {
6326 Ops[FinalIndex] = V;
6327 RewriteGEP = true;
6328 }
6329 }
6330 }
6331
6332 // If we made any changes or we have extra operands, we need to generate
6333 // new instructions.
6334 if (!RewriteGEP && Ops.size() == 2)
6335 return false;
6336
6337 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6338
6339 IRBuilder<> Builder(MemoryInst);
6340
6341 Type *SourceTy = GEP->getSourceElementType();
6342 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6343
6344 // If the final index isn't a vector, emit a scalar GEP containing all ops
6345 // and a vector GEP with all zeroes final index.
6346 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6347 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6348 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6349 auto *SecondTy = GetElementPtrInst::getIndexedType(
6350 SourceTy, ArrayRef(Ops).drop_front());
6351 NewAddr =
6352 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6353 } else {
6354 Value *Base = Ops[0];
6355 Value *Index = Ops[FinalIndex];
6356
6357 // Create a scalar GEP if there are more than 2 operands.
6358 if (Ops.size() != 2) {
6359 // Replace the last index with 0.
6360 Ops[FinalIndex] =
6361 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6362 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6363 SourceTy = GetElementPtrInst::getIndexedType(
6364 SourceTy, ArrayRef(Ops).drop_front());
6365 }
6366
6367 // Now create the GEP with scalar pointer and vector index.
6368 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6369 }
6370 } else if (!isa<Constant>(Ptr)) {
6371 // Not a GEP; maybe it's a splat and we can create a GEP to enable
6372 // SelectionDAGBuilder to use it as a uniform base.
6373 Value *V = getSplatValue(Ptr);
6374 if (!V)
6375 return false;
6376
6377 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6378
6379 IRBuilder<> Builder(MemoryInst);
6380
6381 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6382 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6383 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6384 Type *ScalarTy;
6385 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6386 Intrinsic::masked_gather) {
6387 ScalarTy = MemoryInst->getType()->getScalarType();
6388 } else {
6389 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6390 Intrinsic::masked_scatter);
6391 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6392 }
6393 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6394 } else {
6395 // Constant; SelectionDAGBuilder knows to check if it's a splat.
6396 return false;
6397 }
6398
6399 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6400
6401 // If we have no uses, recursively delete the value and all dead instructions
6402 // using it.
6403 if (Ptr->use_empty())
6404 RecursivelyDeleteTriviallyDeadInstructions(
6405 Ptr, TLInfo, nullptr,
6406 [&](Value *V) { removeAllAssertingVHReferences(V); });
6407
6408 return true;
6409}
6410
6411/// If there are any memory operands, use OptimizeMemoryInst to sink their
6412/// address computing into the block when possible / profitable.
6413bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6414 bool MadeChange = false;
6415
6416 const TargetRegisterInfo *TRI =
6417 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6418 TargetLowering::AsmOperandInfoVector TargetConstraints =
6419 TLI->ParseConstraints(*DL, TRI, *CS);
6420 unsigned ArgNo = 0;
6421 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6422 // Compute the constraint code and ConstraintType to use.
6423 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6424
6425 // TODO: Also handle C_Address?
6426 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6427 OpInfo.isIndirect) {
6428 Value *OpVal = CS->getArgOperand(ArgNo++);
6429 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6430 } else if (OpInfo.Type == InlineAsm::isInput)
6431 ArgNo++;
6432 }
6433
6434 return MadeChange;
6435}
6436
6437/// Check if all the uses of \p Val are equivalent (or free) zero or
6438/// sign extensions.
6439static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6440 assert(!Val->use_empty() && "Input must have at least one use");
6441 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6442 bool IsSExt = isa<SExtInst>(FirstUser);
6443 Type *ExtTy = FirstUser->getType();
6444 for (const User *U : Val->users()) {
6445 const Instruction *UI = cast<Instruction>(U);
6446 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6447 return false;
6448 Type *CurTy = UI->getType();
6449 // Same input and output types: Same instruction after CSE.
6450 if (CurTy == ExtTy)
6451 continue;
6452
6453 // If IsSExt is true, we are in this situation:
6454 // a = Val
6455 // b = sext ty1 a to ty2
6456 // c = sext ty1 a to ty3
6457 // Assuming ty2 is shorter than ty3, this could be turned into:
6458 // a = Val
6459 // b = sext ty1 a to ty2
6460 // c = sext ty2 b to ty3
6461 // However, the last sext is not free.
6462 if (IsSExt)
6463 return false;
6464
6465 // This is a ZExt, maybe this is free to extend from one type to another.
6466 // In that case, we would not account for a different use.
6467 Type *NarrowTy;
6468 Type *LargeTy;
6469 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6470 CurTy->getScalarType()->getIntegerBitWidth()) {
6471 NarrowTy = CurTy;
6472 LargeTy = ExtTy;
6473 } else {
6474 NarrowTy = ExtTy;
6475 LargeTy = CurTy;
6476 }
6477
6478 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6479 return false;
6480 }
6481 // All uses are the same or can be derived from one another for free.
6482 return true;
6483}
6484
6485/// Try to speculatively promote extensions in \p Exts and continue
6486/// promoting through newly promoted operands recursively as far as doing so is
6487/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6488/// When some promotion happened, \p TPT contains the proper state to revert
6489/// them.
6490///
6491/// \return true if some promotion happened, false otherwise.
6492bool CodeGenPrepare::tryToPromoteExts(
6493 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6494 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6495 unsigned CreatedInstsCost) {
6496 bool Promoted = false;
6497
6498 // Iterate over all the extensions to try to promote them.
6499 for (auto *I : Exts) {
6500 // Early check if we directly have ext(load).
6501 if (isa<LoadInst>(I->getOperand(0))) {
6502 ProfitablyMovedExts.push_back(I);
6503 continue;
6504 }
6505
6506 // Check whether or not we want to do any promotion. The reason we have
6507 // this check inside the for loop is to catch the case where an extension
6508 // is directly fed by a load because in such case the extension can be moved
6509 // up without any promotion on its operands.
6510 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6511 return false;
6512
6513 // Get the action to perform the promotion.
6514 TypePromotionHelper::Action TPH =
6515 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6516 // Check if we can promote.
6517 if (!TPH) {
6518 // Save the current extension as we cannot move up through its operand.
6519 ProfitablyMovedExts.push_back(I);
6520 continue;
6521 }
6522
6523 // Save the current state.
6524 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6525 TPT.getRestorationPoint();
6526 SmallVector<Instruction *, 4> NewExts;
6527 unsigned NewCreatedInstsCost = 0;
6528 unsigned ExtCost = !TLI->isExtFree(I);
6529 // Promote.
6530 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6531 &NewExts, nullptr, *TLI);
6532 assert(PromotedVal &&
6533 "TypePromotionHelper should have filtered out those cases");
6534
6535 // We would be able to merge only one extension into a load.
6536 // Therefore, if we have more than 1 new extension we heuristically
6537 // cut this search path, because it means we degrade the code quality.
6538 // With exactly 2, the transformation is neutral, because we will merge
6539 // one extension but leave one. However, we optimistically keep going,
6540 // because the new extension may be removed too. Also avoid replacing a
6541 // single free extension with multiple extensions, as this increases the
6542 // number of IR instructions while not providing any savings.
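    // For illustration only (a worked example with invented numbers): if this
    // promotion creates instructions with a total cost of 2 and the original
    // extension has ExtCost 1, TotalCreatedInstsCost becomes 1 and the search
    // continues; a remaining cost greater than 1 rolls the promotion back
    // unless StressExtLdPromotion is set.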
6543 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6544 // FIXME: It would be possible to propagate a negative value instead of
6545 // conservatively ceiling it to 0.
6546 TotalCreatedInstsCost =
6547 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6548 if (!StressExtLdPromotion &&
6549 (TotalCreatedInstsCost > 1 ||
6550 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6551 (ExtCost == 0 && NewExts.size() > 1))) {
6552 // This promotion is not profitable, rollback to the previous state, and
6553 // save the current extension in ProfitablyMovedExts as the latest
6554 // speculative promotion turned out to be unprofitable.
6555 TPT.rollback(LastKnownGood);
6556 ProfitablyMovedExts.push_back(I);
6557 continue;
6558 }
6559 // Continue promoting NewExts as far as doing so is profitable.
6560 SmallVector<Instruction *, 2> NewlyMovedExts;
6561 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6562 bool NewPromoted = false;
6563 for (auto *ExtInst : NewlyMovedExts) {
6564 Instruction *MovedExt = cast<Instruction>(ExtInst);
6565 Value *ExtOperand = MovedExt->getOperand(0);
6566 // If we have reached a load, we need this extra profitability check
6567 // as it could potentially be merged into an ext(load).
6568 if (isa<LoadInst>(ExtOperand) &&
6569 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6570 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6571 continue;
6572
6573 ProfitablyMovedExts.push_back(MovedExt);
6574 NewPromoted = true;
6575 }
6576
6577 // If none of speculative promotions for NewExts is profitable, rollback
6578 // and save the current extension (I) as the last profitable extension.
6579 if (!NewPromoted) {
6580 TPT.rollback(LastKnownGood);
6581 ProfitablyMovedExts.push_back(I);
6582 continue;
6583 }
6584 // The promotion is profitable.
6585 Promoted = true;
6586 }
6587 return Promoted;
6588}
6589
6590 /// Merge redundant sexts when one dominates the other.
6591bool CodeGenPrepare::mergeSExts(Function &F) {
6592 bool Changed = false;
6593 for (auto &Entry : ValToSExtendedUses) {
6594 SExts &Insts = Entry.second;
6595 SExts CurPts;
6596 for (Instruction *Inst : Insts) {
6597 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6598 Inst->getOperand(0) != Entry.first)
6599 continue;
6600 bool inserted = false;
6601 for (auto &Pt : CurPts) {
6602 if (getDT(F).dominates(Inst, Pt)) {
6603 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6604 RemovedInsts.insert(Pt);
6605 Pt->removeFromParent();
6606 Pt = Inst;
6607 inserted = true;
6608 Changed = true;
6609 break;
6610 }
6611 if (!getDT(F).dominates(Pt, Inst))
6612 // Give up if we need to merge in a common dominator as the
6613 // experiments show it is not profitable.
6614 continue;
6615 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6616 RemovedInsts.insert(Inst);
6617 Inst->removeFromParent();
6618 inserted = true;
6619 Changed = true;
6620 break;
6621 }
6622 if (!inserted)
6623 CurPts.push_back(Inst);
6624 }
6625 }
6626 return Changed;
6627}
6628
6629// Splitting large data structures so that the GEPs accessing them can have
6630// smaller offsets so that they can be sunk to the same blocks as their users.
6631// For example, a large struct starting from %base is split into two parts
6632// where the second part starts from %new_base.
6633//
6634// Before:
6635// BB0:
6636// %base =
6637//
6638// BB1:
6639// %gep0 = gep %base, off0
6640// %gep1 = gep %base, off1
6641// %gep2 = gep %base, off2
6642//
6643// BB2:
6644// %load1 = load %gep0
6645// %load2 = load %gep1
6646// %load3 = load %gep2
6647//
6648// After:
6649// BB0:
6650// %base =
6651// %new_base = gep %base, off0
6652//
6653// BB1:
6654// %new_gep0 = %new_base
6655// %new_gep1 = gep %new_base, off1 - off0
6656// %new_gep2 = gep %new_base, off2 - off0
6657//
6658// BB2:
6659// %load1 = load i32, i32* %new_gep0
6660// %load2 = load i32, i32* %new_gep1
6661// %load3 = load i32, i32* %new_gep2
6662//
6663// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6664 // their offsets are small enough to fit into the addressing mode.
6665bool CodeGenPrepare::splitLargeGEPOffsets() {
6666 bool Changed = false;
6667 for (auto &Entry : LargeOffsetGEPMap) {
6668 Value *OldBase = Entry.first;
6669 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6670 &LargeOffsetGEPs = Entry.second;
6671 auto compareGEPOffset =
6672 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6673 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6674 if (LHS.first == RHS.first)
6675 return false;
6676 if (LHS.second != RHS.second)
6677 return LHS.second < RHS.second;
6678 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6679 };
6680 // Sorting all the GEPs of the same data structures based on the offsets.
6681 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6682 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6683 // Skip if all the GEPs have the same offsets.
6684 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6685 continue;
6686 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6687 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6688 Value *NewBaseGEP = nullptr;
6689
6690 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6691 GetElementPtrInst *GEP) {
6692 LLVMContext &Ctx = GEP->getContext();
6693 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6694 Type *I8PtrTy =
6695 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6696
6697 BasicBlock::iterator NewBaseInsertPt;
6698 BasicBlock *NewBaseInsertBB;
6699 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6700 // If the base of the struct is an instruction, the new base will be
6701 // inserted close to it.
6702 NewBaseInsertBB = BaseI->getParent();
6703 if (isa<PHINode>(BaseI))
6704 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6705 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6706 NewBaseInsertBB =
6707 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6708 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6709 } else
6710 NewBaseInsertPt = std::next(BaseI->getIterator());
6711 } else {
6712 // If the current base is an argument or global value, the new base
6713 // will be inserted to the entry block.
6714 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6715 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6716 }
6717 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6718 // Create a new base.
6719 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6720 NewBaseGEP = OldBase;
6721 if (NewBaseGEP->getType() != I8PtrTy)
6722 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6723 NewBaseGEP =
6724 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6725 NewGEPBases.insert(NewBaseGEP);
6726 return;
6727 };
6728
6729 // Check whether all the offsets can be encoded with the preferred common base.
6730 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6731 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6732 BaseOffset = PreferBase;
6733 // Create a new base if the offset of the BaseGEP can be decoded with one
6734 // instruction.
6735 createNewBase(BaseOffset, OldBase, BaseGEP);
6736 }
6737
6738 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6739 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6740 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6741 int64_t Offset = LargeOffsetGEP->second;
6742 if (Offset != BaseOffset) {
6743 TargetLowering::AddrMode AddrMode;
6744 AddrMode.HasBaseReg = true;
6745 AddrMode.BaseOffs = Offset - BaseOffset;
6746 // The result type of the GEP might not be the type of the memory
6747 // access.
6748 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6749 GEP->getResultElementType(),
6750 GEP->getAddressSpace())) {
6751 // We need to create a new base if the offset to the current base is
6752 // too large to fit into the addressing mode. So, a very large struct
6753 // may be split into several parts.
6754 BaseGEP = GEP;
6755 BaseOffset = Offset;
6756 NewBaseGEP = nullptr;
6757 }
6758 }
6759
6760 // Generate a new GEP to replace the current one.
6761 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6762
6763 if (!NewBaseGEP) {
6764 // Create a new base if we don't have one yet. Find the insertion
6765 // pointer for the new base first.
6766 createNewBase(BaseOffset, OldBase, GEP);
6767 }
6768
6769 IRBuilder<> Builder(GEP);
6770 Value *NewGEP = NewBaseGEP;
6771 if (Offset != BaseOffset) {
6772 // Calculate the new offset for the new GEP.
6773 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6774 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6775 }
6776 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6777 LargeOffsetGEPID.erase(GEP);
6778 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6779 GEP->eraseFromParent();
6780 Changed = true;
6781 }
6782 }
6783 return Changed;
6784}
6785
6786bool CodeGenPrepare::optimizePhiType(
6787 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6788 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6789 // We are looking for a collection of interconnected phi nodes that together
6790 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6791 // are of the same type. Convert the whole set of nodes to the type of the
6792 // bitcast.
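  // For illustration only (schematic IR, not from a test case): on a target
  // whose shouldConvertPhiType prefers i32 over float, a web such as
  //   %l   = load i32, ptr %p
  //   %b   = bitcast i32 %l to float
  //   %phi = phi float [ %b, %bb0 ], [ %phi, %loop ]
  //   %s   = bitcast float %phi to i32
  //   store i32 %s, ptr %q
  // can keep the phi in i32, so both float bitcasts become removable.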
6793 Type *PhiTy = I->getType();
6794 Type *ConvertTy = nullptr;
6795 if (Visited.count(I) ||
6796 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6797 return false;
6798
6799 SmallVector<Instruction *, 4> Worklist;
6800 Worklist.push_back(cast<Instruction>(I));
6801 SmallPtrSet<PHINode *, 4> PhiNodes;
6802 SmallPtrSet<ConstantData *, 4> Constants;
6803 PhiNodes.insert(I);
6804 Visited.insert(I);
6805 SmallPtrSet<Instruction *, 4> Defs;
6806 SmallPtrSet<Instruction *, 4> Uses;
6807 // This works by adding extra bitcasts between load/stores and removing
6808 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
6809 // we can get into the situation where we remove a bitcast in one iteration
6810 // just to add it again in the next. We need to ensure that at least one
6811 // bitcast we remove is anchored to something that will not change back.
6812 bool AnyAnchored = false;
6813
6814 while (!Worklist.empty()) {
6815 Instruction *II = Worklist.pop_back_val();
6816
6817 if (auto *Phi = dyn_cast<PHINode>(II)) {
6818 // Handle Defs, which might also be PHI's
6819 for (Value *V : Phi->incoming_values()) {
6820 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6821 if (!PhiNodes.count(OpPhi)) {
6822 if (!Visited.insert(OpPhi).second)
6823 return false;
6824 PhiNodes.insert(OpPhi);
6825 Worklist.push_back(OpPhi);
6826 }
6827 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6828 if (!OpLoad->isSimple())
6829 return false;
6830 if (Defs.insert(OpLoad).second)
6831 Worklist.push_back(OpLoad);
6832 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6833 if (Defs.insert(OpEx).second)
6834 Worklist.push_back(OpEx);
6835 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6836 if (!ConvertTy)
6837 ConvertTy = OpBC->getOperand(0)->getType();
6838 if (OpBC->getOperand(0)->getType() != ConvertTy)
6839 return false;
6840 if (Defs.insert(OpBC).second) {
6841 Worklist.push_back(OpBC);
6842 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6843 !isa<ExtractElementInst>(OpBC->getOperand(0));
6844 }
6845 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6846 Constants.insert(OpC);
6847 else
6848 return false;
6849 }
6850 }
6851
6852 // Handle uses which might also be phi's
6853 for (User *V : II->users()) {
6854 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6855 if (!PhiNodes.count(OpPhi)) {
6856 if (Visited.count(OpPhi))
6857 return false;
6858 PhiNodes.insert(OpPhi);
6859 Visited.insert(OpPhi);
6860 Worklist.push_back(OpPhi);
6861 }
6862 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6863 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6864 return false;
6865 Uses.insert(OpStore);
6866 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6867 if (!ConvertTy)
6868 ConvertTy = OpBC->getType();
6869 if (OpBC->getType() != ConvertTy)
6870 return false;
6871 Uses.insert(OpBC);
6872 AnyAnchored |=
6873 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6874 } else {
6875 return false;
6876 }
6877 }
6878 }
6879
6880 if (!ConvertTy || !AnyAnchored ||
6881 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6882 return false;
6883
6884 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6885 << *ConvertTy << "\n");
6886
6887 // Create all the new phi nodes of the new type, and bitcast any loads to the
6888 // correct type.
6889 ValueToValueMap ValMap;
6890 for (ConstantData *C : Constants)
6891 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6892 for (Instruction *D : Defs) {
6893 if (isa<BitCastInst>(D)) {
6894 ValMap[D] = D->getOperand(0);
6895 DeletedInstrs.insert(D);
6896 } else {
6897 BasicBlock::iterator insertPt = std::next(D->getIterator());
6898 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6899 }
6900 }
6901 for (PHINode *Phi : PhiNodes)
6902 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6903 Phi->getName() + ".tc", Phi->getIterator());
6904 // Pipe together all the PhiNodes.
6905 for (PHINode *Phi : PhiNodes) {
6906 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6907 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6908 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6909 Phi->getIncomingBlock(i));
6910 Visited.insert(NewPhi);
6911 }
6912 // And finally pipe up the stores and bitcasts
6913 for (Instruction *U : Uses) {
6914 if (isa<BitCastInst>(U)) {
6915 DeletedInstrs.insert(U);
6916 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6917 } else {
6918 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6919 U->getIterator()));
6920 }
6921 }
6922
6923 // Save the removed phis to be deleted later.
6924 DeletedInstrs.insert_range(PhiNodes);
6925 return true;
6926}
6927
6928bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6929 if (!OptimizePhiTypes)
6930 return false;
6931
6932 bool Changed = false;
6933 SmallPtrSet<PHINode *, 4> Visited;
6934 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6935
6936 // Attempt to optimize all the phis in the function to the correct type.
6937 for (auto &BB : F)
6938 for (auto &Phi : BB.phis())
6939 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6940
6941 // Remove any old phi's that have been converted.
6942 for (auto *I : DeletedInstrs) {
6943 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6944 I->eraseFromParent();
6945 }
6946
6947 return Changed;
6948}
6949
6950 /// Return true if an ext(load) can be formed from an extension in
6951/// \p MovedExts.
6952bool CodeGenPrepare::canFormExtLd(
6953 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6954 Instruction *&Inst, bool HasPromoted) {
6955 for (auto *MovedExtInst : MovedExts) {
6956 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6957 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6958 Inst = MovedExtInst;
6959 break;
6960 }
6961 }
6962 if (!LI)
6963 return false;
6964
6965 // If they're already in the same block, there's nothing to do.
6966 // Make the cheap checks first if we did not promote.
6967 // If we promoted, we need to check if it is indeed profitable.
6968 if (!HasPromoted && LI->getParent() == Inst->getParent())
6969 return false;
6970
6971 return TLI->isExtLoad(LI, Inst, *DL);
6972}
6973
6974/// Move a zext or sext fed by a load into the same basic block as the load,
6975/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6976/// extend into the load.
6977///
6978/// E.g.,
6979/// \code
6980/// %ld = load i32* %addr
6981/// %add = add nuw i32 %ld, 4
6982/// %zext = zext i32 %add to i64
6983 /// \endcode
6984/// =>
6985/// \code
6986/// %ld = load i32* %addr
6987/// %zext = zext i32 %ld to i64
6988/// %add = add nuw i64 %zext, 4
6989 /// \endcode
6990 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
6991 /// allows us to match zext(load i32*) to i64.
6992///
6993/// Also, try to promote the computations used to obtain a sign extended
6994/// value used into memory accesses.
6995/// E.g.,
6996/// \code
6997/// a = add nsw i32 b, 3
6998/// d = sext i32 a to i64
6999/// e = getelementptr ..., i64 d
7000/// \endcode
7001/// =>
7002/// \code
7003/// f = sext i32 b to i64
7004/// a = add nsw i64 f, 3
7005/// e = getelementptr ..., i64 a
7006/// \endcode
7007///
7008/// \p Inst[in/out] the extension may be modified during the process if some
7009/// promotions apply.
7010bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7011 bool AllowPromotionWithoutCommonHeader = false;
7012 /// See if it is an interesting sext operation for the address type
7013 /// promotion before trying to promote it, e.g., the ones with the right
7014 /// type and used in memory accesses.
7015 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7016 *Inst, AllowPromotionWithoutCommonHeader);
7017 TypePromotionTransaction TPT(RemovedInsts);
7018 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7019 TPT.getRestorationPoint();
7020 SmallVector<Instruction *, 2> Exts;
7021 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7022 Exts.push_back(Inst);
7023
7024 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7025
7026 // Look for a load being extended.
7027 LoadInst *LI = nullptr;
7028 Instruction *ExtFedByLoad;
7029
7030 // Try to promote a chain of computation if it allows to form an extended
7031 // load.
7032 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7033 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7034 TPT.commit();
7035 // Move the extend into the same block as the load.
7036 ExtFedByLoad->moveAfter(LI);
7037 ++NumExtsMoved;
7038 Inst = ExtFedByLoad;
7039 return true;
7040 }
7041
7042 // Continue promoting SExts if known as considerable depending on targets.
7043 if (ATPConsiderable &&
7044 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7045 HasPromoted, TPT, SpeculativelyMovedExts))
7046 return true;
7047
7048 TPT.rollback(LastKnownGood);
7049 return false;
7050}
7051
7052// Perform address type promotion if doing so is profitable.
7053// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7054// instructions that sign extended the same initial value. However, if
7055// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7056 // extension to be profitable on its own.
7057bool CodeGenPrepare::performAddressTypePromotion(
7058 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7059 bool HasPromoted, TypePromotionTransaction &TPT,
7060 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7061 bool Promoted = false;
7062 SmallPtrSet<Instruction *, 1> UnhandledExts;
7063 bool AllSeenFirst = true;
7064 for (auto *I : SpeculativelyMovedExts) {
7065 Value *HeadOfChain = I->getOperand(0);
7066 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7067 SeenChainsForSExt.find(HeadOfChain);
7068 // If there is an unhandled SExt which has the same header, try to promote
7069 // it as well.
7070 if (AlreadySeen != SeenChainsForSExt.end()) {
7071 if (AlreadySeen->second != nullptr)
7072 UnhandledExts.insert(AlreadySeen->second);
7073 AllSeenFirst = false;
7074 }
7075 }
7076
7077 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7078 SpeculativelyMovedExts.size() == 1)) {
7079 TPT.commit();
7080 if (HasPromoted)
7081 Promoted = true;
7082 for (auto *I : SpeculativelyMovedExts) {
7083 Value *HeadOfChain = I->getOperand(0);
7084 SeenChainsForSExt[HeadOfChain] = nullptr;
7085 ValToSExtendedUses[HeadOfChain].push_back(I);
7086 }
7087 // Update Inst as promotion happened.
7088 Inst = SpeculativelyMovedExts.pop_back_val();
7089 } else {
7090 // This is the first chain visited from the header, keep the current chain
7091 // as unhandled. Defer promoting it until we encounter another SExt
7092 // chain derived from the same header.
7093 for (auto *I : SpeculativelyMovedExts) {
7094 Value *HeadOfChain = I->getOperand(0);
7095 SeenChainsForSExt[HeadOfChain] = Inst;
7096 }
7097 return false;
7098 }
7099
7100 if (!AllSeenFirst && !UnhandledExts.empty())
7101 for (auto *VisitedSExt : UnhandledExts) {
7102 if (RemovedInsts.count(VisitedSExt))
7103 continue;
7104 TypePromotionTransaction TPT(RemovedInsts);
7105 SmallVector<Instruction *, 2> Exts;
7106 SmallVector<Instruction *, 2> Chains;
7107 Exts.push_back(VisitedSExt);
7108 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7109 TPT.commit();
7110 if (HasPromoted)
7111 Promoted = true;
7112 for (auto *I : Chains) {
7113 Value *HeadOfChain = I->getOperand(0);
7114 // Mark this as handled.
7115 SeenChainsForSExt[HeadOfChain] = nullptr;
7116 ValToSExtendedUses[HeadOfChain].push_back(I);
7117 }
7118 }
7119 return Promoted;
7120}
7121
7122bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7123 BasicBlock *DefBB = I->getParent();
7124
7125 // If the result of a {s|z}ext and its source are both live out, rewrite all
7126 // other uses of the source with the result of the extension.
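  // For illustration only (schematic IR, not from a test case):
  //   bb0: %x = add i16 %a, %b
  //        %z = zext i16 %x to i32      ; %x and %z are both live out
  //   bb1: use of i16 %x
  // becomes, when truncation is free:
  //   bb1: %t = trunc i32 %z to i16
  //        use of i16 %t
  // so only %z has to stay live across the edge.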
7127 Value *Src = I->getOperand(0);
7128 if (Src->hasOneUse())
7129 return false;
7130
7131 // Only do this xform if truncating is free.
7132 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7133 return false;
7134
7135 // Only safe to perform the optimization if the source is also defined in
7136 // this block.
7137 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7138 return false;
7139
7140 bool DefIsLiveOut = false;
7141 for (User *U : I->users()) {
7142 Instruction *UI = cast<Instruction>(U);
7143
7144 // Figure out which BB this ext is used in.
7145 BasicBlock *UserBB = UI->getParent();
7146 if (UserBB == DefBB)
7147 continue;
7148 DefIsLiveOut = true;
7149 break;
7150 }
7151 if (!DefIsLiveOut)
7152 return false;
7153
7154 // Make sure none of the uses are PHI nodes.
7155 for (User *U : Src->users()) {
7156 Instruction *UI = cast<Instruction>(U);
7157 BasicBlock *UserBB = UI->getParent();
7158 if (UserBB == DefBB)
7159 continue;
7160 // Be conservative. We don't want this xform to end up introducing
7161 // reloads just before load / store instructions.
7162 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7163 return false;
7164 }
7165
7166 // InsertedTruncs - Only insert one trunc in each block once.
7167 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7168
7169 bool MadeChange = false;
7170 for (Use &U : Src->uses()) {
7171 Instruction *User = cast<Instruction>(U.getUser());
7172
7173 // Figure out which BB this ext is used in.
7174 BasicBlock *UserBB = User->getParent();
7175 if (UserBB == DefBB)
7176 continue;
7177
7178 // Both src and def are live in this block. Rewrite the use.
7179 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7180
7181 if (!InsertedTrunc) {
7182 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7183 assert(InsertPt != UserBB->end());
7184 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7185 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7186 InsertedInsts.insert(InsertedTrunc);
7187 }
7188
7189 // Replace a use of the {s|z}ext source with a use of the result.
7190 U = InsertedTrunc;
7191 ++NumExtUses;
7192 MadeChange = true;
7193 }
7194
7195 return MadeChange;
7196}
7197
7198// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7199// just after the load if the target can fold this into one extload instruction,
7200// with the hope of eliminating some of the other later "and" instructions using
7201// the loaded value. "and"s that are made trivially redundant by the insertion
7202// of the new "and" are removed by this function, while others (e.g. those whose
7203// path from the load goes through a phi) are left for isel to potentially
7204// remove.
7205//
7206// For example:
7207//
7208// b0:
7209// x = load i32
7210// ...
7211// b1:
7212// y = and x, 0xff
7213// z = use y
7214//
7215// becomes:
7216//
7217// b0:
7218// x = load i32
7219// x' = and x, 0xff
7220// ...
7221// b1:
7222// z = use x'
7223//
7224// whereas:
7225//
7226// b0:
7227// x1 = load i32
7228// ...
7229// b1:
7230// x2 = load i32
7231// ...
7232// b2:
7233// x = phi x1, x2
7234// y = and x, 0xff
7235//
7236// becomes (after a call to optimizeLoadExt for each load):
7237//
7238// b0:
7239// x1 = load i32
7240// x1' = and x1, 0xff
7241// ...
7242// b1:
7243// x2 = load i32
7244// x2' = and x2, 0xff
7245// ...
7246// b2:
7247// x = phi x1', x2'
7248// y = and x, 0xff
7249bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7250 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7251 return false;
7252
7253 // Skip loads we've already transformed.
7254 if (Load->hasOneUse() &&
7255 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7256 return false;
7257
7258 // Look at all uses of Load, looking through phis, to determine how many bits
7259 // of the loaded value are needed.
7260 SmallVector<Instruction *, 8> WorkList;
7261 SmallPtrSet<Instruction *, 16> Visited;
7262 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7263 SmallVector<Instruction *, 8> DropFlags;
7264 for (auto *U : Load->users())
7265 WorkList.push_back(cast<Instruction>(U));
7266
7267 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7268 unsigned BitWidth = LoadResultVT.getSizeInBits();
7269 // If the BitWidth is 0, do not try to optimize the type
7270 if (BitWidth == 0)
7271 return false;
7272
7273 APInt DemandBits(BitWidth, 0);
7274 APInt WidestAndBits(BitWidth, 0);
7275
7276 while (!WorkList.empty()) {
7277 Instruction *I = WorkList.pop_back_val();
7278
7279 // Break use-def graph loops.
7280 if (!Visited.insert(I).second)
7281 continue;
7282
7283 // For a PHI node, push all of its users.
7284 if (auto *Phi = dyn_cast<PHINode>(I)) {
7285 for (auto *U : Phi->users())
7286 WorkList.push_back(cast<Instruction>(U));
7287 continue;
7288 }
7289
7290 switch (I->getOpcode()) {
7291 case Instruction::And: {
7292 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7293 if (!AndC)
7294 return false;
7295 APInt AndBits = AndC->getValue();
7296 DemandBits |= AndBits;
7297 // Keep track of the widest and mask we see.
7298 if (AndBits.ugt(WidestAndBits))
7299 WidestAndBits = AndBits;
7300 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7301 AndsToMaybeRemove.push_back(I);
7302 break;
7303 }
7304
7305 case Instruction::Shl: {
7306 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7307 if (!ShlC)
7308 return false;
7309 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7310 DemandBits.setLowBits(BitWidth - ShiftAmt);
7311 DropFlags.push_back(I);
7312 break;
7313 }
7314
7315 case Instruction::Trunc: {
7316 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7317 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7318 DemandBits.setLowBits(TruncBitWidth);
7319 DropFlags.push_back(I);
7320 break;
7321 }
7322
7323 default:
7324 return false;
7325 }
7326 }
7327
7328 uint32_t ActiveBits = DemandBits.getActiveBits();
7329 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7330 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7331 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7332 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7333 // followed by an AND.
7334 // TODO: Look into removing this restriction by fixing backends to either
7335 // return false for isLoadExtLegal for i1 or have them select this pattern to
7336 // a single instruction.
7337 //
7338 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7339 // mask, since these are the only ands that will be removed by isel.
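  // For illustration only (a worked example with invented values): a load of
  // i32 whose only users are (and X, 255) and (trunc X to i8) gives
  // DemandBits = 0xff, an 8-bit mask equal to the widest "and", so an i8
  // zero-extending load is formed when the target reports it as legal.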
7340 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7341 WidestAndBits != DemandBits)
7342 return false;
7343
7344 LLVMContext &Ctx = Load->getType()->getContext();
7345 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7346 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7347
7348 // Reject cases that won't be matched as extloads.
7349 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7350 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7351 return false;
7352
7353 IRBuilder<> Builder(Load->getNextNode());
7354 auto *NewAnd = cast<Instruction>(
7355 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7356 // Mark this instruction as "inserted by CGP", so that other
7357 // optimizations don't touch it.
7358 InsertedInsts.insert(NewAnd);
7359
7360 // Replace all uses of load with new and (except for the use of load in the
7361 // new and itself).
7362 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7363 NewAnd->setOperand(0, Load);
7364
7365 // Remove any and instructions that are now redundant.
7366 for (auto *And : AndsToMaybeRemove)
7367 // Check that the and mask is the same as the one we decided to put on the
7368 // new and.
7369 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7370 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7371 if (&*CurInstIterator == And)
7372 CurInstIterator = std::next(And->getIterator());
7373 And->eraseFromParent();
7374 ++NumAndUses;
7375 }
7376
7377 // NSW flags may no longer hold.
7378 for (auto *Inst : DropFlags)
7379 Inst->setHasNoSignedWrap(false);
7380
7381 ++NumAndsAdded;
7382 return true;
7383}
7384
7385/// Check if V (an operand of a select instruction) is an expensive instruction
7386/// that is only used once.
7387 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7388 auto *I = dyn_cast<Instruction>(V);
7389 // If it's safe to speculatively execute, then it should not have side
7390 // effects; therefore, it's safe to sink and possibly *not* execute.
7391 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7392 TTI->isExpensiveToSpeculativelyExecute(I);
7393}
7394
7395/// Returns true if a SelectInst should be turned into an explicit branch.
7396 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7397 const TargetLowering *TLI,
7398 SelectInst *SI) {
7399 // If even a predictable select is cheap, then a branch can't be cheaper.
7400 if (!TLI->isPredictableSelectExpensive())
7401 return false;
7402
7403 // FIXME: This should use the same heuristics as IfConversion to determine
7404 // whether a select is better represented as a branch.
7405
7406 // If metadata tells us that the select condition is obviously predictable,
7407 // then we want to replace the select with a branch.
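  // For illustration only (invented weights): branch weights of 1000 and 1
  // give a probability of 1000/1001, roughly 99.9%, which typically exceeds
  // getPredictableBranchThreshold(), so the select would be expanded below.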
7408 uint64_t TrueWeight, FalseWeight;
7409 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7410 uint64_t Max = std::max(TrueWeight, FalseWeight);
7411 uint64_t Sum = TrueWeight + FalseWeight;
7412 if (Sum != 0) {
7413 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7414 if (Probability > TTI->getPredictableBranchThreshold())
7415 return true;
7416 }
7417 }
7418
7419 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7420
7421 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7422 // comparison condition. If the compare has more than one use, there's
7423 // probably another cmov or setcc around, so it's not worth emitting a branch.
7424 if (!Cmp || !Cmp->hasOneUse())
7425 return false;
7426
7427 // If either operand of the select is expensive and only needed on one side
7428 // of the select, we should form a branch.
7429 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7430 sinkSelectOperand(TTI, SI->getFalseValue()))
7431 return true;
7432
7433 return false;
7434}
7435
7436/// If \p isTrue is true, return the true value of \p SI, otherwise return
7437/// false value of \p SI. If the true/false value of \p SI is defined by any
7438/// select instructions in \p Selects, look through the defining select
7439/// instruction until the true/false value is not defined in \p Selects.
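///
/// For illustration only (schematic IR, not from a test case), with two
/// selects sharing the condition %c and Selects = {%s1, %s2}:
/// \code
/// %s1 = select i1 %c, i32 %a, i32 %b
/// %s2 = select i1 %c, i32 %s1, i32 %d
/// \endcode
/// asking for the true value of %s2 looks through %s1 and returns %a.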
7440static Value *
7441 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7442 const SmallPtrSet<const Instruction *, 2> &Selects) {
7443 Value *V = nullptr;
7444
7445 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7446 DefSI = dyn_cast<SelectInst>(V)) {
7447 assert(DefSI->getCondition() == SI->getCondition() &&
7448 "The condition of DefSI does not match with SI");
7449 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7450 }
7451
7452 assert(V && "Failed to get select true/false value");
7453 return V;
7454}
7455
7456bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7457 assert(Shift->isShift() && "Expected a shift");
7458
7459 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7460 // general vector shifts, and (3) the shift amount is a select-of-splatted
7461 // values, hoist the shifts before the select:
7462 // shift Op0, (select Cond, TVal, FVal) -->
7463 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7464 //
7465 // This is inverting a generic IR transform when we know that the cost of a
7466 // general vector shift is more than the cost of 2 shift-by-scalars.
7467 // We can't do this effectively in SDAG because we may not be able to
7468 // determine if the select operands are splats from within a basic block.
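  // For illustration only (schematic IR, not from a test case), on a target
  // where shift-by-scalar is cheap:
  //   %amt = select i1 %c, <4 x i32> %tsplat, <4 x i32> %fsplat
  //   %r   = shl <4 x i32> %x, %amt
  // becomes:
  //   %t = shl <4 x i32> %x, %tsplat
  //   %f = shl <4 x i32> %x, %fsplat
  //   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f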
7469 Type *Ty = Shift->getType();
7470 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7471 return false;
7472 Value *Cond, *TVal, *FVal;
7473 if (!match(Shift->getOperand(1),
7474 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7475 return false;
7476 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7477 return false;
7478
7479 IRBuilder<> Builder(Shift);
7480 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7481 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7482 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7483 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7484 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7485 Shift->eraseFromParent();
7486 return true;
7487}
7488
7489bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7490 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7491 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7492 "Expected a funnel shift");
7493
7494 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7495 // than general vector shifts, and (3) the shift amount is select-of-splatted
7496 // values, hoist the funnel shifts before the select:
7497 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7498 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7499 //
7500 // This is inverting a generic IR transform when we know that the cost of a
7501 // general vector shift is more than the cost of 2 shift-by-scalars.
7502 // We can't do this effectively in SDAG because we may not be able to
7503 // determine if the select operands are splats from within a basic block.
7504 Type *Ty = Fsh->getType();
7505 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7506 return false;
7507 Value *Cond, *TVal, *FVal;
7508 if (!match(Fsh->getOperand(2),
7509 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7510 return false;
7511 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7512 return false;
7513
7514 IRBuilder<> Builder(Fsh);
7515 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7516 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7517 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7518 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7519 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7520 Fsh->eraseFromParent();
7521 return true;
7522}
7523
7524/// If we have a SelectInst that will likely profit from branch prediction,
7525/// turn it into a branch.
7526bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7527 if (DisableSelectToBranch)
7528 return false;
7529
7530 // If the SelectOptimize pass is enabled, selects have already been optimized.
7531 if (!getCGPassBuilderOption().DisableSelectOptimize)
7532 return false;
7533
7534 // Find all consecutive select instructions that share the same condition.
7535 SmallVector<SelectInst *, 2> ASI;
7536 ASI.push_back(SI);
7537 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7538 It != SI->getParent()->end(); ++It) {
7539 SelectInst *I = dyn_cast<SelectInst>(&*It);
7540 if (I && SI->getCondition() == I->getCondition()) {
7541 ASI.push_back(I);
7542 } else {
7543 break;
7544 }
7545 }
7546
7547 SelectInst *LastSI = ASI.back();
7548 // Increment the current iterator to skip the rest of the select instructions
7549 // because they will be either "not lowered" or "all lowered" to branches.
7550 CurInstIterator = std::next(LastSI->getIterator());
7551 // Examine debug-info attached to the consecutive select instructions. They
7552 // won't be individually optimized by optimizeInst, so we need to perform
7553 // DbgVariableRecord maintenance here instead.
7554 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7555 fixupDbgVariableRecordsOnInst(*SI);
7556
7557 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7558
7559 // Can we convert the 'select' to CF ?
7560 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7561 return false;
7562
7563 TargetLowering::SelectSupportKind SelectKind;
7564 if (SI->getType()->isVectorTy())
7565 SelectKind = TargetLowering::ScalarCondVectorVal;
7566 else
7567 SelectKind = TargetLowering::ScalarValSelect;
7568
7569 if (TLI->isSelectSupported(SelectKind) &&
7570 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) ||
7571 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7572 return false;
7573
7574 // The DominatorTree needs to be rebuilt by any consumers after this
7575 // transformation. We simply reset here rather than setting the ModifiedDT
7576 // flag to avoid restarting the function walk in runOnFunction for each
7577 // select optimized.
7578 DT.reset();
7579
7580 // Transform a sequence like this:
7581 // start:
7582 // %cmp = cmp uge i32 %a, %b
7583 // %sel = select i1 %cmp, i32 %c, i32 %d
7584 //
7585 // Into:
7586 // start:
7587 // %cmp = cmp uge i32 %a, %b
7588 // %cmp.frozen = freeze %cmp
7589 // br i1 %cmp.frozen, label %select.true, label %select.false
7590 // select.true:
7591 // br label %select.end
7592 // select.false:
7593 // br label %select.end
7594 // select.end:
7595 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7596 //
7597 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7598 // In addition, we may sink instructions that produce %c or %d from
7599 // the entry block into the destination(s) of the new branch.
7600 // If the true or false blocks do not contain a sunken instruction, that
7601 // block and its branch may be optimized away. In that case, one side of the
7602 // first branch will point directly to select.end, and the corresponding PHI
7603 // predecessor block will be the start block.
7604
7605 // Collect values that go on the true side and the values that go on the false
7606 // side.
7607 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7608 for (SelectInst *SI : ASI) {
7609 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7610 TrueInstrs.push_back(cast<Instruction>(V));
7611 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7612 FalseInstrs.push_back(cast<Instruction>(V));
7613 }
7614
7615 // Split the select block, according to how many (if any) values go on each
7616 // side.
7617 BasicBlock *StartBlock = SI->getParent();
7618 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7619 // We should split before any debug-info.
7620 SplitPt.setHeadBit(true);
7621
7622 IRBuilder<> IB(SI);
7623 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7624
7625 BasicBlock *TrueBlock = nullptr;
7626 BasicBlock *FalseBlock = nullptr;
7627 BasicBlock *EndBlock = nullptr;
7628 BranchInst *TrueBranch = nullptr;
7629 BranchInst *FalseBranch = nullptr;
7630 if (TrueInstrs.size() == 0) {
7631 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7632 CondFr, SplitPt, false, nullptr, nullptr, LI));
7633 FalseBlock = FalseBranch->getParent();
7634 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7635 } else if (FalseInstrs.size() == 0) {
7636 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7637 CondFr, SplitPt, false, nullptr, nullptr, LI));
7638 TrueBlock = TrueBranch->getParent();
7639 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7640 } else {
7641 Instruction *ThenTerm = nullptr;
7642 Instruction *ElseTerm = nullptr;
7643 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7644 nullptr, nullptr, LI);
7645 TrueBranch = cast<BranchInst>(ThenTerm);
7646 FalseBranch = cast<BranchInst>(ElseTerm);
7647 TrueBlock = TrueBranch->getParent();
7648 FalseBlock = FalseBranch->getParent();
7649 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7650 }
7651
7652 EndBlock->setName("select.end");
7653 if (TrueBlock)
7654 TrueBlock->setName("select.true.sink");
7655 if (FalseBlock)
7656 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7657 : "select.false.sink");
7658
7659 if (IsHugeFunc) {
7660 if (TrueBlock)
7661 FreshBBs.insert(TrueBlock);
7662 if (FalseBlock)
7663 FreshBBs.insert(FalseBlock);
7664 FreshBBs.insert(EndBlock);
7665 }
7666
7667 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7668
7669 static const unsigned MD[] = {
7670 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7671 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7672 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7673
7674 // Sink expensive instructions into the conditional blocks to avoid executing
7675 // them speculatively.
7676 for (Instruction *I : TrueInstrs)
7677 I->moveBefore(TrueBranch->getIterator());
7678 for (Instruction *I : FalseInstrs)
7679 I->moveBefore(FalseBranch->getIterator());
7680
7681 // If we did not create a new block for one of the 'true' or 'false' paths
7682 // of the condition, it means that side of the branch goes to the end block
7683 // directly and the path originates from the start block from the point of
7684 // view of the new PHI.
7685 if (TrueBlock == nullptr)
7686 TrueBlock = StartBlock;
7687 else if (FalseBlock == nullptr)
7688 FalseBlock = StartBlock;
7689
7690 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7691 // Use reverse iterator because later select may use the value of the
7692 // earlier select, and we need to propagate value through earlier select
7693 // to get the PHI operand.
7694 for (SelectInst *SI : llvm::reverse(ASI)) {
7695 // The select itself is replaced with a PHI Node.
7696 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7697 PN->insertBefore(EndBlock->begin());
7698 PN->takeName(SI);
7699 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7700 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7701 PN->setDebugLoc(SI->getDebugLoc());
7702
7703 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7704 SI->eraseFromParent();
7705 INS.erase(SI);
7706 ++NumSelectsExpanded;
7707 }
7708
7709 // Instruct OptimizeBlock to skip to the next block.
7710 CurInstIterator = StartBlock->end();
7711 return true;
7712}
7713
7714 /// Some targets only accept certain types for splat inputs. For example, a VDUP
7715 /// in MVE takes a GPR (integer) register, and the instructions that incorporate
7716 /// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
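/// As an illustrative sketch of the rewrite performed here (the i32 scalar
/// type is an assumption; the real choice comes from shouldConvertSplatType):
/// \code
///   %ins = insertelement <4 x float> poison, float %val, i64 0
///   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
///            <4 x i32> zeroinitializer
/// \endcode
/// becomes
/// \code
///   %bc1 = bitcast float %val to i32
///   %ins2 = insertelement <4 x i32> poison, i32 %bc1, i64 0
///   %splat2 = shufflevector <4 x i32> %ins2, <4 x i32> poison,
///             <4 x i32> zeroinitializer
///   %bc2 = bitcast <4 x i32> %splat2 to <4 x float>
/// \endcode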
7717bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7718 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7719 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7720 m_Undef(), m_ZeroMask())))
7721 return false;
7722 Type *NewType = TLI->shouldConvertSplatType(SVI);
7723 if (!NewType)
7724 return false;
7725
7726 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7727 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7728 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7729 "Expected a type of the same size!");
7730 auto *NewVecType =
7731 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7732
7733 // Create a bitcast (shuffle (insert (bitcast(..))))
7734 IRBuilder<> Builder(SVI->getContext());
7735 Builder.SetInsertPoint(SVI);
7736 Value *BC1 = Builder.CreateBitCast(
7737 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7738 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7739 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7740
7741 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7742 RecursivelyDeleteTriviallyDeadInstructions(
7743 SVI, TLInfo, nullptr,
7744 [&](Value *V) { removeAllAssertingVHReferences(V); });
7745
7746 // Also hoist the bitcast up to its operand if they are not in the same
7747 // block.
7748 if (auto *BCI = dyn_cast<Instruction>(BC1))
7749 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7750 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7751 !Op->isTerminator() && !Op->isEHPad())
7752 BCI->moveAfter(Op);
7753
7754 return true;
7755}
7756
7757bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7758 // If the operands of I can be folded into a target instruction together with
7759 // I, duplicate and sink them.
7760 SmallVector<Use *, 4> OpsToSink;
7761 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7762 return false;
7763
7764 // OpsToSink can contain multiple uses in a use chain (e.g.
7765 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7766 // uses must come first, so we process the ops in reverse order so as to not
7767 // create invalid IR.
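// For example, if a target's isProfitableToSinkOperands hook reports a splat
// shuffle operand as sinkable (an illustrative sketch, not tied to any
// particular target):
//   bb0:
//     %s = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
//     br label %bb1
//   bb1:
//     %m = mul <4 x i32> %v, %s
// the shuffle is cloned right before %m in bb1 so instruction selection can
// fold it into a multiply-by-element, and the original copy is erased below
// if it becomes dead.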
7768 BasicBlock *TargetBB = I->getParent();
7769 bool Changed = false;
7770 SmallVector<Use *, 4> ToReplace;
7771 Instruction *InsertPoint = I;
7772 DenseMap<const Instruction *, unsigned long> InstOrdering;
7773 unsigned long InstNumber = 0;
7774 for (const auto &I : *TargetBB)
7775 InstOrdering[&I] = InstNumber++;
7776
7777 for (Use *U : reverse(OpsToSink)) {
7778 auto *UI = cast<Instruction>(U->get());
7779 if (isa<PHINode>(UI))
7780 continue;
7781 if (UI->getParent() == TargetBB) {
7782 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7783 InsertPoint = UI;
7784 continue;
7785 }
7786 ToReplace.push_back(U);
7787 }
7788
7789 SetVector<Instruction *> MaybeDead;
7790 DenseMap<Instruction *, Instruction *> NewInstructions;
7791 for (Use *U : ToReplace) {
7792 auto *UI = cast<Instruction>(U->get());
7793 Instruction *NI = UI->clone();
7794
7795 if (IsHugeFunc) {
7796 // Now that we are cloning an instruction, its operands' defs may become
7797 // sinkable into this BB, so put the defs' BBs into FreshBBs for optimization.
7798 for (Value *Op : NI->operands())
7799 if (auto *OpDef = dyn_cast<Instruction>(Op))
7800 FreshBBs.insert(OpDef->getParent());
7801 }
7802
7803 NewInstructions[UI] = NI;
7804 MaybeDead.insert(UI);
7805 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7806 NI->insertBefore(InsertPoint->getIterator());
7807 InsertPoint = NI;
7808 InsertedInsts.insert(NI);
7809
7810 // Update the use for the new instruction, making sure that we update the
7811 // sunk instruction uses, if it is part of a chain that has already been
7812 // sunk.
7813 Instruction *OldI = cast<Instruction>(U->getUser());
7814 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7815 It->second->setOperand(U->getOperandNo(), NI);
7816 else
7817 U->set(NI);
7818 Changed = true;
7819 }
7820
7821 // Remove instructions that are dead after sinking.
7822 for (auto *I : MaybeDead) {
7823 if (!I->hasNUsesOrMore(1)) {
7824 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7825 I->eraseFromParent();
7826 }
7827 }
7828
7829 return Changed;
7830}
7831
7832bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7833 Value *Cond = SI->getCondition();
7834 Type *OldType = Cond->getType();
7835 LLVMContext &Context = Cond->getContext();
7836 EVT OldVT = TLI->getValueType(*DL, OldType);
7837 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7838 unsigned RegWidth = RegType.getSizeInBits();
7839
7840 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7841 return false;
7842
7843 // If the register width is greater than the type width, expand the condition
7844 // of the switch instruction and each case constant to the width of the
7845 // register. By widening the type of the switch condition, subsequent
7846 // comparisons (for case comparisons) will not need to be extended to the
7847 // preferred register width, so we will potentially eliminate N-1 extends,
7848 // where N is the number of cases in the switch.
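// For example (illustrative only; the preferred width and the choice between
// zext and sext below are target-dependent):
//   switch i8 %c, label %def [ i8 1, label %bb1
//                              i8 2, label %bb2 ]
// becomes
//   %c.ext = zext i8 %c to i32
//   switch i32 %c.ext, label %def [ i32 1, label %bb1
//                                   i32 2, label %bb2 ]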
7849 auto *NewType = Type::getIntNTy(Context, RegWidth);
7850
7851 // Extend the switch condition and case constants using the target preferred
7852 // extend unless the switch condition is a function argument with an extend
7853 // attribute. In that case, we can avoid an unnecessary mask/extension by
7854 // matching the argument extension instead.
7855 Instruction::CastOps ExtType = Instruction::ZExt;
7856 // Some targets prefer SExt over ZExt.
7857 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7858 ExtType = Instruction::SExt;
7859
7860 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7861 if (Arg->hasSExtAttr())
7862 ExtType = Instruction::SExt;
7863 if (Arg->hasZExtAttr())
7864 ExtType = Instruction::ZExt;
7865 }
7866
7867 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7868 ExtInst->insertBefore(SI->getIterator());
7869 ExtInst->setDebugLoc(SI->getDebugLoc());
7870 SI->setCondition(ExtInst);
7871 for (auto Case : SI->cases()) {
7872 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7873 APInt WideConst = (ExtType == Instruction::ZExt)
7874 ? NarrowConst.zext(RegWidth)
7875 : NarrowConst.sext(RegWidth);
7876 Case.setValue(ConstantInt::get(Context, WideConst));
7877 }
7878
7879 return true;
7880}
7881
7882bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7883 // The SCCP optimization tends to produce code like this:
7884 // switch(x) { case 42: phi(42, ...) }
7885 // Materializing the constant for the phi-argument needs instructions, so we
7886 // change the code to:
7887 // switch(x) { case 42: phi(x, ...) }
7888
7889 Value *Condition = SI->getCondition();
7890 // Avoid endless loop in degenerate case.
7891 if (isa<ConstantInt>(*Condition))
7892 return false;
7893
7894 bool Changed = false;
7895 BasicBlock *SwitchBB = SI->getParent();
7896 Type *ConditionType = Condition->getType();
7897
7898 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7899 ConstantInt *CaseValue = Case.getCaseValue();
7900 BasicBlock *CaseBB = Case.getCaseSuccessor();
7901 // Set to true if we previously checked that `CaseBB` is only reached by
7902 // a single case from this switch.
7903 bool CheckedForSinglePred = false;
7904 for (PHINode &PHI : CaseBB->phis()) {
7905 Type *PHIType = PHI.getType();
7906 // If ZExt is free then we can also catch patterns like this:
7907 // switch((i32)x) { case 42: phi((i64)42, ...); }
7908 // and replace `(i64)42` with `zext i32 %x to i64`.
7909 bool TryZExt =
7910 PHIType->isIntegerTy() &&
7911 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7912 TLI->isZExtFree(ConditionType, PHIType);
7913 if (PHIType == ConditionType || TryZExt) {
7914 // Set to true to skip this case because of multiple preds.
7915 bool SkipCase = false;
7916 Value *Replacement = nullptr;
7917 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7918 Value *PHIValue = PHI.getIncomingValue(I);
7919 if (PHIValue != CaseValue) {
7920 if (!TryZExt)
7921 continue;
7922 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7923 if (!PHIValueInt ||
7924 PHIValueInt->getValue() !=
7925 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7926 continue;
7927 }
7928 if (PHI.getIncomingBlock(I) != SwitchBB)
7929 continue;
7930 // We cannot optimize if there are multiple case labels jumping to
7931 // this block. This check may get expensive when there are many
7932 // case labels so we test for it last.
7933 if (!CheckedForSinglePred) {
7934 CheckedForSinglePred = true;
7935 if (SI->findCaseDest(CaseBB) == nullptr) {
7936 SkipCase = true;
7937 break;
7938 }
7939 }
7940
7941 if (Replacement == nullptr) {
7942 if (PHIValue == CaseValue) {
7943 Replacement = Condition;
7944 } else {
7945 IRBuilder<> Builder(SI);
7946 Replacement = Builder.CreateZExt(Condition, PHIType);
7947 }
7948 }
7949 PHI.setIncomingValue(I, Replacement);
7950 Changed = true;
7951 }
7952 if (SkipCase)
7953 break;
7954 }
7955 }
7956 }
7957 return Changed;
7958}
7959
7960bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7961 bool Changed = optimizeSwitchType(SI);
7962 Changed |= optimizeSwitchPhiConstants(SI);
7963 return Changed;
7964}
7965
7966namespace {
7967
7968/// Helper class to promote a scalar operation to a vector one.
7969/// This class is used to move downward extractelement transition.
7970/// E.g.,
7971/// a = vector_op <2 x i32>
7972/// b = extractelement <2 x i32> a, i32 0
7973/// c = scalar_op b
7974/// store c
7975///
7976/// =>
7977/// a = vector_op <2 x i32>
7978/// c = vector_op a (equivalent to scalar_op on the related lane)
7979/// * d = extractelement <2 x i32> c, i32 0
7980/// * store d
7981 /// Assuming both extractelement and store can be combined, we get rid of the
7982/// transition.
7983class VectorPromoteHelper {
7984 /// DataLayout associated with the current module.
7985 const DataLayout &DL;
7986
7987 /// Used to perform some checks on the legality of vector operations.
7988 const TargetLowering &TLI;
7989
7990 /// Used to estimate the cost of the promoted chain.
7991 const TargetTransformInfo &TTI;
7992
7993 /// The transition being moved downwards.
7994 Instruction *Transition;
7995
7996 /// The sequence of instructions to be promoted.
7997 SmallVector<Instruction *, 4> InstsToBePromoted;
7998
7999 /// Cost of combining a store and an extract.
8000 unsigned StoreExtractCombineCost;
8001
8002 /// Instruction that will be combined with the transition.
8003 Instruction *CombineInst = nullptr;
8004
8005 /// The instruction that represents the current end of the transition.
8006 /// Since we are faking the promotion until we reach the end of the chain
8007 /// of computation, we need a way to get the current end of the transition.
8008 Instruction *getEndOfTransition() const {
8009 if (InstsToBePromoted.empty())
8010 return Transition;
8011 return InstsToBePromoted.back();
8012 }
8013
8014 /// Return the index of the original value in the transition.
8015 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8016 /// c, is at index 0.
8017 unsigned getTransitionOriginalValueIdx() const {
8018 assert(isa<ExtractElementInst>(Transition) &&
8019 "Other kind of transitions are not supported yet");
8020 return 0;
8021 }
8022
8023 /// Return the index of the index in the transition.
8024 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8025 /// is at index 1.
8026 unsigned getTransitionIdx() const {
8027 assert(isa<ExtractElementInst>(Transition) &&
8028 "Other kind of transitions are not supported yet");
8029 return 1;
8030 }
8031
8032 /// Get the type of the transition.
8033 /// This is the type of the original value.
8034 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8035 /// transition is <2 x i32>.
8036 Type *getTransitionType() const {
8037 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8038 }
8039
8040 /// Promote \p ToBePromoted by moving \p Def downward through it.
8041 /// I.e., we have the following sequence:
8042 /// Def = Transition <ty1> a to <ty2>
8043 /// b = ToBePromoted <ty2> Def, ...
8044 /// =>
8045 /// b = ToBePromoted <ty1> a, ...
8046 /// Def = Transition <ty1> ToBePromoted to <ty2>
8047 void promoteImpl(Instruction *ToBePromoted);
8048
8049 /// Check whether or not it is profitable to promote all the
8050 /// instructions enqueued to be promoted.
8051 bool isProfitableToPromote() {
8052 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8053 unsigned Index = isa<ConstantInt>(ValIdx)
8054 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8055 : -1;
8056 Type *PromotedType = getTransitionType();
8057
8058 StoreInst *ST = cast<StoreInst>(CombineInst);
8059 unsigned AS = ST->getPointerAddressSpace();
8060 // Check if this store is supported.
8061 if (!TLI.allowsMisalignedMemoryAccesses(
8062 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8063 ST->getAlign())) {
8064 // If this is not supported, there is no way we can combine
8065 // the extract with the store.
8066 return false;
8067 }
8068
8069 // The scalar chain of computation has to pay for the transition
8070 // scalar to vector.
8071 // The vector chain has to account for the combining cost.
8072 enum TargetTransformInfo::TargetCostKind CostKind =
8073 TargetTransformInfo::TCK_RecipThroughput;
8074 InstructionCost ScalarCost =
8075 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8076 InstructionCost VectorCost = StoreExtractCombineCost;
8077 for (const auto &Inst : InstsToBePromoted) {
8078 // Compute the cost.
8079 // By construction, all instructions being promoted are arithmetic ones.
8080 // Moreover, one argument is a constant that can be viewed as a splat
8081 // constant.
8082 Value *Arg0 = Inst->getOperand(0);
8083 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8084 isa<ConstantFP>(Arg0);
8085 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8086 if (IsArg0Constant)
8087 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8088 else
8089 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8090
8091 ScalarCost += TTI.getArithmeticInstrCost(
8092 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8093 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8094 CostKind, Arg0Info, Arg1Info);
8095 }
8096 LLVM_DEBUG(
8097 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8098 << ScalarCost << "\nVector: " << VectorCost << '\n');
8099 return ScalarCost > VectorCost;
8100 }
8101
8102 /// Generate a constant vector with \p Val with the same
8103 /// number of elements as the transition.
8104 /// \p UseSplat defines whether or not \p Val should be replicated
8105 /// across the whole vector.
8106 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8107 /// otherwise we generate a vector with as many poison as possible:
8108 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8109 /// used at the index of the extract.
8110 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8111 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8112 if (!UseSplat) {
8113 // If we cannot determine where the constant must be, we have to
8114 // use a splat constant.
8115 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8116 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8117 ExtractIdx = CstVal->getSExtValue();
8118 else
8119 UseSplat = true;
8120 }
8121
8122 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8123 if (UseSplat)
8124 return ConstantVector::getSplat(EC, Val);
8125
8126 if (!EC.isScalable()) {
8127 SmallVector<Constant *, 4> ConstVec;
8128 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8129 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8130 if (Idx == ExtractIdx)
8131 ConstVec.push_back(Val);
8132 else
8133 ConstVec.push_back(PoisonVal);
8134 }
8135 return ConstantVector::get(ConstVec);
8136 } else
8138 "Generate scalable vector for non-splat is unimplemented");
8139 }
8140
8141 /// Check if promoting to a vector type an operand at \p OperandIdx
8142 /// in \p Use can trigger undefined behavior.
8143 static bool canCauseUndefinedBehavior(const Instruction *Use,
8144 unsigned OperandIdx) {
8145 // It is not safe to introduce undef when the operand is on
8146 // the right hand side of a division-like instruction.
8147 if (OperandIdx != 1)
8148 return false;
8149 switch (Use->getOpcode()) {
8150 default:
8151 return false;
8152 case Instruction::SDiv:
8153 case Instruction::UDiv:
8154 case Instruction::SRem:
8155 case Instruction::URem:
8156 return true;
8157 case Instruction::FDiv:
8158 case Instruction::FRem:
8159 return !Use->hasNoNaNs();
8160 }
8161 llvm_unreachable(nullptr);
8162 }
8163
8164public:
8165 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8166 const TargetTransformInfo &TTI, Instruction *Transition,
8167 unsigned CombineCost)
8168 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8169 StoreExtractCombineCost(CombineCost) {
8170 assert(Transition && "Do not know how to promote null");
8171 }
8172
8173 /// Check if we can promote \p ToBePromoted to \p Type.
8174 bool canPromote(const Instruction *ToBePromoted) const {
8175 // We could support CastInst too.
8176 return isa<BinaryOperator>(ToBePromoted);
8177 }
8178
8179 /// Check if it is profitable to promote \p ToBePromoted
8180 /// by moving the transition downward through it.
8181 bool shouldPromote(const Instruction *ToBePromoted) const {
8182 // Promote only if all the operands can be statically expanded.
8183 // Indeed, we do not want to introduce any new kind of transitions.
8184 for (const Use &U : ToBePromoted->operands()) {
8185 const Value *Val = U.get();
8186 if (Val == getEndOfTransition()) {
8187 // If the use is a division and the transition is on the rhs,
8188 // we cannot promote the operation, otherwise we may create a
8189 // division by zero.
8190 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8191 return false;
8192 continue;
8193 }
8194 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8195 !isa<ConstantFP>(Val))
8196 return false;
8197 }
8198 // Check that the resulting operation is legal.
8199 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8200 if (!ISDOpcode)
8201 return false;
8202 return StressStoreExtract ||
8203 TLI.isOperationLegalOrCustom(
8204 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8205 }
8206
8207 /// Check whether or not \p Use can be combined
8208 /// with the transition.
8209 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8210 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8211
8212 /// Record \p ToBePromoted as part of the chain to be promoted.
8213 void enqueueForPromotion(Instruction *ToBePromoted) {
8214 InstsToBePromoted.push_back(ToBePromoted);
8215 }
8216
8217 /// Set the instruction that will be combined with the transition.
8218 void recordCombineInstruction(Instruction *ToBeCombined) {
8219 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8220 CombineInst = ToBeCombined;
8221 }
8222
8223 /// Promote all the instructions enqueued for promotion if it is
8224 /// profitable.
8225 /// \return True if the promotion happened, false otherwise.
8226 bool promote() {
8227 // Check if there is something to promote.
8228 // Right now, if we do not have anything to combine with,
8229 // we assume the promotion is not profitable.
8230 if (InstsToBePromoted.empty() || !CombineInst)
8231 return false;
8232
8233 // Check cost.
8234 if (!StressStoreExtract && !isProfitableToPromote())
8235 return false;
8236
8237 // Promote.
8238 for (auto &ToBePromoted : InstsToBePromoted)
8239 promoteImpl(ToBePromoted);
8240 InstsToBePromoted.clear();
8241 return true;
8242 }
8243};
8244
8245} // end anonymous namespace
8246
8247void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8248 // At this point, we know that all the operands of ToBePromoted but Def
8249 // can be statically promoted.
8250 // For Def, we need to use its parameter in ToBePromoted:
8251 // b = ToBePromoted ty1 a
8252 // Def = Transition ty1 b to ty2
8253 // Move the transition down.
8254 // 1. Replace all uses of the promoted operation by the transition.
8255 // = ... b => = ... Def.
8256 assert(ToBePromoted->getType() == Transition->getType() &&
8257 "The type of the result of the transition does not match "
8258 "the final type");
8259 ToBePromoted->replaceAllUsesWith(Transition);
8260 // 2. Update the type of the uses.
8261 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8262 Type *TransitionTy = getTransitionType();
8263 ToBePromoted->mutateType(TransitionTy);
8264 // 3. Update all the operands of the promoted operation with promoted
8265 // operands.
8266 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8267 for (Use &U : ToBePromoted->operands()) {
8268 Value *Val = U.get();
8269 Value *NewVal = nullptr;
8270 if (Val == Transition)
8271 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8272 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8273 isa<ConstantFP>(Val)) {
8274 // Use a splat constant if it is not safe to use undef.
8275 NewVal = getConstantVector(
8276 cast<Constant>(Val),
8277 isa<UndefValue>(Val) ||
8278 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8279 } else
8280 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8281 "this?");
8282 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8283 }
8284 Transition->moveAfter(ToBePromoted);
8285 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8286}
8287
8288/// Some targets can do store(extractelement) with one instruction.
8289/// Try to push the extractelement towards the stores when the target
8290/// has this feature and this is profitable.
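/// A rough sketch of the transformation driven by VectorPromoteHelper below
/// (whether it actually fires depends on the target's store/extract
/// combining cost):
/// \code
///   %a = add <2 x i32> %x, %y
///   %e = extractelement <2 x i32> %a, i32 1
///   %r = add i32 %e, 7
///   store i32 %r, ptr %p
/// \endcode
/// becomes
/// \code
///   %a = add <2 x i32> %x, %y
///   %v = add <2 x i32> %a, <i32 poison, i32 7>
///   %e = extractelement <2 x i32> %v, i32 1
///   store i32 %e, ptr %p
/// \endcode
/// so the remaining extract can be folded into the store by the backend.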
8291bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8292 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8293 if (DisableStoreExtract ||
8294 (!StressStoreExtract &&
8295 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8296 Inst->getOperand(1), CombineCost)))
8297 return false;
8298
8299 // At this point we know that Inst is a vector to scalar transition.
8300 // Try to move it down the def-use chain, until:
8301 // - We can combine the transition with its single use
8302 // => we got rid of the transition.
8303 // - We escape the current basic block
8304 // => we would need to check that we are moving it at a cheaper place and
8305 // we do not do that for now.
8306 BasicBlock *Parent = Inst->getParent();
8307 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8308 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8309 // If the transition has more than one use, assume this is not going to be
8310 // beneficial.
8311 while (Inst->hasOneUse()) {
8312 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8313 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8314
8315 if (ToBePromoted->getParent() != Parent) {
8316 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8317 << ToBePromoted->getParent()->getName()
8318 << ") than the transition (" << Parent->getName()
8319 << ").\n");
8320 return false;
8321 }
8322
8323 if (VPH.canCombine(ToBePromoted)) {
8324 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8325 << "will be combined with: " << *ToBePromoted << '\n');
8326 VPH.recordCombineInstruction(ToBePromoted);
8327 bool Changed = VPH.promote();
8328 NumStoreExtractExposed += Changed;
8329 return Changed;
8330 }
8331
8332 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8333 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8334 return false;
8335
8336 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8337
8338 VPH.enqueueForPromotion(ToBePromoted);
8339 Inst = ToBePromoted;
8340 }
8341 return false;
8342}
8343
8344/// For the instruction sequence of store below, F and I values
8345/// are bundled together as an i64 value before being stored into memory.
8346/// Sometimes it is more efficient to generate separate stores for F and I,
8347/// which can remove the bitwise instructions or sink them to colder places.
8348///
8349/// (store (or (zext (bitcast F to i32) to i64),
8350/// (shl (zext I to i64), 32)), addr) -->
8351/// (store F, addr) and (store I, addr+4)
8352///
8353/// Similarly, splitting for other merged store can also be beneficial, like:
8354/// For pair of {i32, i32}, i64 store --> two i32 stores.
8355/// For pair of {i32, i16}, i64 store --> two i32 stores.
8356/// For pair of {i16, i16}, i32 store --> two i16 stores.
8357/// For pair of {i16, i8}, i32 store --> two i16 stores.
8358/// For pair of {i8, i8}, i16 store --> two i8 stores.
8359///
8360/// We allow each target to determine specifically which kind of splitting is
8361/// supported.
8362///
8363/// The store patterns are commonly seen from the simple code snippet below
8364 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8365/// void goo(const std::pair<int, float> &);
8366/// hoo() {
8367/// ...
8368/// goo(std::make_pair(tmp, ftmp));
8369/// ...
8370/// }
8371///
8372/// Although we already have similar splitting in DAG Combine, we duplicate
8373/// it in CodeGenPrepare to catch the case in which pattern is across
8374/// multiple BBs. The logic in DAG Combine is kept to catch case generated
8375/// during code expansion.
8376 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8377 const TargetLowering &TLI) {
8378 // Handle simple but common cases only.
8379 Type *StoreType = SI.getValueOperand()->getType();
8380
8381 // The code below assumes shifting a value by <number of bits>,
8382 // whereas scalable vectors would have to be shifted by
8383 // <2log(vscale) + number of bits> in order to store the
8384 // low/high parts. Bailing out for now.
8385 if (StoreType->isScalableTy())
8386 return false;
8387
8388 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8389 DL.getTypeSizeInBits(StoreType) == 0)
8390 return false;
8391
8392 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8393 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8394 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8395 return false;
8396
8397 // Don't split the store if it is volatile.
8398 if (SI.isVolatile())
8399 return false;
8400
8401 // Match the following patterns:
8402 // (store (or (zext LValue to i64),
8403 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8404 // or
8405 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8406 // (zext LValue to i64),
8407 // Expect both operands of OR and the first operand of SHL have only
8408 // one use.
8409 Value *LValue, *HValue;
8410 if (!match(SI.getValueOperand(),
8411 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8412 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8413 m_SpecificInt(HalfValBitSize))))))
8414 return false;
8415
8416 // Check LValue and HValue are int with size less or equal than 32.
8417 if (!LValue->getType()->isIntegerTy() ||
8418 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8419 !HValue->getType()->isIntegerTy() ||
8420 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8421 return false;
8422
8423 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8424 // as the input of target query.
8425 auto *LBC = dyn_cast<BitCastInst>(LValue);
8426 auto *HBC = dyn_cast<BitCastInst>(HValue);
8427 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8428 : EVT::getEVT(LValue->getType());
8429 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8430 : EVT::getEVT(HValue->getType());
8431 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8432 return false;
8433
8434 // Start to split store.
8435 IRBuilder<> Builder(SI.getContext());
8436 Builder.SetInsertPoint(&SI);
8437
8438 // If LValue/HValue is a bitcast in another BB, create a new one in current
8439 // BB so it may be merged with the split stores by the DAG combiner.
8440 if (LBC && LBC->getParent() != SI.getParent())
8441 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8442 if (HBC && HBC->getParent() != SI.getParent())
8443 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8444
8445 bool IsLE = SI.getDataLayout().isLittleEndian();
8446 auto CreateSplitStore = [&](Value *V, bool Upper) {
8447 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8448 Value *Addr = SI.getPointerOperand();
8449 Align Alignment = SI.getAlign();
8450 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8451 if (IsOffsetStore) {
8452 Addr = Builder.CreateGEP(
8453 SplitStoreType, Addr,
8454 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8455
8456 // When splitting the store in half, naturally one half will retain the
8457 // alignment of the original wider store, regardless of whether it was
8458 // over-aligned or not, while the other will require adjustment.
8459 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8460 }
8461 Builder.CreateAlignedStore(V, Addr, Alignment);
8462 };
8463
8464 CreateSplitStore(LValue, false);
8465 CreateSplitStore(HValue, true);
8466
8467 // Delete the old store.
8468 SI.eraseFromParent();
8469 return true;
8470}
8471
8472// Return true if the GEP has two operands, the first operand is of a sequential
8473// type, and the second operand is a constant.
8474 static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8475 gep_type_iterator I = gep_type_begin(*GEP);
8476 return GEP->getNumOperands() == 2 && I.isSequential() &&
8477 isa<ConstantInt>(GEP->getOperand(1));
8478}
8479
8480// Try unmerging GEPs to reduce liveness interference (register pressure) across
8481// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8482// reducing liveness interference across those edges benefits global register
8483// allocation. Currently handles only certain cases.
8484//
8485// For example, unmerge %GEPI and %UGEPI as below.
8486//
8487// ---------- BEFORE ----------
8488// SrcBlock:
8489// ...
8490// %GEPIOp = ...
8491// ...
8492// %GEPI = gep %GEPIOp, Idx
8493// ...
8494// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8495// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8496 // (* %GEPIOp is alive on the indirectbr edges only because it is used by
8497// %UGEPI)
8498//
8499// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8500// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8501// ...
8502//
8503// DstBi:
8504// ...
8505// %UGEPI = gep %GEPIOp, UIdx
8506// ...
8507// ---------------------------
8508//
8509// ---------- AFTER ----------
8510// SrcBlock:
8511// ... (same as above)
8512// (* %GEPI is still alive on the indirectbr edges)
8513// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8514// unmerging)
8515// ...
8516//
8517// DstBi:
8518// ...
8519// %UGEPI = gep %GEPI, (UIdx-Idx)
8520// ...
8521// ---------------------------
8522//
8523// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8524// no longer alive on them.
8525//
8526 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8527 // of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8528 // not to disable further simplifications and optimizations as a result of GEP
8529// merging.
8530//
8531// Note this unmerging may increase the length of the data flow critical path
8532// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8533// between the register pressure and the length of data-flow critical
8534// path. Restricting this to the uncommon IndirectBr case would minimize the
8535// impact of potentially longer critical path, if any, and the impact on compile
8536// time.
8537 static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8538 const TargetTransformInfo *TTI) {
8539 BasicBlock *SrcBlock = GEPI->getParent();
8540 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8541 // (non-IndirectBr) cases exit early here.
8542 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8543 return false;
8544 // Check that GEPI is a simple gep with a single constant index.
8545 if (!GEPSequentialConstIndexed(GEPI))
8546 return false;
8547 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8548 // Check that GEPI is a cheap one.
8549 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8550 TargetTransformInfo::TCK_SizeAndLatency) >
8551 TargetTransformInfo::TCC_Basic)
8552 return false;
8553 Value *GEPIOp = GEPI->getOperand(0);
8554 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8555 if (!isa<Instruction>(GEPIOp))
8556 return false;
8557 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8558 if (GEPIOpI->getParent() != SrcBlock)
8559 return false;
8560 // Check that GEP is used outside the block, meaning it's alive on the
8561 // IndirectBr edge(s).
8562 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8563 if (auto *I = dyn_cast<Instruction>(Usr)) {
8564 if (I->getParent() != SrcBlock) {
8565 return true;
8566 }
8567 }
8568 return false;
8569 }))
8570 return false;
8571 // The second elements of the GEP chains to be unmerged.
8572 std::vector<GetElementPtrInst *> UGEPIs;
8573 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8574 // on IndirectBr edges.
8575 for (User *Usr : GEPIOp->users()) {
8576 if (Usr == GEPI)
8577 continue;
8578 // Check if Usr is an Instruction. If not, give up.
8579 if (!isa<Instruction>(Usr))
8580 return false;
8581 auto *UI = cast<Instruction>(Usr);
8582 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8583 if (UI->getParent() == SrcBlock)
8584 continue;
8585 // Check if Usr is a GEP. If not, give up.
8586 if (!isa<GetElementPtrInst>(Usr))
8587 return false;
8588 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8589 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8590 // the pointer operand to it. If so, record it in the vector. If not, give
8591 // up.
8592 if (!GEPSequentialConstIndexed(UGEPI))
8593 return false;
8594 if (UGEPI->getOperand(0) != GEPIOp)
8595 return false;
8596 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8597 return false;
8598 if (GEPIIdx->getType() !=
8599 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8600 return false;
8601 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8602 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8603 TargetTransformInfo::TCK_SizeAndLatency) >
8604 TargetTransformInfo::TCC_Basic)
8605 return false;
8606 UGEPIs.push_back(UGEPI);
8607 }
8608 if (UGEPIs.size() == 0)
8609 return false;
8610 // Check the materializing cost of (Uidx-Idx).
8611 for (GetElementPtrInst *UGEPI : UGEPIs) {
8612 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8613 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8614 InstructionCost ImmCost = TTI->getIntImmCost(
8615 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8616 if (ImmCost > TargetTransformInfo::TCC_Basic)
8617 return false;
8618 }
8619 // Now unmerge between GEPI and UGEPIs.
8620 for (GetElementPtrInst *UGEPI : UGEPIs) {
8621 UGEPI->setOperand(0, GEPI);
8622 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8623 Constant *NewUGEPIIdx = ConstantInt::get(
8624 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8625 UGEPI->setOperand(1, NewUGEPIIdx);
8626 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8627 // inbounds to avoid UB.
8628 if (!GEPI->isInBounds()) {
8629 UGEPI->setIsInBounds(false);
8630 }
8631 }
8632 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8633 // alive on IndirectBr edges).
8634 assert(llvm::none_of(GEPIOp->users(),
8635 [&](User *Usr) {
8636 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8637 }) &&
8638 "GEPIOp is used outside SrcBlock");
8639 return true;
8640}
8641
8642static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8643 SmallSet<BasicBlock *, 32> &FreshBBs,
8644 bool IsHugeFunc) {
8645 // Try and convert
8646 // %c = icmp ult %x, 8
8647 // br %c, bla, blb
8648 // %tc = lshr %x, 3
8649 // to
8650 // %tc = lshr %x, 3
8651 // %c = icmp eq %tc, 0
8652 // br %c, bla, blb
8653 // Creating the cmp to zero can be better for the backend, especially if the
8654 // lshr produces flags that can be used automatically.
8655 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8656 return false;
8657
8658 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8659 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8660 return false;
8661
8662 Value *X = Cmp->getOperand(0);
8663 if (!X->hasUseList())
8664 return false;
8665
8666 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8667
8668 for (auto *U : X->users()) {
8669 Instruction *UI = dyn_cast<Instruction>(U);
8670 // A quick dominance check
8671 if (!UI ||
8672 (UI->getParent() != Branch->getParent() &&
8673 UI->getParent() != Branch->getSuccessor(0) &&
8674 UI->getParent() != Branch->getSuccessor(1)) ||
8675 (UI->getParent() != Branch->getParent() &&
8676 !UI->getParent()->getSinglePredecessor()))
8677 continue;
8678
8679 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8680 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8681 IRBuilder<> Builder(Branch);
8682 if (UI->getParent() != Branch->getParent())
8683 UI->moveBefore(Branch->getIterator());
8684 UI->dropPoisonGeneratingFlags();
8685 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8686 ConstantInt::get(UI->getType(), 0));
8687 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8688 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8689 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8690 return true;
8691 }
8692 if (Cmp->isEquality() &&
8693 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8694 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8695 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8696 IRBuilder<> Builder(Branch);
8697 if (UI->getParent() != Branch->getParent())
8698 UI->moveBefore(Branch->getIterator());
8699 UI->dropPoisonGeneratingFlags();
8700 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8701 ConstantInt::get(UI->getType(), 0));
8702 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8703 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8704 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8705 return true;
8706 }
8707 }
8708 return false;
8709}
8710
8711bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8712 bool AnyChange = false;
8713 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8714
8715 // Bail out if we inserted the instruction to prevent optimizations from
8716 // stepping on each other's toes.
8717 if (InsertedInsts.count(I))
8718 return AnyChange;
8719
8720 // TODO: Move into the switch on opcode below here.
8721 if (PHINode *P = dyn_cast<PHINode>(I)) {
8722 // It is possible for very late stage optimizations (such as SimplifyCFG)
8723 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8724 // trivial PHI, go ahead and zap it here.
8725 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8726 LargeOffsetGEPMap.erase(P);
8727 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8728 P->eraseFromParent();
8729 ++NumPHIsElim;
8730 return true;
8731 }
8732 return AnyChange;
8733 }
8734
8735 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8736 // If the source of the cast is a constant, then this should have
8737 // already been constant folded. The only reason NOT to constant fold
8738 // it is if something (e.g. LSR) was careful to place the constant
8739 // evaluation in a block other than the one that uses it (e.g. to hoist
8740 // the address of globals out of a loop). If this is the case, we don't
8741 // want to forward-subst the cast.
8742 if (isa<Constant>(CI->getOperand(0)))
8743 return AnyChange;
8744
8745 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8746 return true;
8747
8748 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8749 isa<TruncInst>(I)) &&
8750 TLI->optimizeExtendOrTruncateConversion(
8751 I, LI->getLoopFor(I->getParent()), *TTI))
8752 return true;
8753
8754 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8755 /// Sink a zext or sext into its user blocks if the target type doesn't
8756 /// fit in one register
8757 if (TLI->getTypeAction(CI->getContext(),
8758 TLI->getValueType(*DL, CI->getType())) ==
8759 TargetLowering::TypeExpandInteger) {
8760 return SinkCast(CI);
8761 } else {
8762 if (TLI->optimizeExtendOrTruncateConversion(
8763 I, LI->getLoopFor(I->getParent()), *TTI))
8764 return true;
8765
8766 bool MadeChange = optimizeExt(I);
8767 return MadeChange | optimizeExtUses(I);
8768 }
8769 }
8770 return AnyChange;
8771 }
8772
8773 if (auto *Cmp = dyn_cast<CmpInst>(I))
8774 if (optimizeCmp(Cmp, ModifiedDT))
8775 return true;
8776
8777 if (match(I, m_URem(m_Value(), m_Value())))
8778 if (optimizeURem(I))
8779 return true;
8780
8781 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8782 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8783 bool Modified = optimizeLoadExt(LI);
8784 unsigned AS = LI->getPointerAddressSpace();
8785 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8786 return Modified;
8787 }
8788
8789 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8790 if (splitMergedValStore(*SI, *DL, *TLI))
8791 return true;
8792 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8793 unsigned AS = SI->getPointerAddressSpace();
8794 return optimizeMemoryInst(I, SI->getOperand(1),
8795 SI->getOperand(0)->getType(), AS);
8796 }
8797
8798 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8799 unsigned AS = RMW->getPointerAddressSpace();
8800 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8801 }
8802
8803 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8804 unsigned AS = CmpX->getPointerAddressSpace();
8805 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8806 CmpX->getCompareOperand()->getType(), AS);
8807 }
8808
8809 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8810
8811 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8812 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8813 return true;
8814
8815 // TODO: Move this into the switch on opcode - it handles shifts already.
8816 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8817 BinOp->getOpcode() == Instruction::LShr)) {
8818 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8819 if (CI && TLI->hasExtractBitsInsn())
8820 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8821 return true;
8822 }
8823
8824 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8825 if (GEPI->hasAllZeroIndices()) {
8826 /// The GEP operand must be a pointer, so must its result -> BitCast
8827 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8828 GEPI->getName(), GEPI->getIterator());
8829 NC->setDebugLoc(GEPI->getDebugLoc());
8830 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8831 RecursivelyDeleteTriviallyDeadInstructions(
8832 GEPI, TLInfo, nullptr,
8833 [&](Value *V) { removeAllAssertingVHReferences(V); });
8834 ++NumGEPsElim;
8835 optimizeInst(NC, ModifiedDT);
8836 return true;
8837 }
8838 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8839 return true;
8840 }
8841 }
8842
8843 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8844 // freeze(icmp a, const)) -> icmp (freeze a), const
8845 // This helps generate efficient conditional jumps.
8846 Instruction *CmpI = nullptr;
8847 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8848 CmpI = II;
8849 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8850 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8851
8852 if (CmpI && CmpI->hasOneUse()) {
8853 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8854 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8855 isa<ConstantPointerNull>(Op0);
8856 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8857 isa<ConstantPointerNull>(Op1);
8858 if (Const0 || Const1) {
8859 if (!Const0 || !Const1) {
8860 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8861 F->takeName(FI);
8862 CmpI->setOperand(Const0 ? 1 : 0, F);
8863 }
8864 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8865 FI->eraseFromParent();
8866 return true;
8867 }
8868 }
8869 return AnyChange;
8870 }
8871
8872 if (tryToSinkFreeOperands(I))
8873 return true;
8874
8875 switch (I->getOpcode()) {
8876 case Instruction::Shl:
8877 case Instruction::LShr:
8878 case Instruction::AShr:
8879 return optimizeShiftInst(cast<BinaryOperator>(I));
8880 case Instruction::Call:
8881 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8882 case Instruction::Select:
8883 return optimizeSelectInst(cast<SelectInst>(I));
8884 case Instruction::ShuffleVector:
8885 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8886 case Instruction::Switch:
8887 return optimizeSwitchInst(cast<SwitchInst>(I));
8888 case Instruction::ExtractElement:
8889 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8890 case Instruction::Br:
8891 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8892 }
8893
8894 return AnyChange;
8895}
8896
8897/// Given an OR instruction, check to see if this is a bitreverse
8898/// idiom. If so, insert the new intrinsic and return true.
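/// For instance, an i8 bit-reversal written with shifts, masks, and ors, e.g.
/// \code
///   %n1 = shl i8 %x, 4
///   %n2 = lshr i8 %x, 4
///   %n  = or i8 %n1, %n2
///   %p1 = and i8 %n, 51
///   %p2 = shl i8 %p1, 2
///   %p3 = lshr i8 %n, 2
///   %p4 = and i8 %p3, 51
///   %p  = or i8 %p2, %p4
///   %b1 = and i8 %p, 85
///   %b2 = shl i8 %b1, 1
///   %b3 = lshr i8 %p, 1
///   %b4 = and i8 %b3, 85
///   %r  = or i8 %b2, %b4
/// \endcode
/// can collapse to a single llvm.bitreverse.i8 call on targets where
/// ISD::BITREVERSE is legal or custom. This sketch is only illustrative;
/// recognizeBSwapOrBitReverseIdiom decides which sequences actually match.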
8899bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8900 if (!I.getType()->isIntegerTy() ||
8901 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8902 TLI->getValueType(*DL, I.getType(), true)))
8903 return false;
8904
8905 SmallVector<Instruction *, 4> Insts;
8906 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8907 return false;
8908 Instruction *LastInst = Insts.back();
8909 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8910 RecursivelyDeleteTriviallyDeadInstructions(
8911 &I, TLInfo, nullptr,
8912 [&](Value *V) { removeAllAssertingVHReferences(V); });
8913 return true;
8914}
8915
8916// In this pass we look for GEP and cast instructions that are used
8917// across basic blocks and rewrite them to improve basic-block-at-a-time
8918// selection.
8919bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8920 SunkAddrs.clear();
8921 bool MadeChange = false;
8922
8923 do {
8924 CurInstIterator = BB.begin();
8925 ModifiedDT = ModifyDT::NotModifyDT;
8926 while (CurInstIterator != BB.end()) {
8927 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8928 if (ModifiedDT != ModifyDT::NotModifyDT) {
8929 // For huge functions we tend to quickly go through the inner optimization
8930 // opportunities in the BB, so we go back to the BB head to re-optimize
8931 // each instruction instead of going back to the function head.
8932 if (IsHugeFunc) {
8933 DT.reset();
8934 getDT(*BB.getParent());
8935 break;
8936 } else {
8937 return true;
8938 }
8939 }
8940 }
8941 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8942
8943 bool MadeBitReverse = true;
8944 while (MadeBitReverse) {
8945 MadeBitReverse = false;
8946 for (auto &I : reverse(BB)) {
8947 if (makeBitReverse(I)) {
8948 MadeBitReverse = MadeChange = true;
8949 break;
8950 }
8951 }
8952 }
8953 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8954
8955 return MadeChange;
8956}
8957
8958bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8959 bool AnyChange = false;
8960 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8961 AnyChange |= fixupDbgVariableRecord(DVR);
8962 return AnyChange;
8963}
8964
8965// FIXME: should updating debug-info really cause the "changed" flag to fire,
8966// which can cause a function to be reprocessed?
8967bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8968 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8969 DVR.Type != DbgVariableRecord::LocationType::Assign)
8970 return false;
8971
8972 // Does this DbgVariableRecord refer to a sunk address calculation?
8973 bool AnyChange = false;
8974 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8975 DVR.location_ops().end());
8976 for (Value *Location : LocationOps) {
8977 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8978 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8979 if (SunkAddr) {
8980 // Point dbg.value at locally computed address, which should give the best
8981 // opportunity to be accurately lowered. This update may change the type
8982 // of pointer being referred to; however this makes no difference to
8983 // debugging information, and we can't generate bitcasts that may affect
8984 // codegen.
8985 DVR.replaceVariableLocationOp(Location, SunkAddr);
8986 AnyChange = true;
8987 }
8988 }
8989 return AnyChange;
8990}
8991
8992 static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
8993 DVR->removeFromParent();
8994 BasicBlock *VIBB = VI->getParent();
8995 if (isa<PHINode>(VI))
8996 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8997 else
8998 VIBB->insertDbgRecordAfter(DVR, &*VI);
8999}
9000
9001// A llvm.dbg.value may be using a value before its definition, due to
9002// optimizations in this pass and others. Scan for such dbg.values, and rescue
9003// them by moving the dbg.value to immediately after the value definition.
9004// FIXME: Ideally this should never be necessary, and this has the potential
9005// to re-order dbg.value intrinsics.
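// For example (an illustrative sketch using the debug-record syntax; the
// metadata operands shown are placeholders):
//   bb:
//       #dbg_value(i32 %x, !10, !DIExpression(), !20)
//     %x = add i32 %a, %b
// is rescued by moving the record to just after the definition it refers to:
//   bb:
//     %x = add i32 %a, %b
//       #dbg_value(i32 %x, !10, !DIExpression(), !20)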
9006bool CodeGenPrepare::placeDbgValues(Function &F) {
9007 bool MadeChange = false;
9008 DominatorTree DT(F);
9009
9010 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9011 SmallVector<Instruction *, 4> VIs;
9012 for (Value *V : DbgItem->location_ops())
9013 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9014 VIs.push_back(VI);
9015
9016 // This item may depend on multiple instructions, complicating any
9017 // potential sink. This block takes the defensive approach, opting to
9018 // "undef" the item if it has more than one instruction and any of them do
9019 // not dominate it.
9020 for (Instruction *VI : VIs) {
9021 if (VI->isTerminator())
9022 continue;
9023
9024 // If VI is a phi in a block with an EHPad terminator, we can't insert
9025 // after it.
9026 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9027 continue;
9028
9029 // If the defining instruction dominates the dbg.value, we do not need
9030 // to move the dbg.value.
9031 if (DT.dominates(VI, Position))
9032 continue;
9033
9034 // If we depend on multiple instructions and any of them doesn't
9035 // dominate this DVI, we probably can't salvage it: moving it to
9036 // after any of the instructions could cause us to lose the others.
9037 if (VIs.size() > 1) {
9038 LLVM_DEBUG(
9039 dbgs()
9040 << "Unable to find valid location for Debug Value, undefing:\n"
9041 << *DbgItem);
9042 DbgItem->setKillLocation();
9043 break;
9044 }
9045
9046 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9047 << *DbgItem << ' ' << *VI);
9048 DbgInserterHelper(DbgItem, VI->getIterator());
9049 MadeChange = true;
9050 ++NumDbgValueMoved;
9051 }
9052 };
9053
9054 for (BasicBlock &BB : F) {
9055 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9056 // Process any DbgVariableRecord records attached to this
9057 // instruction.
9058 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9059 filterDbgVars(Insn.getDbgRecordRange()))) {
9060 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9061 continue;
9062 DbgProcessor(&DVR, &Insn);
9063 }
9064 }
9065 }
9066
9067 return MadeChange;
9068}
9069
9070// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9071// probes can be chained dependencies of other regular DAG nodes and block DAG
9072// combine optimizations.
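// For example (illustrative; the probe operands shown are placeholders):
//   %a = load i32, ptr %p
//   call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1)
//   %b = add i32 %a, 1
//   call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1)
// becomes
//   call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1)
//   call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1)
//   %a = load i32, ptr %p
//   %b = add i32 %a, 1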
9073bool CodeGenPrepare::placePseudoProbes(Function &F) {
9074 bool MadeChange = false;
9075 for (auto &Block : F) {
9076 // Move the rest of the probes to the beginning of the block.
9077 auto FirstInst = Block.getFirstInsertionPt();
9078 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9079 ++FirstInst;
9080 BasicBlock::iterator I(FirstInst);
9081 I++;
9082 while (I != Block.end()) {
9083 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9084 II->moveBefore(FirstInst);
9085 MadeChange = true;
9086 }
9087 }
9088 }
9089 return MadeChange;
9090}
9091
9092/// Scale down both weights to fit into uint32_t.
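/// As a worked illustration: NewTrue = 2^33 and NewFalse = 2^31 give
/// Scale = 3, so the weights become roughly 2863311530 and 715827882,
/// which keeps their ~4:1 ratio while fitting into uint32_t.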
9093static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9094 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9095 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9096 NewTrue = NewTrue / Scale;
9097 NewFalse = NewFalse / Scale;
9098}
9099
9100/// Some targets prefer to split a conditional branch like:
9101/// \code
9102/// %0 = icmp ne i32 %a, 0
9103/// %1 = icmp ne i32 %b, 0
9104/// %or.cond = or i1 %0, %1
9105/// br i1 %or.cond, label %TrueBB, label %FalseBB
9106/// \endcode
9107/// into multiple branch instructions like:
9108/// \code
9109/// bb1:
9110/// %0 = icmp ne i32 %a, 0
9111/// br i1 %0, label %TrueBB, label %bb2
9112/// bb2:
9113/// %1 = icmp ne i32 %b, 0
9114/// br i1 %1, label %TrueBB, label %FalseBB
9115/// \endcode
9116/// This usually allows instruction selection to do even further optimizations
9117/// and combine the compare with the branch instruction. Currently this is
9118/// applied for targets which have "cheap" jump instructions.
9119///
9120/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9121///
9122bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9123 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9124 return false;
9125
9126 bool MadeChange = false;
9127 for (auto &BB : F) {
9128 // Does this BB end with the following?
9129 // %cond1 = icmp|fcmp|binary instruction ...
9130 // %cond2 = icmp|fcmp|binary instruction ...
9131 // %cond.or = or|and i1 %cond1, cond2
9132 // br i1 %cond.or label %dest1, label %dest2"
9133 Instruction *LogicOp;
9134 BasicBlock *TBB, *FBB;
9135 if (!match(BB.getTerminator(),
9136 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9137 continue;
9138
9139 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9140 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9141 continue;
9142
9143 // The merging of mostly empty BB can cause a degenerate branch.
9144 if (TBB == FBB)
9145 continue;
9146
9147 unsigned Opc;
9148 Value *Cond1, *Cond2;
9149 if (match(LogicOp,
9150 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9151 Opc = Instruction::And;
9152 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9153 m_OneUse(m_Value(Cond2)))))
9154 Opc = Instruction::Or;
9155 else
9156 continue;
9157
9158 auto IsGoodCond = [](Value *Cond) {
9159 return match(
9160 Cond,
9161 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9162 m_LogicalOr(m_Value(), m_Value()))));
9163 };
9164 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9165 continue;
9166
9167 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9168
9169 // Create a new BB.
9170 auto *TmpBB =
9171 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9172 BB.getParent(), BB.getNextNode());
9173 if (IsHugeFunc)
9174 FreshBBs.insert(TmpBB);
9175
9176    // Update the original basic block: make the branch use the first condition
9177    // directly and remove the no-longer-needed and/or instruction.
9178 Br1->setCondition(Cond1);
9179 LogicOp->eraseFromParent();
9180
9181 // Depending on the condition we have to either replace the true or the
9182 // false successor of the original branch instruction.
9183 if (Opc == Instruction::And)
9184 Br1->setSuccessor(0, TmpBB);
9185 else
9186 Br1->setSuccessor(1, TmpBB);
9187
9188 // Fill in the new basic block.
9189 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9190 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9191 I->removeFromParent();
9192 I->insertBefore(Br2->getIterator());
9193 }
9194
9195 // Update PHI nodes in both successors. The original BB needs to be
9196    // replaced in one successor's PHI nodes, because the branch now comes from
9197    // the newly generated BB (TmpBB). In the other successor we need to add one
9198    // incoming edge to the PHI nodes, because both branch instructions now
9199    // target the same successor. Depending on the original branch condition
9200 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9201 // we perform the correct update for the PHI nodes.
9202 // This doesn't change the successor order of the just created branch
9203 // instruction (or any other instruction).
9204 if (Opc == Instruction::Or)
9205 std::swap(TBB, FBB);
9206
9207 // Replace the old BB with the new BB.
9208 TBB->replacePhiUsesWith(&BB, TmpBB);
9209
9210 // Add another incoming edge from the new BB.
9211 for (PHINode &PN : FBB->phis()) {
9212 auto *Val = PN.getIncomingValueForBlock(&BB);
9213 PN.addIncoming(Val, TmpBB);
9214 }
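    // For illustration with hypothetical IR (names based on the ".cond.split"
    // suffix used above): if the successor that now has two predecessors
    // contained
    //   %p = phi i32 [ %v, %bb ], ...
    // it becomes
    //   %p = phi i32 [ %v, %bb ], [ %v, %bb.cond.split ], ...
    // while in the other successor the incoming block %bb is simply rewritten
    // to %bb.cond.split.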
9215
9216 // Update the branch weights (from SelectionDAGBuilder::
9217 // FindMergedConditions).
9218 if (Opc == Instruction::Or) {
9219 // Codegen X | Y as:
9220 // BB1:
9221 // jmp_if_X TBB
9222 // jmp TmpBB
9223 // TmpBB:
9224 // jmp_if_Y TBB
9225 // jmp FBB
9226 //
9227
9228 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
9229 // The requirement is that
9230 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9231 // = TrueProb for original BB.
9232 // Assuming the original weights are A and B, one choice is to set BB1's
9233 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9234 // assumes that
9235 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9236        // Another choice is to assume TrueProb for BB1 equals TrueProb for
9237        // TmpBB, but the math is more complicated.
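        // As a worked check with hypothetical original weights A = 3 (true)
        // and B = 5 (false): BB1 gets weights {A, A+2B} = {3, 13} and TmpBB
        // gets {A, 2B} = {3, 10}, so
        //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
        //     = 3/16 + (13/16) * (3/13) = 6/16 = 3/8,
        // which matches the original TrueProb of A/(A+B) = 3/8.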
9238 uint64_t TrueWeight, FalseWeight;
9239 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9240 uint64_t NewTrueWeight = TrueWeight;
9241 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9242 scaleWeights(NewTrueWeight, NewFalseWeight);
9243 Br1->setMetadata(LLVMContext::MD_prof,
9244 MDBuilder(Br1->getContext())
9245                             .createBranchWeights(NewTrueWeight, NewFalseWeight,
9246                                                  hasBranchWeightOrigin(*Br1)));
9247
9248 NewTrueWeight = TrueWeight;
9249 NewFalseWeight = 2 * FalseWeight;
9250 scaleWeights(NewTrueWeight, NewFalseWeight);
9251 Br2->setMetadata(LLVMContext::MD_prof,
9252 MDBuilder(Br2->getContext())
9253                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9254 }
9255 } else {
9256 // Codegen X & Y as:
9257 // BB1:
9258 // jmp_if_X TmpBB
9259 // jmp FBB
9260 // TmpBB:
9261 // jmp_if_Y TBB
9262 // jmp FBB
9263 //
9264 // This requires creation of TmpBB after CurBB.
9265
9266 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9267 // The requirement is that
9268 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9269 // = FalseProb for original BB.
9270 // Assuming the original weights are A and B, one choice is to set BB1's
9271 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9272 // assumes that
9273 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
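        // As a worked check with hypothetical original weights A = 3 (true)
        // and B = 5 (false): BB1 gets weights {2A+B, B} = {11, 5} and TmpBB
        // gets {2A, B} = {6, 5}, so
        //   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB)
        //     = 5/16 + (11/16) * (5/11) = 10/16 = 5/8,
        // which matches the original FalseProb of B/(A+B) = 5/8.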
9274 uint64_t TrueWeight, FalseWeight;
9275 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9276 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9277 uint64_t NewFalseWeight = FalseWeight;
9278 scaleWeights(NewTrueWeight, NewFalseWeight);
9279 Br1->setMetadata(LLVMContext::MD_prof,
9280 MDBuilder(Br1->getContext())
9281                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9282
9283 NewTrueWeight = 2 * TrueWeight;
9284 NewFalseWeight = FalseWeight;
9285 scaleWeights(NewTrueWeight, NewFalseWeight);
9286 Br2->setMetadata(LLVMContext::MD_prof,
9287 MDBuilder(Br2->getContext())
9288                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9289 }
9290 }
9291
9292 ModifiedDT = ModifyDT::ModifyBBDT;
9293 MadeChange = true;
9294
9295 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9296 TmpBB->dump());
9297 }
9298 return MadeChange;
9299}
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ATTRIBUTE_UNUSED
Definition Compiler.h:298
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1450
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned logBase2() const
Definition APInt.h:1761
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
bool erase(const KeyT &Val)
Definition DenseMap.h:303
unsigned size() const
Definition DenseMap.h:108
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:57
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:807
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:342
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Type * getValueType() const
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:167
iterator find(const KeyT &Key)
Definition MapVector.h:141
bool empty() const
Definition MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:115
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
void clear()
Completely clear the SetVector.
Definition SetVector.h:284
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:279
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
value_type pop_back_val()
Definition SetVector.h:296
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:197
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:743
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:156
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
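A minimal sketch, under an assumed IR shape, of how the PatternMatch helpers listed above compose; the function name and the matched pattern ("shl (zext X), C" with a single-use zext) are illustrative, not part of this pass.

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/PatternMatch.h"

  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Bind X and the shift amount when I is "shl (zext X), C" and the zext
  // has exactly one use; return nullptr otherwise.
  static Value *matchShlOfZExt(Instruction *I, const APInt *&ShAmt) {
    Value *X;
    if (match(I, m_Shl(m_OneUse(m_ZExt(m_Value(X))), m_APInt(ShAmt))))
      return X;
    return nullptr;
  }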
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
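A hedged sketch of the helper above: once a transformation leaves an instruction without users, delete it together with any operands that become dead in turn (use_empty is the same query listed earlier in this index).

  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/Local.h"

  using namespace llvm;

  static void cleanUpIfDead(Instruction *MaybeDead) {
    // The callee re-checks that MaybeDead is trivially dead before deleting
    // it, then walks its operands and erases any that no longer have uses.
    if (MaybeDead->use_empty())
      RecursivelyDeleteTriviallyDeadInstructions(MaybeDead);
  }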
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
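A minimal sketch of the casting helper above (dyn_cast; cast and isa appear further down in this index), applied to a hypothetical load check.

  #include "llvm/IR/Instructions.h"
  #include "llvm/Support/Casting.h"

  using namespace llvm;

  // True if V is a load whose address operand is exactly Ptr.
  static bool loadsFromPointer(Value *V, Value *Ptr) {
    // dyn_cast returns nullptr when V is not a LoadInst.
    if (auto *LI = dyn_cast<LoadInst>(V))
      return LI->getPointerOperand() == Ptr;
    return false;
  }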
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2235
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1725
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
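A hedged sketch of make_early_inc_range, which is what allows erasing instructions while walking a block; isInstructionTriviallyDead is a Local.h helper assumed here for the dead-check.

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Transforms/Utils/Local.h"

  using namespace llvm;

  static bool eraseTriviallyDead(BasicBlock &BB) {
    bool Changed = false;
    // The iterator is advanced before the loop body runs, so erasing I does
    // not invalidate the traversal.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (isInstructionTriviallyDead(&I)) {
        I.eraseFromParent();
        Changed = true;
      }
    }
    return Changed;
  }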
auto cast_or_null(const Y &Val)
Definition Casting.h:720
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2056
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
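A hedged sketch of simplifyInstruction from the two entries above: fold I when possible and forward its uses to the simpler value. Only the DataLayout is placed in the SimplifyQuery here; real callers usually also provide DT/AC/TLI.

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Instruction.h"

  using namespace llvm;

  static bool foldIfSimplifiable(Instruction *I, const DataLayout &DL) {
    if (Value *V = simplifyInstruction(I, SimplifyQuery(DL, I))) {
      I->replaceAllUsesWith(V);
      I->eraseFromParent();
      return true;
    }
    return false;
  }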
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2108
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3728
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition Analysis.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition Analysis.cpp:592
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
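A hedged sketch of extractBranchWeights: read the MD_prof weights off an instruction's branch_weights metadata, if any; the "exactly two weights" check is illustrative.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/ProfDataUtils.h"

  using namespace llvm;

  static bool hasTwoBranchWeights(const Instruction &I) {
    SmallVector<uint32_t, 2> Weights;
    // Returns false when there is no MD_prof node or it is not branch_weights.
    return extractBranchWeights(I.getMetadata(LLVMContext::MD_prof), Weights) &&
           Weights.size() == 2;
  }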
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
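A hedged sketch of bypassSlowDivision: the width map requests that 64-bit div/rem in BB be carried out as 32-bit operations when the runtime operands fit, mirroring what targets with slow wide dividers typically ask for.

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Transforms/Utils/BypassSlowDivision.h"

  using namespace llvm;

  static bool bypassWideDivisions(BasicBlock *BB) {
    DenseMap<unsigned, unsigned> BypassWidths;
    BypassWidths[64] = 32; // try a 32-bit divide/remainder for 64-bit inputs
    return bypassSlowDivision(BB, BypassWidths);
  }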
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2100
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
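A minimal sketch tying together commonAlignment (above) and the Align/isAligned entries earlier in this index: the alignment that survives stepping a Base-aligned pointer by a constant byte Offset.

  #include "llvm/Support/Alignment.h"
  #include <cstdint>

  using namespace llvm;

  // True if (ptr + Offset) is still at least Required-aligned whenever ptr
  // itself is Base-aligned.
  static bool offsetPreservesAlignment(Align Base, uint64_t Offset,
                                       Align Required) {
    return commonAlignment(Base, Offset) >= Required;
  }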
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition MathExtras.h:712
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
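A hedged sketch of SplitBlockAndInsertIfThen: guard a call to a hypothetical slow-path function behind Cond, splitting the block that contains InsertPt. No DomTreeUpdater/LoopInfo is passed here, so those analyses are not kept up to date.

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"

  using namespace llvm;

  static void emitGuardedCall(Instruction *InsertPt, Value *Cond,
                              FunctionCallee SlowPathFn) {
    // The new "then" block ends in a branch back to the split point;
    // /*Unreachable=*/false keeps that fall-through edge.
    Instruction *ThenTerm = SplitBlockAndInsertIfThen(
        Cond, InsertPt->getIterator(), /*Unreachable=*/false);
    IRBuilder<> Builder(ThenTerm);
    Builder.CreateCall(SlowPathFn);
  }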
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
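A minimal sketch of the EVT queries above: classify a cast between two (assumed sized, non-scalable) IR types as an integer widening by comparing their EVTs.

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/Type.h"

  using namespace llvm;

  static bool isWideningIntCast(Type *SrcTy, Type *DstTy) {
    EVT SrcVT = EVT::getEVT(SrcTy);
    EVT DstVT = EVT::getEVT(DstTy);
    return SrcVT.isInteger() && DstVT.isInteger() && DstVT.bitsGT(SrcVT);
  }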
const DataLayout & DL
This contains information for each constraint that we are lowering.