LLVM 21.0.0git
CodeGenPrepare.cpp
Go to the documentation of this file.
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugInfo.h"
53#include "llvm/IR/Dominators.h"
54#include "llvm/IR/Function.h"
56#include "llvm/IR/GlobalValue.h"
58#include "llvm/IR/IRBuilder.h"
59#include "llvm/IR/InlineAsm.h"
60#include "llvm/IR/InstrTypes.h"
61#include "llvm/IR/Instruction.h"
64#include "llvm/IR/Intrinsics.h"
65#include "llvm/IR/IntrinsicsAArch64.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/MDBuilder.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
72#include "llvm/IR/Statepoint.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/User.h"
76#include "llvm/IR/Value.h"
77#include "llvm/IR/ValueHandle.h"
78#include "llvm/IR/ValueMap.h"
80#include "llvm/Pass.h"
86#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <optional>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107using namespace llvm::PatternMatch;
108
109#define DEBUG_TYPE "codegenprepare"
110
111STATISTIC(NumBlocksElim, "Number of blocks eliminated");
112STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
113STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
114STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
115 "sunken Cmps");
116STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
117 "of sunken Casts");
118STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
119 "computations were sunk");
120STATISTIC(NumMemoryInstsPhiCreated,
121 "Number of phis created when address "
122 "computations were sunk to memory instructions");
123STATISTIC(NumMemoryInstsSelectCreated,
124 "Number of select created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
127STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
128STATISTIC(NumAndsAdded,
129 "Number of and mask instructions added to form ext loads");
130STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
131STATISTIC(NumRetsDup, "Number of return instructions duplicated");
132STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
133STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
134STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
135
137 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
138 cl::desc("Disable branch optimizations in CodeGenPrepare"));
139
140static cl::opt<bool>
141 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
142 cl::desc("Disable GC optimizations in CodeGenPrepare"));
143
144static cl::opt<bool>
145 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
146 cl::init(false),
147 cl::desc("Disable select to branch conversion."));
148
149static cl::opt<bool>
150 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
151 cl::desc("Address sinking in CGP using GEPs."));
152
153static cl::opt<bool>
154 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
155 cl::desc("Enable sinking and/cmp into branches."));
156
158 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
159 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
160
162 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
163 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
164
166 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
167 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
168 "CodeGenPrepare"));
169
171 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
172 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
173 "optimization in CodeGenPrepare"));
174
176 "disable-preheader-prot", cl::Hidden, cl::init(false),
177 cl::desc("Disable protection against removing loop preheaders"));
178
180 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
181 cl::desc("Use profile info to add section prefix for hot/cold functions"));
182
184 "profile-unknown-in-special-section", cl::Hidden,
185 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
186 "profile, we cannot tell the function is cold for sure because "
187 "it may be a function newly added without ever being sampled. "
188 "With the flag enabled, compiler can put such profile unknown "
189 "functions into a special section, so runtime system can choose "
190 "to handle it in a different way than .text section, to save "
191 "RAM for example. "));
192
194 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
195 cl::desc("Use the basic-block-sections profile to determine the text "
196 "section prefix for hot functions. Functions with "
197 "basic-block-sections profile will be placed in `.text.hot` "
198 "regardless of their FDO profile info. Other functions won't be "
199 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
200 "profiles."));
201
203 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
204 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
205 "(frequency of destination block) is greater than this ratio"));
206
208 "force-split-store", cl::Hidden, cl::init(false),
209 cl::desc("Force store splitting no matter what the target query says."));
210
212 "cgp-type-promotion-merge", cl::Hidden,
213 cl::desc("Enable merging of redundant sexts when one is dominating"
214 " the other."),
215 cl::init(true));
216
218 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
219 cl::desc("Disables combining addressing modes with different parts "
220 "in optimizeMemoryInst."));
221
222static cl::opt<bool>
223 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
224 cl::desc("Allow creation of Phis in Address sinking."));
225
227 "addr-sink-new-select", cl::Hidden, cl::init(true),
228 cl::desc("Allow creation of selects in Address sinking."));
229
231 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
232 cl::desc("Allow combining of BaseReg field in Address sinking."));
233
235 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
236 cl::desc("Allow combining of BaseGV field in Address sinking."));
237
239 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
240 cl::desc("Allow combining of BaseOffs field in Address sinking."));
241
243 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
244 cl::desc("Allow combining of ScaledReg field in Address sinking."));
245
246static cl::opt<bool>
247 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
248 cl::init(true),
249 cl::desc("Enable splitting large offset of GEP."));
250
252 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
253 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
254
255static cl::opt<bool>
256 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
257 cl::desc("Enable BFI update verification for "
258 "CodeGenPrepare."));
259
260static cl::opt<bool>
261 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
262 cl::desc("Enable converting phi types in CodeGenPrepare"));
263
265 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
266 cl::desc("Least BB number of huge function."));
267
269 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::desc("Max number of address users to look at"));
272
273static cl::opt<bool>
274 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
275 cl::desc("Disable elimination of dead PHI nodes."));
276
277namespace {
278
/// Records which kind(s) of extension have been observed for a promoted
/// instruction.
enum ExtType {
  /// Zero extension has been seen.
  ZeroExtension,
  /// Sign extension has been seen.
  SignExtension,
  /// Used when a sext was seen after ZeroExtension had been set, or a zext was
  /// seen after SignExtension had been set. It makes the type information of a
  /// promoted instruction invalid.
  BothExtension
};
287
/// Describes how much dominance-related state an optimization step changed.
enum ModifyDT {
  /// No dominator information was modified.
  NotModifyDT,
  /// The basic-block dominator tree was modified.
  ModifyBBDT,
  /// Instruction-level dominance inside a basic block was modified. This
  /// usually means an instruction was moved, deleted or inserted in the block,
  /// so the instructions of that block should be iterated again.
  ModifyInstDT
};
296
297using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
298using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
299using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using ValueToSExts = MapVector<Value *, SExts>;
302
303class TypePromotionTransaction;
304
305class CodeGenPrepare {
306 friend class CodeGenPrepareLegacyPass;
307 const TargetMachine *TM = nullptr;
308 const TargetSubtargetInfo *SubtargetInfo = nullptr;
309 const TargetLowering *TLI = nullptr;
310 const TargetRegisterInfo *TRI = nullptr;
311 const TargetTransformInfo *TTI = nullptr;
312 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
313 const TargetLibraryInfo *TLInfo = nullptr;
314 LoopInfo *LI = nullptr;
315 std::unique_ptr<BlockFrequencyInfo> BFI;
316 std::unique_ptr<BranchProbabilityInfo> BPI;
317 ProfileSummaryInfo *PSI = nullptr;
318
319 /// As we scan instructions optimizing them, this is the next instruction
320 /// to optimize. Transforms that can invalidate this should update it.
321 BasicBlock::iterator CurInstIterator;
322
323 /// Keeps track of non-local addresses that have been sunk into a block.
324 /// This allows us to avoid inserting duplicate code for blocks with
325 /// multiple load/stores of the same address. The usage of WeakTrackingVH
326 /// enables SunkAddrs to be treated as a cache whose entries can be
327 /// invalidated if a sunken address computation has been erased.
329
330 /// Keeps track of all instructions inserted for the current function.
331 SetOfInstrs InsertedInsts;
332
333 /// Keeps track of the type of the related instruction before their
334 /// promotion for the current function.
335 InstrToOrigTy PromotedInsts;
336
337 /// Keep track of instructions removed during promotion.
338 SetOfInstrs RemovedInsts;
339
340 /// Keep track of sext chains based on their initial value.
341 DenseMap<Value *, Instruction *> SeenChainsForSExt;
342
343 /// Keep track of GEPs accessing the same data structures such as structs or
344 /// arrays that are candidates to be split later because of their large
345 /// size.
348 LargeOffsetGEPMap;
349
350 /// Keep track of new GEP base after splitting the GEPs having large offset.
351 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
352
353 /// Map serial numbers to Large offset GEPs.
354 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
355
356 /// Keep track of SExt promoted.
357 ValueToSExts ValToSExtendedUses;
358
359 /// True if the function has the OptSize attribute.
360 bool OptSize;
361
362 /// DataLayout for the Function being processed.
363 const DataLayout *DL = nullptr;
364
365 /// Building the dominator tree can be expensive, so we only build it
366 /// lazily and update it when required.
367 std::unique_ptr<DominatorTree> DT;
368
369public:
370 CodeGenPrepare(){};
371 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
372 /// If encounter huge function, we need to limit the build time.
373 bool IsHugeFunc = false;
374
375 /// FreshBBs is like worklist, it collected the updated BBs which need
376 /// to be optimized again.
377 /// Note: Consider building time in this pass, when a BB updated, we need
378 /// to insert such BB into FreshBBs for huge function.
380
/// Drops the state kept for the function being processed: clears
/// InsertedInsts, PromotedInsts and FreshBBs, and destroys the owned
/// BranchProbabilityInfo and BlockFrequencyInfo objects.
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 BPI.reset();
387 BFI.reset();
388 }
389
391
392private:
393 template <typename F>
394 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
395 // Substituting can cause recursive simplifications, which can invalidate
396 // our iterator. Use a WeakTrackingVH to hold onto it in case this
397 // happens.
398 Value *CurValue = &*CurInstIterator;
399 WeakTrackingVH IterHandle(CurValue);
400
401 f();
402
403 // If the iterator instruction was recursively deleted, start over at the
404 // start of the block.
405 if (IterHandle != CurValue) {
406 CurInstIterator = BB->begin();
407 SunkAddrs.clear();
408 }
409 }
410
411 // Get the DominatorTree, building if necessary.
412 DominatorTree &getDT(Function &F) {
413 if (!DT)
414 DT = std::make_unique<DominatorTree>(F);
415 return *DT;
416 }
417
418 void removeAllAssertingVHReferences(Value *V);
419 bool eliminateAssumptions(Function &F);
420 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
421 bool eliminateMostlyEmptyBlocks(Function &F);
422 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
423 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
424 void eliminateMostlyEmptyBlock(BasicBlock *BB);
425 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
426 bool isPreheader);
427 bool makeBitReverse(Instruction &I);
428 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
429 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
430 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
431 unsigned AddrSpace);
432 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
433 bool optimizeInlineAsmInst(CallInst *CS);
434 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
435 bool optimizeExt(Instruction *&I);
436 bool optimizeExtUses(Instruction *I);
437 bool optimizeLoadExt(LoadInst *Load);
438 bool optimizeShiftInst(BinaryOperator *BO);
439 bool optimizeFunnelShift(IntrinsicInst *Fsh);
440 bool optimizeSelectInst(SelectInst *SI);
441 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
442 bool optimizeSwitchType(SwitchInst *SI);
443 bool optimizeSwitchPhiConstants(SwitchInst *SI);
444 bool optimizeSwitchInst(SwitchInst *SI);
445 bool optimizeExtractElementInst(Instruction *Inst);
446 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
447 bool fixupDbgValue(Instruction *I);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 void verifyBFIUpdates(Function &F);
478 bool _run(Function &F);
479};
480
481class CodeGenPrepareLegacyPass : public FunctionPass {
482public:
483 static char ID; // Pass identification, replacement for typeid
484
485 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487 }
488
489 bool runOnFunction(Function &F) override;
490
491 StringRef getPassName() const override { return "CodeGen Prepare"; }
492
493 void getAnalysisUsage(AnalysisUsage &AU) const override {
494 // FIXME: When we can selectively preserve passes, preserve the domtree.
501 }
502};
503
504} // end anonymous namespace
505
506char CodeGenPrepareLegacyPass::ID = 0;
507
508bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
509 if (skipFunction(F))
510 return false;
511 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
512 CodeGenPrepare CGP(TM);
513 CGP.DL = &F.getDataLayout();
514 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
515 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
516 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
517 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
518 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
519 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
520 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
521 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
522 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
523 auto BBSPRWP =
524 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
525 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
526
527 return CGP._run(F);
528}
529
530INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
531 "Optimize for code generation", false, false)
538INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540
542 return new CodeGenPrepareLegacyPass();
543}
544
547 CodeGenPrepare CGP(TM);
548
549 bool Changed = CGP.run(F, AM);
550 if (!Changed)
551 return PreservedAnalyses::all();
552
557 return PA;
558}
559
560bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
561 DL = &F.getDataLayout();
562 SubtargetInfo = TM->getSubtargetImpl(F);
563 TLI = SubtargetInfo->getTargetLowering();
564 TRI = SubtargetInfo->getRegisterInfo();
565 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
567 LI = &AM.getResult<LoopAnalysis>(F);
568 BPI.reset(new BranchProbabilityInfo(F, *LI));
569 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
570 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
571 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
572 BBSectionsProfileReader =
574 return _run(F);
575}
576
577bool CodeGenPrepare::_run(Function &F) {
578 bool EverMadeChange = false;
579
580 OptSize = F.hasOptSize();
581 // Use the basic-block-sections profile to promote hot functions to .text.hot
582 // if requested.
583 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
584 BBSectionsProfileReader->isFunctionHot(F.getName())) {
585 F.setSectionPrefix("hot");
586 } else if (ProfileGuidedSectionPrefix) {
587 // The hot attribute overwrites profile count based hotness while profile
588 // counts based hotness overwrite the cold attribute.
589 // This is a conservative behavior.
590 if (F.hasFnAttribute(Attribute::Hot) ||
591 PSI->isFunctionHotInCallGraph(&F, *BFI))
592 F.setSectionPrefix("hot");
593 // If PSI shows this function is not hot, we will place the function
594 // into the unlikely section if (1) PSI shows this is a cold function, or
595 // (2) the function has the cold attribute.
596 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
597 F.hasFnAttribute(Attribute::Cold))
598 F.setSectionPrefix("unlikely");
599 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
600 PSI->isFunctionHotnessUnknown(F))
601 F.setSectionPrefix("unknown");
602 }
603
604 /// This optimization identifies DIV instructions that can be
605 /// profitably bypassed and carried out with a shorter, faster divide.
606 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
607 const DenseMap<unsigned int, unsigned int> &BypassWidths =
609 BasicBlock *BB = &*F.begin();
610 while (BB != nullptr) {
611 // bypassSlowDivision may create new BBs, but we don't want to reapply the
612 // optimization to those blocks.
613 BasicBlock *Next = BB->getNextNode();
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638 // If we are optimizing a huge function, we need to consider the build time,
639 // because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664 // If the BB is updated, it may still have a chance to be optimized.
665 // This usually happens with the sink optimizations.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672 // If the %cmp is sunk to another BB, the %and will have a chance to sink.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684 // We have iterated all the BB in the (only work for huge) function.
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
722 for (BasicBlock &BB : F) {
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
776
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data strcutures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803 auto GEP = dyn_cast<GetElementPtrInst>(V);
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835 // Use a temporary array to avoid iterator being invalidated when
836 // deleting blocks.
838 for (auto &Block : llvm::drop_begin(F))
839 Blocks.push_back(&Block);
840
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
896 if (BBI != BB->begin()) {
897 --BBI;
898 while (isa<DbgInfoIntrinsic>(BBI)) {
899 if (BBI == BB->begin())
900 break;
901 --BBI;
902 }
903 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
904 return nullptr;
905 }
906
907 // Do not break infinite loops.
908 BasicBlock *DestBB = BI->getSuccessor(0);
909 if (DestBB == BB)
910 return nullptr;
911
912 if (!canMergeBlocks(BB, DestBB))
913 DestBB = nullptr;
914
915 return DestBB;
916}
917
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
920/// edges in ways that are non-optimal for isel. Start by eliminating these
921/// blocks so we can split them the way we want them.
922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
924 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
925 while (!LoopList.empty()) {
926 Loop *L = LoopList.pop_back_val();
927 llvm::append_range(LoopList, *L);
928 if (BasicBlock *Preheader = L->getLoopPreheader())
929 Preheaders.insert(Preheader);
930 }
931
932 bool MadeChange = false;
933 // Copy blocks into a temporary array to avoid iterator invalidation issues
934 // as we remove them.
935 // Note that this intentionally skips the entry block.
937 for (auto &Block : llvm::drop_begin(F)) {
938 // Delete phi nodes that could block deleting other empty blocks.
940 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
941 Blocks.push_back(&Block);
942 }
943
944 for (auto &Block : Blocks) {
945 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
946 if (!BB)
947 continue;
948 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
949 if (!DestBB ||
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
951 continue;
952
953 eliminateMostlyEmptyBlock(BB);
954 MadeChange = true;
955 }
956 return MadeChange;
957}
958
/// Decide whether folding the mostly-empty block \p BB into its successor
/// \p DestBB is worthwhile.
///
/// \param BB          the candidate empty block.
/// \param DestBB      the block BB would be merged into.
/// \param isPreheader true if the caller identified BB as a loop preheader.
/// \returns true if the merge should go ahead, false if BB should be kept.
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
960 BasicBlock *DestBB,
961 bool isPreheader) {
962 // Do not delete loop preheaders if doing so would create a critical edge.
963 // Loop preheaders can be good locations to spill registers. If the
964 // preheader is deleted and we create a critical edge, registers may be
965 // spilled in the loop body instead.
966 if (!DisablePreheaderProtect && isPreheader &&
967 !(BB->getSinglePredecessor() &&
// NOTE(review): listing line 968 is absent from this extract; it held the
// remainder of this condition. Confirm against the upstream file.
969 return false;
970
971 // Skip merging if the block's successor is also a successor to any callbr
972 // that leads to this block.
973 // FIXME: Is this really needed? Is this a correctness issue?
974 for (BasicBlock *Pred : predecessors(BB)) {
975 if (isa<CallBrInst>(Pred->getTerminator()) &&
976 llvm::is_contained(successors(Pred), DestBB))
977 return false;
978 }
979
980 // Try to skip merging if the unique predecessor of BB is terminated by a
981 // switch or indirect branch instruction, and BB is used as an incoming block
982 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
983 // add COPY instructions in the predecessor of BB instead of BB (if it is not
984 // merged). Note that the critical edge created by merging such blocks won't be
985 // split in MachineSink because the jump table is not analyzable. By keeping
986 // such empty block (BB), ISel will place COPY instructions in BB, not in the
987 // predecessor of BB.
988 BasicBlock *Pred = BB->getUniquePredecessor();
989 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
990 isa<IndirectBrInst>(Pred->getTerminator())))
991 return true;
992
// If something other than PHIs/debug info precedes the terminator, the
// heuristic below does not apply; allow the merge.
993 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
994 return true;
995
996 // We use a simple cost heuristic which determines that skipping merging is
997 // profitable if the cost of skipping merging is less than the cost of
998 // merging : Cost(skipping merging) < Cost(merging BB), where the
999 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1000 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1001 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1002 // Freq(Pred) / Freq(BB) > 2.
1003 // Note that if there are multiple empty blocks sharing the same incoming
1004 // value for the PHIs in the DestBB, we consider them together. In such
1005 // case, Cost(merging BB) will be the sum of their frequencies.
1006
// No PHIs in DestBB means there is nothing to copy; allow the merge.
1007 if (!isa<PHINode>(DestBB->begin()))
1008 return true;
1009
1010 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1011
1012 // Find all other incoming blocks from which incoming values of all PHIs in
1013 // DestBB are the same as the ones from BB.
1014 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1015 if (DestBBPred == BB)
1016 continue;
1017
1018 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1021 }))
1022 SameIncomingValueBBs.insert(DestBBPred);
1023 }
1024
1025 // See if all BB's incoming values are same as the value from Pred. In this
1026 // case, no reason to skip merging because COPYs are expected to be placed in
1027 // Pred already.
1028 if (SameIncomingValueBBs.count(Pred))
1029 return true;
1030
1031 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1032 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1033
// Add the frequencies of sibling empty blocks that hang off the same Pred,
// would also merge into DestBB, and feed the same PHI values as BB.
1034 for (auto *SameValueBB : SameIncomingValueBBs)
1035 if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1037 BBFreq += BFI->getBlockFreq(SameValueBB);
1038
// Merge unless Pred is more frequent than FreqRatioToSkipMerge times the
// combined frequency. A disengaged Limit (presumably an overflow in mul --
// confirm) also permits the merge.
1039 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1040 return !Limit || PredFreq <= *Limit;
1041}
1042
1043/// Return true if we can merge BB into DestBB if there is a single
1044/// unconditional branch between them, and BB contains no other non-phi
1045/// instructions.
///
/// Two things are checked: (1) every user of a PHI in \p BB is a PHI in
/// \p DestBB that takes the value along the BB edge, and (2) for every block
/// that is a predecessor of both BB and DestBB, the PHIs of DestBB would
/// receive the same value along either path.
1046bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1047 const BasicBlock *DestBB) const {
1048 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1049 // the successor. If there are more complex condition (e.g. preheaders),
1050 // don't mess around with them.
1051 for (const PHINode &PN : BB->phis()) {
1052 for (const User *U : PN.users()) {
1053 const Instruction *UI = cast<Instruction>(U);
1054 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1055 return false;
1056 // If User is inside DestBB block and it is a PHINode then check
1057 // incoming value. If incoming value is not from BB then this is
1058 // a complex condition (e.g. preheaders) we want to avoid here.
// Both the parent test and the dyn_cast below always succeed at this point:
// the early return above already rejected every other case.
1059 if (UI->getParent() == DestBB) {
1060 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1061 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1062 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
// Reject when a value defined in BB arrives along an edge other than BB's.
1063 if (Insn && Insn->getParent() == BB &&
1064 Insn->getParent() != UPN->getIncomingBlock(I))
1065 return false;
1066 }
1067 }
1068 }
1069 }
1070
1071 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1072 // and DestBB may have conflicting incoming values for the block. If so, we
1073 // can't merge the block.
1074 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1075 if (!DestBBPN)
1076 return true; // no conflict.
1077
1078 // Collect the preds of BB.
// NOTE(review): listing line 1079 is absent from this extract -- presumably
// the declaration of `BBPreds`, used below as a set of blocks (insert/count).
// Confirm against the upstream file.
1080 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1081 // It is faster to get preds from a PHI than with pred_iterator.
1082 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.insert(BBPN->getIncomingBlock(i));
1084 } else {
1085 BBPreds.insert(pred_begin(BB), pred_end(BB));
1086 }
1087
1088 // Walk the preds of DestBB.
1089 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1090 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1091 if (BBPreds.count(Pred)) { // Common predecessor?
1092 for (const PHINode &PN : DestBB->phis()) {
// V1: value DestBB's PHI already receives directly from Pred.
// V2: value it would receive from Pred once BB is folded away.
1093 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1094 const Value *V2 = PN.getIncomingValueForBlock(BB);
1095
1096 // If V2 is a phi node in BB, look up what the mapped value will be.
1097 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1098 if (V2PN->getParent() == BB)
1099 V2 = V2PN->getIncomingValueForBlock(Pred);
1100
1101 // If there is a conflict, bail out.
1102 if (V1 != V2)
1103 return false;
1104 }
1105 }
1106 }
1107
1108 return true;
1109}
1110
1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
///
/// When \p IsHuge is set and \p Old is an instruction, the parent block of each
/// of Old's users is inserted into FreshBBs before the replacement happens.
/// When IsHuge is false the user walk has no effect and only the final
/// Old->replaceAllUsesWith(New) matters.
1112static void replaceAllUsesWith(Value *Old, Value *New,
// NOTE(review): listing line 1113 is absent from this extract -- presumably
// the declaration of the `FreshBBs` parameter (a set of blocks, judging by the
// insert() call below). Confirm against the upstream file.
1114 bool IsHuge) {
1115 auto *OldI = dyn_cast<Instruction>(Old);
1116 if (OldI) {
1117 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1118 UI != E; ++UI) {
// cast<> asserts that every user of an instruction is itself an instruction.
1119 Instruction *User = cast<Instruction>(*UI);
1120 if (IsHuge)
1121 FreshBBs.insert(User->getParent());
1122 }
1123 }
1124 Old->replaceAllUsesWith(New);
1125}
1126
1127/// Eliminate a basic block that has only phi's and an unconditional branch in
1128/// it.
///
/// \p BB must end in a BranchInst (enforced by cast<>); its first successor is
/// the destination. If the destination has BB as its only predecessor, the
/// single-predecessor path is taken and the function returns early. Otherwise
/// the destination's PHIs are rewritten to take BB's incoming edges directly,
/// BB's uses are redirected to the destination, and BB is erased.
1129void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1130 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1131 BasicBlock *DestBB = BI->getSuccessor(0);
1132
1133 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1134 << *BB << *DestBB);
1135
1136 // If the destination block has a single pred, then this is a trivial edge,
1137 // just collapse it.
1138 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1139 if (SinglePred != DestBB) {
1140 assert(SinglePred == BB &&
1141 "Single predecessor not the same as predecessor");
1142 // Merge DestBB into SinglePred/BB and delete it.
// NOTE(review): listing line 1143 is absent from this extract -- presumably
// the call that performs the merge described above. Confirm against the
// upstream file.
1144 // Note: BB(=SinglePred) will not be deleted on this path.
1145 // DestBB(=its single successor) is the one that was deleted.
1146 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1147
1148 if (IsHugeFunc) {
1149 // Update FreshBBs to optimize the merged BB.
1150 FreshBBs.insert(SinglePred);
1151 FreshBBs.erase(DestBB);
1152 }
1153 return;
1154 }
1155 }
1156
1157 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1158 // to handle the new incoming edges it is about to have.
1159 for (PHINode &PN : DestBB->phis()) {
1160 // Remove the incoming value for BB, and remember it.
1161 Value *InVal = PN.removeIncomingValue(BB, false);
1162
1163 // Two options: either the InVal is a phi node defined in BB or it is some
1164 // value that dominates BB.
1165 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166 if (InValPhi && InValPhi->getParent() == BB) {
1167 // Add all of the input values of the input PHI as inputs of this phi.
1168 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InValPhi->getIncomingValue(i),
1170 InValPhi->getIncomingBlock(i));
1171 } else {
1172 // Otherwise, add one instance of the dominating value for each edge that
1173 // we will be adding.
// BB's predecessors are read from its first PHI when it has one, otherwise
// from the predecessor list.
1174 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1175 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1177 } else {
1178 for (BasicBlock *Pred : predecessors(BB))
1179 PN.addIncoming(InVal, Pred);
1180 }
1181 }
1182 }
1183
1184 // Preserve loop Metadata.
// BB's branch is about to disappear, so its MD_loop metadata is copied onto the
// terminator of every predecessor.
1185 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1186 for (auto *Pred : predecessors(BB))
1187 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1188 }
1189
1190 // The PHIs are now updated, change everything that refers to BB to use
1191 // DestBB and remove BB.
1192 BB->replaceAllUsesWith(DestBB);
1193 BB->eraseFromParent();
1194 ++NumBlocksElim;
1195
1196 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1197}
1198
1199// Computes a map of base pointer relocation instructions to corresponding
1200// derived pointer relocation instructions given a vector of all relocate calls
//
// A relocate whose base index equals its derived index is treated as a base
// relocation. Every other relocate is appended to the entry of the base
// relocation with the same base index, if one exists in AllRelocateCalls;
// otherwise it is left out of the result.
//
// NOTE(review): listing line 1201 (the line carrying the function name and
// return type) and line 1203 (the type of the RelocateInstMap out-parameter)
// are absent from this extract. The name computeBaseDerivedRelocateMap is
// taken from the call site in simplifyOffsetableRelocate. Confirm against the
// upstream file.
1202 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1204 &RelocateInstMap) {
1205 // Collect information in two maps: one primarily for locating the base object
1206 // while filling the second map; the second map is the final structure holding
1207 // a mapping between Base and corresponding Derived relocate calls
// NOTE(review): listing line 1208 is absent -- presumably the declaration of
// `RelocateIdxMap`, keyed by the (base index, derived index) pair built below.
1209 for (auto *ThisRelocate : AllRelocateCalls) {
1210 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1211 ThisRelocate->getDerivedPtrIndex());
1212 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1213 }
1214 for (auto &Item : RelocateIdxMap) {
1215 std::pair<unsigned, unsigned> Key = Item.first;
1216 if (Key.first == Key.second)
1217 // Base relocation: nothing to insert
1218 continue;
1219
1220 GCRelocateInst *I = Item.second;
// The base relocation for this derived pointer has both indices equal to the
// base index.
1221 auto BaseKey = std::make_pair(Key.first, Key.first);
1222
1223 // We're iterating over RelocateIdxMap so we cannot modify it.
1224 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1225 if (MaybeBase == RelocateIdxMap.end())
1226 // TODO: We might want to insert a new base object relocate and gep off
1227 // that, if there are enough derived object relocates.
1228 continue;
1229
1230 RelocateInstMap[MaybeBase->second].push_back(I);
1231 }
1232}
1233
1234// Accepts a GEP and extracts the operands into a vector provided they're all
1235// small integer constants
//
// Operand 0 of the GEP is skipped; operands 1..N-1 must each be a ConstantInt
// whose zero-extended value is at most 20. On success those operands are
// appended to OffsetV and true is returned. On failure OffsetV is left
// untouched and false is returned.
//
// NOTE(review): listing line 1236 (the line carrying the function name, the
// return type and the GEP parameter) is absent from this extract. The name
// getGEPSmallConstantIntOffsetV is taken from the call site in
// simplifyRelocatesOffABase. Confirm against the upstream file.
1237 SmallVectorImpl<Value *> &OffsetV) {
// First pass validates every index so that nothing is appended on failure.
1238 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1239 // Only accept small constant integer operands
1240 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1241 if (!Op || Op->getZExtValue() > 20)
1242 return false;
1243 }
1244
// Second pass copies the validated indices.
1245 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1246 OffsetV.push_back(GEP->getOperand(i));
1247 return true;
1248}
1249
1250// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1251// replace, computes a replacement, and applies it.
//
// Each target that is a GEP with small constant indices off the same base, and
// lives in the same block as RelocatedBase, is replaced by an equivalent GEP
// off RelocatedBase and erased. Returns true if RelocatedBase was moved or any
// target was replaced.
1252static bool
// NOTE(review): listing line 1253 (the line carrying the function name and the
// RelocatedBase parameter) is absent from this extract. The name
// simplifyRelocatesOffABase is taken from the call site in
// simplifyOffsetableRelocate. Confirm against the upstream file.
1254 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1255 bool MadeChange = false;
1256 // We must ensure the relocation of derived pointer is defined after
1257 // relocation of base pointer. If we find a relocation corresponding to base
1258 // defined earlier than relocation of base then we move relocation of base
1259 // right before found relocation. We consider only relocation in the same
1260 // basic block as relocation of base. Relocations from other basic block will
1261 // be skipped by optimization and we do not care about them.
1262 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1263 &*R != RelocatedBase; ++R)
1264 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1265 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1266 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1267 RelocatedBase->moveBefore(RI->getIterator());
1268 MadeChange = true;
1269 break;
1270 }
1271
1272 for (GCRelocateInst *ToReplace : Targets) {
1273 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1274 "Not relocating a derived object of the original base object");
1275 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1276 // A duplicate relocate call. TODO: coalesce duplicates.
1277 continue;
1278 }
1279
1280 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1281 // Base and derived relocates are in different basic blocks.
1282 // In this case transform is only valid when base dominates derived
1283 // relocate. However it would be too expensive to check dominance
1284 // for each such relocate, so we skip the whole transformation.
1285 continue;
1286 }
1287
// Only derived pointers that are a GEP directly off the un-relocated base can
// be recomputed from the relocated base.
1288 Value *Base = ToReplace->getBasePtr();
1289 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1290 if (!Derived || Derived->getPointerOperand() != Base)
1291 continue;
1292
// NOTE(review): listing line 1293 is absent from this extract -- presumably
// the declaration of `OffsetV`, the vector of GEP indices filled by the call
// below. Confirm against the upstream file.
1294 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1295 continue;
1296
1297 // Create a Builder and replace the target callsite with a gep
1298 assert(RelocatedBase->getNextNode() &&
1299 "Should always have one since it's not a terminator");
1300
1301 // Insert after RelocatedBase
1302 IRBuilder<> Builder(RelocatedBase->getNextNode());
1303 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1304
1305 // If gc_relocate does not match the actual type, cast it to the right type.
1306 // In theory, there must be a bitcast after gc_relocate if the type does not
1307 // match, and we should reuse it to get the derived pointer. But it could be
1308 // cases like this:
1309 // bb1:
1310 // ...
1311 // %g1 = call coldcc i8 addrspace(1)*
1312 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313 //
1314 // bb2:
1315 // ...
1316 // %g2 = call coldcc i8 addrspace(1)*
1317 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1318 //
1319 // merge:
1320 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1321 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1322 //
1323 // In this case, we can not find the bitcast any more. So we insert a new
1324 // bitcast no matter there is already one or not. In this way, we can handle
1325 // all cases, and the extra bitcast should be optimized away in later
1326 // passes.
1327 Value *ActualRelocatedBase = RelocatedBase;
1328 if (RelocatedBase->getType() != Base->getType()) {
1329 ActualRelocatedBase =
1330 Builder.CreateBitCast(RelocatedBase, Base->getType());
1331 }
1332 Value *Replacement =
1333 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1334 ArrayRef(OffsetV));
1335 Replacement->takeName(ToReplace);
1336 // If the newly generated derived pointer's type does not match the original
1337 // derived pointer's type, cast the new derived pointer to match it. Same
1338 // reasoning as above.
1339 Value *ActualReplacement = Replacement;
1340 if (Replacement->getType() != ToReplace->getType()) {
1341 ActualReplacement =
1342 Builder.CreateBitCast(Replacement, ToReplace->getType());
1343 }
1344 ToReplace->replaceAllUsesWith(ActualReplacement);
1345 ToReplace->eraseFromParent();
1346
1347 MadeChange = true;
1348 }
1349 return MadeChange;
1350}
1351
1352// Turns this:
1353//
1354// %base = ...
1355// %ptr = gep %base + 15
1356// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1357// %base' = relocate(%tok, i32 4, i32 4)
1358// %ptr' = relocate(%tok, i32 4, i32 5)
1359// %val = load %ptr'
1360//
1361// into this:
1362//
1363// %base = ...
1364// %ptr = gep %base + 15
1365// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1366// %base' = gc.relocate(%tok, i32 4, i32 4)
1367// %ptr' = gep %base' + 15
1368// %val = load %ptr'
1369bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1370 bool MadeChange = false;
1371 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1372 for (auto *U : I.users())
1373 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1374 // Collect all the relocate calls associated with a statepoint
1375 AllRelocateCalls.push_back(Relocate);
1376
1377 // We need at least one base pointer relocation + one derived pointer
1378 // relocation to mangle
1379 if (AllRelocateCalls.size() < 2)
1380 return false;
1381
1382 // RelocateInstMap is a mapping from the base relocate instruction to the
1383 // corresponding derived relocate instructions
1385 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1386 if (RelocateInstMap.empty())
1387 return false;
1388
1389 for (auto &Item : RelocateInstMap)
1390 // Item.first is the RelocatedBase to offset against
1391 // Item.second is the vector of Targets to replace
1392 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1393 return MadeChange;
1394}
1395
1396/// Sink the specified cast instruction into its user blocks.
///
/// For every use outside the cast's own block, a clone of \p CI is placed at
/// the first insertion point of the using block (at most one clone per block)
/// and the use is redirected to it. If no uses of CI remain afterwards, CI is
/// erased.
///
/// \returns true if any use was rewritten or CI was erased.
1397static bool SinkCast(CastInst *CI) {
1398 BasicBlock *DefBB = CI->getParent();
1399
1400 /// InsertedCasts - Only insert a cast in each block once.
// NOTE(review): listing line 1401 is absent from this extract -- presumably
// the declaration of `InsertedCasts`, used below as a map from block to the
// clone inserted there. Confirm against the upstream file.
1402
1403 bool MadeChange = false;
1404 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1405 UI != E;) {
1406 Use &TheUse = UI.getUse();
1407 Instruction *User = cast<Instruction>(*UI);
1408
1409 // Figure out which BB this cast is used in. For PHI's this is the
1410 // appropriate predecessor block.
1411 BasicBlock *UserBB = User->getParent();
1412 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1413 UserBB = PN->getIncomingBlock(TheUse);
1414 }
1415
1416 // Preincrement use iterator so we don't invalidate it.
1417 ++UI;
1418
1419 // The first insertion point of a block containing an EH pad is after the
1420 // pad. If the pad is the user, we cannot sink the cast past the pad.
1421 if (User->isEHPad())
1422 continue;
1423
1424 // If the block selected to receive the cast is an EH pad that does not
1425 // allow non-PHI instructions before the terminator, we can't sink the
1426 // cast.
1427 if (UserBB->getTerminator()->isEHPad())
1428 continue;
1429
1430 // If this user is in the same block as the cast, don't change the cast.
1431 if (UserBB == DefBB)
1432 continue;
1433
1434 // If we have already inserted a cast into this block, use it.
// The reference lets the assignment below fill the map slot in place.
1435 CastInst *&InsertedCast = InsertedCasts[UserBB];
1436
1437 if (!InsertedCast) {
1438 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1439 assert(InsertPt != UserBB->end());
1440 InsertedCast = cast<CastInst>(CI->clone());
1441 InsertedCast->insertBefore(*UserBB, InsertPt);
1442 }
1443
1444 // Replace a use of the cast with a use of the new cast.
1445 TheUse = InsertedCast;
1446 MadeChange = true;
1447 ++NumCastUses;
1448 }
1449
1450 // If we removed all uses, nuke the cast.
1451 if (CI->use_empty()) {
1452 salvageDebugInfo(*CI);
1453 CI->eraseFromParent();
1454 MadeChange = true;
1455 }
1456
1457 return MadeChange;
1458}
1459
1460/// If the specified cast instruction is a noop copy (e.g. it's casting from
1461/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1462/// reduce the number of virtual registers that must be created and coalesced.
1463///
1464/// Return true if any changes are made.
///
/// The cast is handed to SinkCast() only when its source and destination value
/// types are equal after the adjustments below. Address-space casts are also
/// required to be free according to \p TLI.
// NOTE(review): listing line 1465 (the line carrying the function name, the
// return type and the CI/TLI parameters) is absent from this extract. The name
// is taken from the block's upstream identity, which this chunk does not show;
// confirm against the upstream file.
1466 const DataLayout &DL) {
1467 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1468 // than sinking only nop casts, but is helpful on some platforms.
1469 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1470 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1471 ASC->getDestAddressSpace()))
1472 return false;
1473 }
1474
1475 // If this is a noop copy,
1476 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1477 EVT DstVT = TLI.getValueType(DL, CI->getType());
1478
1479 // This is an fp<->int conversion?
1480 if (SrcVT.isInteger() != DstVT.isInteger())
1481 return false;
1482
1483 // If this is an extension, it will be a zero or sign extension, which
1484 // isn't a noop.
1485 if (SrcVT.bitsLT(DstVT))
1486 return false;
1487
1488 // If these values will be promoted, find out what they will be promoted
1489 // to. This helps us consider truncates on PPC as noop copies when they
1490 // are.
// NOTE(review): listing lines 1492 and 1495 are absent from this extract; each
// held the right-hand side of the comparison that opens on the line before it.
// Confirm against the upstream file.
1491 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1493 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1494 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1496 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1497
1498 // If, after promotion, these are the same types, this is a noop copy.
1499 if (SrcVT != DstVT)
1500 return false;
1501
1502 return SinkCast(CI);
1503}
1504
1505// Match a simple increment by constant operation. Note that if a sub is
1506// matched, the step is negated (as if the step had been canonicalized to
1507// an add, even though we leave the instruction alone.)
1508static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1509 Constant *&Step) {
1510 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1511 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1512 m_Instruction(LHS), m_Constant(Step)))))
1513 return true;
1514 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1515 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1516 m_Instruction(LHS), m_Constant(Step))))) {
1517 Step = ConstantExpr::getNeg(Step);
1518 return true;
1519 }
1520 return false;
1521}
1522
1523/// If given \p PN is an inductive variable with value IVInc coming from the
1524/// backedge, and on each iteration it gets increased by Step, return pair
1525/// <IVInc, Step>. Otherwise, return std::nullopt.
1526static std::optional<std::pair<Instruction *, Constant *>>
1527getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1528 const Loop *L = LI->getLoopFor(PN->getParent());
1529 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1530 return std::nullopt;
1531 auto *IVInc =
1532 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1533 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1534 return std::nullopt;
1535 Instruction *LHS = nullptr;
1536 Constant *Step = nullptr;
1537 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1538 return std::make_pair(IVInc, Step);
1539 return std::nullopt;
1540}
1541
1542static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1543 auto *I = dyn_cast<Instruction>(V);
1544 if (!I)
1545 return false;
1546 Instruction *LHS = nullptr;
1547 Constant *Step = nullptr;
1548 if (!matchIncrement(I, LHS, Step))
1549 return false;
1550 if (auto *PN = dyn_cast<PHINode>(LHS))
1551 if (auto IVInc = getIVIncrement(PN, LI))
1552 return IVInc->first == I;
1553 return false;
1554}
1555
1556bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1557 Value *Arg0, Value *Arg1,
1558 CmpInst *Cmp,
1559 Intrinsic::ID IID) {
1560 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1561 if (!isIVIncrement(BO, LI))
1562 return false;
1563 const Loop *L = LI->getLoopFor(BO->getParent());
1564 assert(L && "L should not be null after isIVIncrement()");
1565 // Do not risk on moving increment into a child loop.
1566 if (LI->getLoopFor(Cmp->getParent()) != L)
1567 return false;
1568
1569 // Finally, we need to ensure that the insert point will dominate all
1570 // existing uses of the increment.
1571
1572 auto &DT = getDT(*BO->getParent()->getParent());
1573 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1574 // If we're moving up the dom tree, all uses are trivially dominated.
1575 // (This is the common case for code produced by LSR.)
1576 return true;
1577
1578 // Otherwise, special case the single use in the phi recurrence.
1579 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1580 };
1581 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1582 // We used to use a dominator tree here to allow multi-block optimization.
1583 // But that was problematic because:
1584 // 1. It could cause a perf regression by hoisting the math op into the
1585 // critical path.
1586 // 2. It could cause a perf regression by creating a value that was live
1587 // across multiple blocks and increasing register pressure.
1588 // 3. Use of a dominator tree could cause large compile-time regression.
1589 // This is because we recompute the DT on every change in the main CGP
1590 // run-loop. The recomputing is probably unnecessary in many cases, so if
1591 // that was fixed, using a DT here would be ok.
1592 //
1593 // There is one important particular case we still want to handle: if BO is
1594 // the IV increment. Important properties that make it profitable:
1595 // - We can speculate IV increment anywhere in the loop (as long as the
1596 // indvar Phi is its only user);
1597 // - Upon computing Cmp, we effectively compute something equivalent to the
1598 // IV increment (despite it loops differently in the IR). So moving it up
1599 // to the cmp point does not really increase register pressure.
1600 return false;
1601 }
1602
1603 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1604 if (BO->getOpcode() == Instruction::Add &&
1605 IID == Intrinsic::usub_with_overflow) {
1606 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1607 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1608 }
1609
1610 // Insert at the first instruction of the pair.
1611 Instruction *InsertPt = nullptr;
1612 for (Instruction &Iter : *Cmp->getParent()) {
1613 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1614 // the overflow intrinsic are defined.
1615 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1616 InsertPt = &Iter;
1617 break;
1618 }
1619 }
1620 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1621
1622 IRBuilder<> Builder(InsertPt);
1623 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1624 if (BO->getOpcode() != Instruction::Xor) {
1625 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1626 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1627 } else
1628 assert(BO->hasOneUse() &&
1629 "Patterns with XOr should use the BO only in the compare");
1630 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1631 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1632 Cmp->eraseFromParent();
1633 BO->eraseFromParent();
1634 return true;
1635}
1636
1637/// Match special-case patterns that check for unsigned add overflow.
///
/// On success \p Add is set to the matching add among the users of the
/// compare's first operand and true is returned. Otherwise false is returned
/// and \p Add is not written.
// NOTE(review): listing line 1638 (the line carrying the function name, the
// return type and the Cmp parameter) is absent from this extract. The name is
// taken from the comment inside combineToUAddWithOverflow; confirm against the
// upstream file.
1639 BinaryOperator *&Add) {
1640 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1641 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1642 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1643
1644 // We are not expecting non-canonical/degenerate code. Just bail out.
1645 if (isa<Constant>(A))
1646 return false;
1647
// Translate the compare's constant into the constant the paired add must use.
1648 ICmpInst::Predicate Pred = Cmp->getPredicate();
1649 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1650 B = ConstantInt::get(B->getType(), 1);
1651 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1652 B = Constant::getAllOnesValue(B->getType());
1653 else
1654 return false;
1655
1656 // Check the users of the variable operand of the compare looking for an add
1657 // with the adjusted constant.
1658 for (User *U : A->users()) {
1659 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1660 Add = cast<BinaryOperator>(U);
1661 return true;
1662 }
1663 }
1664 return false;
1665}
1666
1667/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1668/// intrinsic. Return true if any changes were made.
///
/// On success \p ModifiedDT is set to ModifyDT::ModifyInstDT so that the caller
/// does not keep iterating over the erased instructions.
1669bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1670 ModifyDT &ModifiedDT) {
1671 bool EdgeCase = false;
1672 Value *A, *B;
// NOTE(review): listing line 1673 is absent from this extract -- presumably
// the declaration of `Add`, which is bound by m_BinOp(Add) below. Confirm
// against the upstream file.
1674 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
// NOTE(review): listing line 1675 is absent -- presumably the condition that
// guards the `return false` below; the next comment suggests it calls
// matchUAddWithOverflowConstantEdgeCases. Confirm against upstream.
1676 return false;
1677 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1678 A = Add->getOperand(0);
1679 B = Add->getOperand(1);
1680 EdgeCase = true;
1681 }
1682
// NOTE(review): listing line 1683 is absent -- it opened the condition whose
// remaining arguments follow. Confirm against upstream.
1684 TLI->getValueType(*DL, Add->getType()),
1685 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1686 return false;
1687
1688 // We don't want to move around uses of condition values this late, so we
1689 // check if it is legal to create the call to the intrinsic in the basic
1690 // block containing the icmp.
1691 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1692 return false;
1693
1694 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1695 Intrinsic::uadd_with_overflow))
1696 return false;
1697
1698 // Reset callers - do not crash by iterating over a dead instruction.
1699 ModifiedDT = ModifyDT::ModifyInstDT;
1700 return true;
1701}
1702
/// Try to combine the compare \p Cmp and a matching subtract (or add of a
/// negated constant) into a call to the usub_with_overflow intrinsic.
///
/// The compare is first normalized to the form (A u< B). Then the users of the
/// variable operand are searched for `sub A, B`, or for `add A, -C` when B is
/// the constant C.
///
/// \returns true if the replacement was made; in that case \p ModifiedDT is
/// set to ModifyDT::ModifyInstDT.
1703bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1704 ModifyDT &ModifiedDT) {
1705 // We are not expecting non-canonical/degenerate code. Just bail out.
1706 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1707 if (isa<Constant>(A) && isa<Constant>(B))
1708 return false;
1709
1710 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1711 ICmpInst::Predicate Pred = Cmp->getPredicate();
1712 if (Pred == ICmpInst::ICMP_UGT) {
1713 std::swap(A, B);
1714 Pred = ICmpInst::ICMP_ULT;
1715 }
1716 // Convert special-case: (A == 0) is the same as (A u< 1).
1717 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1718 B = ConstantInt::get(B->getType(), 1);
1719 Pred = ICmpInst::ICMP_ULT;
1720 }
1721 // Convert special-case: (A != 0) is the same as (0 u< A).
1722 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1723 std::swap(A, B);
1724 Pred = ICmpInst::ICMP_ULT;
1725 }
1726 if (Pred != ICmpInst::ICMP_ULT)
1727 return false;
1728
1729 // Walk the users of a variable operand of a compare looking for a subtract or
1730 // add with that same operand. Also match the 2nd operand of the compare to
1731 // the add/sub, but that may be a negated constant operand of an add.
1732 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1733 BinaryOperator *Sub = nullptr;
1734 for (User *U : CmpVariableOperand->users()) {
1735 // A - B, A u< B --> usubo(A, B)
1736 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1737 Sub = cast<BinaryOperator>(U);
1738 break;
1739 }
1740
1741 // A + (-C), A u< C (canonicalized form of (sub A, C))
1742 const APInt *CmpC, *AddC;
1743 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1744 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1745 Sub = cast<BinaryOperator>(U);
1746 break;
1747 }
1748 }
1749 if (!Sub)
1750 return false;
1751
// NOTE(review): listing line 1752 is absent from this extract -- it opened the
// condition whose remaining arguments follow. Confirm against the upstream
// file.
1753 TLI->getValueType(*DL, Sub->getType()),
1754 Sub->hasNUsesOrMore(1)))
1755 return false;
1756
// The intrinsic's arguments are taken from Sub's own operands. When Sub is an
// add of a negated constant, replaceMathCmpWithIntrinsic negates that constant
// back.
1757 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1758 Cmp, Intrinsic::usub_with_overflow))
1759 return false;
1760
1761 // Reset callers - do not crash by iterating over a dead instruction.
1762 ModifiedDT = ModifyDT::ModifyInstDT;
1763 return true;
1764}
1765
1766/// Sink the given CmpInst into user blocks to reduce the number of virtual
1767/// registers that must be created and coalesced. This is a clear win except on
1768/// targets with multiple condition code registers (PowerPC), where it might
1769/// lose; some adjustment may be wanted there.
1770///
1771/// Return true if any changes are made.
///
/// For every non-PHI use outside the compare's own block, a copy of \p Cmp is
/// created at the first insertion point of the using block (at most one copy
/// per block) and the use is redirected to it. If no uses remain afterwards,
/// Cmp is erased.
1772static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
// NOTE(review): listing line 1773 is absent from this extract -- presumably
// the condition guarding the `return false` below. Confirm against the
// upstream file.
1774 return false;
1775
1776 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1777 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1778 return false;
1779
1780 // Only insert a cmp in each block once.
// NOTE(review): listing line 1781 is absent -- presumably the declaration of
// `InsertedCmps`, used below as a map from block to the copy inserted there.
1782
1783 bool MadeChange = false;
1784 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1785 UI != E;) {
1786 Use &TheUse = UI.getUse();
1787 Instruction *User = cast<Instruction>(*UI);
1788
1789 // Preincrement use iterator so we don't invalidate it.
1790 ++UI;
1791
1792 // Don't bother for PHI nodes.
1793 if (isa<PHINode>(User))
1794 continue;
1795
1796 // Figure out which BB this cmp is used in.
1797 BasicBlock *UserBB = User->getParent();
1798 BasicBlock *DefBB = Cmp->getParent();
1799
1800 // If this user is in the same block as the cmp, don't change the cmp.
1801 if (UserBB == DefBB)
1802 continue;
1803
1804 // If we have already inserted a cmp into this block, use it.
// The reference lets the assignment below fill the map slot in place.
1805 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1806
1807 if (!InsertedCmp) {
1808 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1809 assert(InsertPt != UserBB->end());
1810 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1811 Cmp->getOperand(0), Cmp->getOperand(1), "");
1812 InsertedCmp->insertBefore(*UserBB, InsertPt);
1813 // Propagate the debug info.
1814 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1815 }
1816
1817 // Replace a use of the cmp with a use of the new cmp.
1818 TheUse = InsertedCmp;
1819 MadeChange = true;
1820 ++NumCmpUses;
1821 }
1822
1823 // If we removed all uses, nuke the cmp.
1824 if (Cmp->use_empty()) {
1825 Cmp->eraseFromParent();
1826 MadeChange = true;
1827 }
1828
1829 return MadeChange;
1830}
1831
1832/// For pattern like:
1833///
1834/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1835/// ...
1836/// DomBB:
1837/// ...
1838/// br DomCond, TrueBB, CmpBB
1839/// CmpBB: (with DomBB being the single predecessor)
1840/// ...
1841/// Cmp = icmp eq CmpOp0, CmpOp1
1842/// ...
1843///
1844/// This needs two comparisons on targets where icmp sgt/slt is lowered
1845/// differently from icmp eq (PowerPC). This function tries to convert
1846/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1847/// After that, DomCond and Cmp can share the same comparison, which saves
1848/// one comparison.
1849///
1850/// Return true if any changes are made.
// NOTE(review): embedded source line 1851 (the start of this function's
// signature) is missing from this copy. The call in optimizeCmp names the
// function foldICmpWithDominatingICmp and passes (Cmp, *TLI).
1852 const TargetLowering &TLI) {
// NOTE(review): embedded source line 1853 is missing, so the condition guarding
// the early return below is not visible here.
1854 return false;
1855
// Only an equality compare is a candidate.
1856 ICmpInst::Predicate Pred = Cmp->getPredicate();
1857 if (Pred != ICmpInst::ICMP_EQ)
1858 return false;
1859
1860 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1861 // icmp slt/sgt would introduce more redundant LLVM IR.
1862 for (User *U : Cmp->users()) {
1863 if (isa<BranchInst>(U))
1864 continue;
// A select only qualifies when Cmp is its condition, not one of its values.
1865 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1866 continue;
1867 return false;
1868 }
1869
1870 // This is a cheap/incomplete check for dominance - just match a single
1871 // predecessor with a conditional branch.
1872 BasicBlock *CmpBB = Cmp->getParent();
1873 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1874 if (!DomBB)
1875 return false;
1876
1877 // We want to ensure that the only way control gets to the comparison of
1878 // interest is that a less/greater than comparison on the same operands is
1879 // false.
1880 Value *DomCond;
1881 BasicBlock *TrueBB, *FalseBB;
1882 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1883 return false;
1884 if (CmpBB != FalseBB)
1885 return false;
1886
// The dominating condition must be an icmp on exactly the same operands, in
// the same order, with a signed greater-than or less-than predicate.
1887 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1888 CmpPredicate DomPred;
1889 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1890 return false;
1891 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1892 return false;
1893
1894 // Convert the equality comparison to the opposite of the dominating
1895 // comparison and swap the direction for all branch/select users.
1896 // We have conceptually converted:
1897 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1898 // to
1899 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1900 // And similarly for branches.
1901 for (User *U : Cmp->users()) {
1902 if (auto *BI = dyn_cast<BranchInst>(U)) {
1903 assert(BI->isConditional() && "Must be conditional");
1904 BI->swapSuccessors();
1905 continue;
1906 }
1907 if (auto *SI = dyn_cast<SelectInst>(U)) {
1908 // Swap operands
1909 SI->swapValues();
1910 SI->swapProfMetadata();
1911 continue;
1912 }
// The user scan above already rejected every other kind of user.
1913 llvm_unreachable("Must be a branch or a select");
1914 }
// DomPred is either sgt or slt at this point (checked above).
1915 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1916 return true;
1917}
1918
1919/// Many architectures use the same instruction for both subtract and cmp. Try
1920/// to swap cmp operands to match subtract operations to allow for CSE.
///
/// Return true if the operands of the compare were swapped.
// NOTE(review): embedded source line 1921 (this function's signature and
// opening brace) is missing from this copy; the body below operates on a
// compare named `Cmp`.
1922 Value *Op0 = Cmp->getOperand(0);
1923 Value *Op1 = Cmp->getOperand(1);
// Only integer-typed compares of two distinct, non-constant operands are
// considered.
1924 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1925 isa<Constant>(Op1) || Op0 == Op1)
1926 return false;
1927
1928 // If a subtract already has the same operands as a compare, swapping would be
1929 // bad. If a subtract has the same operands as a compare but in reverse order,
1930 // then swapping is good.
// GoodToSwap ends up as (#reversed-order subs) - (#same-order subs) among the
// users of Op0.
1931 int GoodToSwap = 0;
1932 unsigned NumInspected = 0;
1933 for (const User *U : Op0->users()) {
1934 // Avoid walking many users.
// Gives up entirely (no swap) once more than 128 users have been visited.
1935 if (++NumInspected > 128)
1936 return false;
1937 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1938 GoodToSwap++;
1939 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1940 GoodToSwap--;
1941 }
1942
// Swap only when reversed-order subtracts strictly outnumber same-order ones.
1943 if (GoodToSwap > 0) {
1944 Cmp->swapOperands();
1945 return true;
1946 }
1947 return false;
1948}
1949
/// If \p Cmp is an fcmp that fcmpToClassTest can express as a floating-point
/// class test for infinity (optionally combined with NaN), or the complement of
/// such a test, replace all of its uses with a call built by
/// IRBuilder::createIsFPClass.
///
/// Return true if the uses of \p Cmp were replaced.
1950static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1951 const DataLayout &DL) {
1952 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1953 if (!FCmp)
1954 return false;
1955
1956 // Don't fold if the target offers free fabs and the predicate is legal.
1957 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1958 if (TLI.isFAbsFree(VT) &&
// NOTE(review): embedded source line 1959 is missing; it holds the start of the
// second operand of this `&&` (presumably the "predicate is legal" half
// mentioned in the comment above). Only its trailing argument survives below.
1960 VT.getSimpleVT()))
1961 return false;
1962
1963 // Reverse the canonicalization if it is a FP class test
// Only "is infinity" and "is infinity or NaN" tests qualify.
1964 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1965 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
1966 };
1967 auto [ClassVal, ClassTest] =
1968 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1969 FCmp->getOperand(0), FCmp->getOperand(1));
// A null ClassVal means the compare could not be expressed as a class test.
1970 if (!ClassVal)
1971 return false;
1972
// Accept the test itself or its bitwise complement.
1973 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1974 return false;
1975
1976 IRBuilder<> Builder(Cmp);
1977 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1978 Cmp->replaceAllUsesWith(IsFPClass);
// NOTE(review): embedded source line 1979 is missing from this copy.
1980 return true;
1981}
1982
1984 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
1985 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
// NOTE(review): embedded source line 1983 (the start of this signature) is
// missing from this copy; the caller further down in this file invokes the
// function as isRemOfLoopIncrementWithLoopInvariant.
//
// Matches `urem Incr, RemAmt` where Incr is either a PHI or an `add nuw` with
// a PHI operand. The PHI must have exactly two incoming values and be a loop
// induction variable that steps by one with a non-wrapping increment. Its loop
// must have a preheader and a latch and must contain Rem, and RemAmt must be
// loop invariant.
//
// On success the four out-parameters are filled in (AddInstOut and
// AddOffsetOut are null when Incr is the PHI itself) and true is returned. On
// failure false is returned and the out-parameters are left untouched.
1986 Value *Incr, *RemAmt;
1987 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1988 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
1989 return false;
1990
1991 Value *AddInst, *AddOffset;
1992 // Find out loop increment PHI.
1993 auto *PN = dyn_cast<PHINode>(Incr);
1994 if (PN != nullptr) {
1995 AddInst = nullptr;
1996 AddOffset = nullptr;
1997 } else {
1998 // Search through a NUW add on top of the loop increment.
1999 Value *V0, *V1;
2000 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2001 return false;
2002
2003 AddInst = Incr;
// Try either operand of the add as the PHI; the other one is the offset. PN
// may still be null after this, which is rejected just below.
2004 PN = dyn_cast<PHINode>(V0);
2005 if (PN != nullptr) {
2006 AddOffset = V1;
2007 } else {
2008 PN = dyn_cast<PHINode>(V1);
2009 AddOffset = V0;
2010 }
2011 }
2012
2013 if (!PN)
2014 return false;
2015
2016 // This isn't strictly necessary, what we really need is one increment and any
2017 // amount of initial values all being the same.
2018 if (PN->getNumIncomingValues() != 2)
2019 return false;
2020
2021 // Only trivially analyzable loops.
2022 Loop *L = LI->getLoopFor(PN->getParent());
2023 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2024 return false;
2025
2026 // Require that the remainder is in the loop.
2027 if (!L->contains(Rem))
2028 return false;
2029
2030 // Only works if the remainder amount is loop invariant.
2031 if (!L->isLoopInvariant(RemAmt))
2032 return false;
2033
2034 // Is the PHI a loop increment?
2035 auto LoopIncrInfo = getIVIncrement(PN, LI);
2036 if (!LoopIncrInfo)
2037 return false;
2038
2039 // We need remainder_amount % increment_amount to be zero. Increment of one
2040 // satisfies that without any special logic and is overwhelmingly the common
2041 // case.
2042 if (!match(LoopIncrInfo->second, m_One()))
2043 return false;
2044
2045 // Need the increment to not overflow.
2046 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2047 return false;
2048
2049 // Set output variables.
2050 RemAmtOut = RemAmt;
2051 LoopIncrPNOut = PN;
2052 AddInstOut = AddInst;
2053 AddOffsetOut = AddOffset;
2054
2055 return true;
2056}
2057
2058// Try to transform:
2059//
2060// for(i = Start; i < End; ++i)
2061// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2062//
2063// ->
2064//
2065// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2066// for(i = Start; i < End; ++i, ++rem)
2067// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
//
// Returns true if Rem was replaced by the new remainder PHI and erased.
2069 const LoopInfo *LI,
2071 bool IsHuge) {
// NOTE(review): embedded source lines 2068 and 2070 (the start of the
// signature and one parameter line) are missing from this copy. The caller
// optimizeURem passes (Rem, DL, LI, FreshBBs, IsHugeFunc), and the body below
// uses Rem, DL, LI, FreshBBs and IsHuge.
2072 Value *AddOffset, *RemAmt, *AddInst;
2073 PHINode *LoopIncrPN;
2074 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2075 AddOffset, LoopIncrPN))
2076 return false;
2077
2078 // Only non-constant remainder as the extra IV is probably not profitable
2079 // in that case.
2080 //
2081 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2082 // we can rule out register pressure and ensure this `urem` is executed each
2083 // iteration, it's probably profitable to handle the const case as well.
2084 //
2085 // Potential TODO(2): Should we have a check for how "nested" this remainder
2086 // operation is? The new code runs every iteration so if the remainder is
2087 // guarded behind unlikely conditions this might not be worth it.
2088 if (match(RemAmt, m_ImmConstant()))
2089 return false;
2090
2091 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2092 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2093 // If we have add create initial value for remainder.
2094 // The logic here is:
2095 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant
2096 //
2097 // Only proceed if the expression simplifies (otherwise we can't fully
2098 // optimize out the urem).
2099 if (AddInst) {
2100 assert(AddOffset && "We found an add but missing values");
2101 // Without dom-condition/assumption cache we aren't likely to get much out
2102 // of a context instruction.
// The third argument passes along whether the original add was also nsw.
2103 Start = simplifyAddInst(Start, AddOffset,
2104 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2105 /*IsNUW=*/true, *DL);
2106 if (!Start)
2107 return false;
2108 }
2109
2110 // If we can't fully optimize out the `rem`, skip this transform.
2111 Start = simplifyURemInst(Start, RemAmt, *DL);
2112 if (!Start)
2113 return false;
2114
2115 // Create new remainder with induction variable.
2116 Type *Ty = Rem->getType();
2117 IRBuilder<> Builder(Rem->getContext());
2118
// The new PHI is created at the position of the existing loop increment PHI.
2119 Builder.SetInsertPoint(LoopIncrPN);
2120 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2121
// The update of the new remainder is emitted at the instruction that feeds
// LoopIncrPN from the latch.
2122 Builder.SetInsertPoint(cast<Instruction>(
2123 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2124 // `(add (urem x, y), 1)` is always nuw.
2125 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2126 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
// Wrap back to zero once the incremented remainder reaches RemAmt.
2127 Value *RemSel =
2128 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2129
2130 NewRem->addIncoming(Start, L->getLoopPreheader());
2131 NewRem->addIncoming(RemSel, L->getLoopLatch());
2132
2133 // Insert all touched BBs.
2134 FreshBBs.insert(LoopIncrPN->getParent());
2135 FreshBBs.insert(L->getLoopLatch());
2136 FreshBBs.insert(Rem->getParent());
2137 if (AddInst)
2138 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2139 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2140 Rem->eraseFromParent();
// The add that fed the old urem is removed too if nothing else uses it.
2141 if (AddInst && AddInst->use_empty())
2142 cast<Instruction>(AddInst)->eraseFromParent();
2143 return true;
2144}
2145
2146bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2147 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2148 return true;
2149 return false;
2150}
2151
2152/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2153/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
2154/// result cannot be zero.
2155static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
2156 const TargetTransformInfo &TTI,
2157 const DataLayout &DL) {
2158 CmpPredicate Pred;
2159 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
2160 return false;
2161 if (!ICmpInst::isEquality(Pred))
2162 return false;
2163 auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2164
2165 if (isKnownNonZero(II, DL)) {
2166 if (Pred == ICmpInst::ICMP_EQ) {
2167 Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
2168 Cmp->setPredicate(ICmpInst::ICMP_ULT);
2169 } else {
2170 Cmp->setPredicate(ICmpInst::ICMP_UGT);
2171 }
2172 return true;
2173 }
2174 return false;
2175}
2176
/// Run the compare optimizations of this pass on \p Cmp, one after another,
/// and stop at the first one that reports a change. \p ModifiedDT is forwarded
/// to the two overflow-intrinsic combines.
///
/// Return true if any of the transforms changed the IR.
2177bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2178 if (sinkCmpExpression(Cmp, *TLI))
2179 return true;
2180
2181 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2182 return true;
2183
2184 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2185 return true;
2186
2187 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2188 return true;
2189
// NOTE(review): embedded source line 2190 is missing from this copy, so the
// condition guarding this `return true` is not visible here.
2191 return true;
2192
2193 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2194 return true;
2195
2196 if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
2197 return true;
2198
2199 return false;
2200}
2201
2202/// Duplicate and sink the given 'and' instruction into user blocks where it is
2203/// used in a compare to allow isel to generate better code for targets where
2204/// this operation can be combined.
2205///
/// The transform only fires when every user of the 'and' is an icmp whose
/// second operand is the constant zero and the target reports the fold as
/// beneficial. A fresh 'and' is then created for each use and the original
/// instruction is erased.
///
2206/// Return true if any changes are made.
// NOTE(review): embedded source line 2207 (the start of this function's
// signature) is missing from this copy; the body below uses an instruction
// named `AndI` and a `TLI` object.
2208 SetOfInstrs &InsertedInsts) {
2209 // Double-check that we're not trying to optimize an instruction that was
2210 // already optimized by some other part of this pass.
2211 assert(!InsertedInsts.count(AndI) &&
2212 "Attempting to optimize already optimized and instruction");
// InsertedInsts is only read by the assert above; the cast marks it as used.
2213 (void)InsertedInsts;
2214
2215 // Nothing to do for single use in same basic block.
2216 if (AndI->hasOneUse() &&
2217 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2218 return false;
2219
2220 // Try to avoid cases where sinking/duplicating is likely to increase register
2221 // pressure.
// That is: neither operand is a constant int and both have a single use.
2222 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2223 !isa<ConstantInt>(AndI->getOperand(1)) &&
2224 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2225 return false;
2226
// First pass: verify that all users qualify before modifying anything.
2227 for (auto *U : AndI->users()) {
2228 Instruction *User = cast<Instruction>(U);
2229
2230 // Only sink 'and' feeding icmp with 0.
2231 if (!isa<ICmpInst>(User))
2232 return false;
2233
2234 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2235 if (!CmpC || !CmpC->isZero())
2236 return false;
2237 }
2238
2239 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2240 return false;
2241
2242 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2243 LLVM_DEBUG(AndI->getParent()->dump());
2244
2245 // Push the 'and' into the same block as the icmp 0. There should only be
2246 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2247 // others, so we don't need to keep track of which BBs we insert into.
2248 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2249 UI != E;) {
2250 Use &TheUse = UI.getUse();
2251 Instruction *User = cast<Instruction>(*UI);
2252
2253 // Preincrement use iterator so we don't invalidate it.
2254 ++UI;
2255
2256 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2257
2258 // Keep the 'and' in the same place if the use is already in the same block.
2259 Instruction *InsertPt =
2260 User->getParent() == AndI->getParent() ? AndI : User;
2261 Instruction *InsertedAnd = BinaryOperator::Create(
2262 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2263 InsertPt->getIterator());
2264 // Propagate the debug info.
2265 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2266
2267 // Replace a use of the 'and' with a use of the new 'and'.
2268 TheUse = InsertedAnd;
2269 ++NumAndUses;
2270 LLVM_DEBUG(User->getParent()->dump());
2271 }
2272
2273 // We removed all uses, nuke the and.
2274 AndI->eraseFromParent();
2275 return true;
2276}
2277
2278/// Check if the candidates could be combined with a shift instruction, which
2279/// includes:
2280/// 1. Truncate instruction
2281/// 2. And instruction and the imm is a mask of the low bits:
2282/// imm & (imm+1) == 0
///
/// Return true if the user is one of the two forms above.
// NOTE(review): embedded source line 2283 (this function's signature and
// opening brace) is missing from this copy; the body below inspects an
// instruction named `User`.
2284 if (!isa<TruncInst>(User)) {
// Not a truncate: it must be an 'and' whose second operand is a constant int.
2285 if (User->getOpcode() != Instruction::And ||
2286 !isa<ConstantInt>(User->getOperand(1)))
2287 return false;
2288
2289 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2290
// Reject the constant unless imm & (imm + 1) is zero, i.e. unless it is a
// mask of the low bits as described in the header comment.
2291 if ((Cimm & (Cimm + 1)).getBoolValue())
2292 return false;
2293 }
2294 return true;
2295}
2296
2297/// Sink both shift and truncate instruction to the use of truncate's BB.
///
/// For each qualifying user of the truncate that sits in a different block, a
/// copy of the shift and of the truncate is inserted at the start of that
/// block and the user is pointed at the new truncate.
///
/// Return true if any instructions were inserted.
2298static bool
2301 const TargetLowering &TLI, const DataLayout &DL) {
// NOTE(review): embedded source lines 2299 and 2300 (the function name and
// leading parameters) are missing from this copy. The visible call site passes
// (ShiftI, User, CI, InsertedShifts, TLI, DL).
2302 BasicBlock *UserBB = User->getParent();
// NOTE(review): embedded source line 2303 is missing; `InsertedTruncs`, which is
// indexed by block further down, is presumably declared on it - confirm.
2304 auto *TruncI = cast<TruncInst>(User);
2305 bool MadeChange = false;
2306
2307 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2308 TruncE = TruncI->user_end();
2309 TruncUI != TruncE;) {
2310
2311 Use &TruncTheUse = TruncUI.getUse();
2312 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2313 // Preincrement use iterator so we don't invalidate it.
2314
2315 ++TruncUI;
2316
// Users whose opcode has no ISD equivalent are skipped.
2317 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2318 if (!ISDOpcode)
2319 continue;
2320
2321 // If the use is actually a legal node, there will not be an
2322 // implicit truncate.
2323 // FIXME: always querying the result type is just an
2324 // approximation; some nodes' legality is determined by the
2325 // operand or other means. There's no good way to find out though.
// NOTE(review): embedded source line 2326 is missing; it holds the start of the
// legality query whose arguments continue on the next line.
2327 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2328 continue;
2329
2330 // Don't bother for PHI nodes.
2331 if (isa<PHINode>(TruncUser))
2332 continue;
2333
2334 BasicBlock *TruncUserBB = TruncUser->getParent();
2335
// Users in the truncate's own block need no sinking.
2336 if (UserBB == TruncUserBB)
2337 continue;
2338
2339 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2340 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2341
// NOTE(review): the use is only redirected inside this `if`, i.e. when a new
// shift/trunc pair is created. A later user in a block that already has
// entries keeps using the original truncate - confirm this is intended.
2342 if (!InsertedShift && !InsertedTrunc) {
2343 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2344 assert(InsertPt != TruncUserBB->end());
2345 // Sink the shift
2346 if (ShiftI->getOpcode() == Instruction::AShr)
2347 InsertedShift =
2348 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2349 else
2350 InsertedShift =
2351 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2352 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2353 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2354
2355 // Sink the trunc
2356 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2357 TruncInsertPt++;
2358 // It will go ahead of any debug-info.
2359 TruncInsertPt.setHeadBit(true);
2360 assert(TruncInsertPt != TruncUserBB->end());
2361
// The new truncate takes the freshly inserted shift as its input.
2362 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2363 TruncI->getType(), "");
2364 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2365 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2366
2367 MadeChange = true;
2368
2369 TruncTheUse = InsertedTrunc;
2370 }
2371 }
2372 return MadeChange;
2373}
2374
2375/// Sink the shift *right* instruction into user blocks if the uses could
2376/// potentially be combined with this shift instruction and generate BitExtract
2377/// instruction. It will only be applied if the architecture supports BitExtract
2378/// instruction. Here is an example:
2379/// BB1:
2380/// %x.extract.shift = lshr i64 %arg1, 32
2381/// BB2:
2382/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2383/// ==>
2384///
2385/// BB2:
2386/// %x.extract.shift.1 = lshr i64 %arg1, 32
2387/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2388///
2389/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2390/// instruction.
2391/// Return true if any changes are made.
// NOTE(review): embedded source line 2392 (the start of this function's
// signature) is missing from this copy; the body below uses a shift named
// `ShiftI` and a shift-amount value named `CI`.
2393 const TargetLowering &TLI,
2394 const DataLayout &DL) {
2395 BasicBlock *DefBB = ShiftI->getParent();
2396
2397 /// Only insert instructions in each block once.
// NOTE(review): embedded source line 2398 is missing; `InsertedShifts`, which is
// indexed by block further down, is presumably declared on it - confirm.
2399
2400 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2401
2402 bool MadeChange = false;
2403 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2404 UI != E;) {
2405 Use &TheUse = UI.getUse();
2406 Instruction *User = cast<Instruction>(*UI);
2407 // Preincrement use iterator so we don't invalidate it.
2408 ++UI;
2409
2410 // Don't bother for PHI nodes.
2411 if (isa<PHINode>(User))
2412 continue;
2413
// NOTE(review): embedded source line 2414 is missing, so the condition guarding
// this `continue` is not visible here.
2415 continue;
2416
2417 BasicBlock *UserBB = User->getParent();
2418
2419 if (UserBB == DefBB) {
2420 // If the shift and truncate instruction are in the same BB. The use of
2421 // the truncate(TruncUse) may still introduce another truncate if not
2422 // legal. In this case, we would like to sink both shift and truncate
2423 // instruction to the BB of TruncUse.
2424 // for example:
2425 // BB1:
2426 // i64 shift.result = lshr i64 opnd, imm
2427 // trunc.result = trunc shift.result to i16
2428 //
2429 // BB2:
2430 // ----> We will have an implicit truncate here if the architecture does
2431 // not have i16 compare.
2432 // cmp i16 trunc.result, opnd2
2433 //
2434 if (isa<TruncInst>(User) &&
2435 shiftIsLegal
2436 // If the type of the truncate is legal, no truncate will be
2437 // introduced in other basic blocks.
2438 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
// NOTE(review): this assignment overwrites MadeChange rather than OR-ing into
// it, so a `true` from an earlier iteration can be reset to false here -
// confirm this is intended.
2439 MadeChange =
2440 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2441
2442 continue;
2443 }
2444 // If we have already inserted a shift into this block, use it.
2445 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2446
2447 if (!InsertedShift) {
2448 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2449 assert(InsertPt != UserBB->end());
2450
// Recreate the same kind of right shift (arithmetic or logical) as ShiftI.
2451 if (ShiftI->getOpcode() == Instruction::AShr)
2452 InsertedShift =
2453 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2454 else
2455 InsertedShift =
2456 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2457 InsertedShift->insertBefore(*UserBB, InsertPt);
2458 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2459
2460 MadeChange = true;
2461 }
2462
2463 // Replace a use of the shift with a use of the new shift.
2464 TheUse = InsertedShift;
2465 }
2466
2467 // If we removed all uses, or there are none, nuke the shift.
2468 if (ShiftI->use_empty()) {
2469 salvageDebugInfo(*ShiftI);
2470 ShiftI->eraseFromParent();
2471 MadeChange = true;
2472 }
2473
2474 return MadeChange;
2475}
2476
2477/// If counting leading or trailing zeros is an expensive operation and a zero
2478/// input is defined, add a check for zero to avoid calling the intrinsic.
2479///
2480/// We want to transform:
2481/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2482///
2483/// into:
2484/// entry:
2485/// %cmpz = icmp eq i64 %A, 0
2486/// br i1 %cmpz, label %cond.end, label %cond.false
2487/// cond.false:
2488/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2489/// br label %cond.end
2490/// cond.end:
2491/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2492///
2493/// If the transform is performed, return true and set ModifiedDT to
/// ModifyDT::ModifyBBDT.
2494static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2495 LoopInfo &LI,
2496 const TargetLowering *TLI,
2497 const DataLayout *DL, ModifyDT &ModifiedDT,
// NOTE(review): embedded source line 2498 is missing; it presumably declares
// the `FreshBBs` parameter used in the body below - confirm.
2499 bool IsHugeFunc) {
2500 // If a zero input is undefined, it doesn't make sense to despeculate that.
2501 if (match(CountZeros->getOperand(1), m_One()))
2502 return false;
2503
2504 // If it's cheap to speculate, there's nothing to do.
2505 Type *Ty = CountZeros->getType();
2506 auto IntrinsicID = CountZeros->getIntrinsicID();
2507 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2508 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2509 return false;
2510
2511 // Only handle legal scalar cases. Anything else requires too much work.
2512 unsigned SizeInBits = Ty->getScalarSizeInBits();
2513 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2514 return false;
2515
2516 // Bail if the value is never zero.
2517 Use &Op = CountZeros->getOperandUse(0);
2518 if (isKnownNonZero(Op, *DL))
2519 return false;
2520
2521 // The intrinsic will be sunk behind a compare against zero and branch.
2522 BasicBlock *StartBlock = CountZeros->getParent();
2523 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2524 if (IsHugeFunc)
2525 FreshBBs.insert(CallBlock);
2526
2527 // Create another block after the count zero intrinsic. A PHI will be added
2528 // in this block to select the result of the intrinsic or the bit-width
2529 // constant if the input to the intrinsic is zero.
2530 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2531 // Any debug-info after CountZeros should not be included.
2532 SplitPt.setHeadBit(true);
2533 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2534 if (IsHugeFunc)
2535 FreshBBs.insert(EndBlock);
2536
2537 // Update the LoopInfo. The new blocks are in the same loop as the start
2538 // block.
2539 if (Loop *L = LI.getLoopFor(StartBlock)) {
2540 L->addBasicBlockToLoop(CallBlock, LI);
2541 L->addBasicBlockToLoop(EndBlock, LI);
2542 }
2543
2544 // Set up a builder to create a compare, conditional branch, and PHI.
2545 IRBuilder<> Builder(CountZeros->getContext());
2546 Builder.SetInsertPoint(StartBlock->getTerminator());
2547 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2548
2549 // Replace the unconditional branch that was created by the first split with
2550 // a compare against zero and a conditional branch.
2551 Value *Zero = Constant::getNullValue(Ty);
2552 // Avoid introducing branch on poison. This also replaces the ctz operand.
// NOTE(review): embedded source line 2553 is missing from this copy; the
// statement on the next line may be the body of a condition that stood there.
// Op is a reference to the intrinsic's operand use, so assigning to it
// rewrites that operand.
2554 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2555 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2556 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2557 StartBlock->getTerminator()->eraseFromParent();
2558
2559 // Create a PHI in the end block to select either the output of the intrinsic
2560 // or the bit width of the operand.
2561 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2562 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
// Users are redirected to the PHI before the intrinsic is added as an incoming
// value, so the PHI's own reference to CountZeros is not rewritten.
2563 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2564 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2565 PN->addIncoming(BitWidth, StartBlock);
2566 PN->addIncoming(CountZeros, CallBlock);
2567
2568 // We are explicitly handling the zero case, so we can set the intrinsic's
2569 // undefined zero argument to 'true'. This will also prevent reprocessing the
2570 // intrinsic; we only despeculate when a zero input is defined.
2571 CountZeros->setArgOperand(1, Builder.getTrue());
2572 ModifiedDT = ModifyDT::ModifyBBDT;
2573 return true;
2574}
2575
2576bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2577 BasicBlock *BB = CI->getParent();
2578
2579 // Lower inline assembly if we can.
2580 // If we found an inline asm expression, and if the target knows how to
2581 // lower it to normal LLVM code, do so now.
2582 if (CI->isInlineAsm()) {
2583 if (TLI->ExpandInlineAsm(CI)) {
2584 // Avoid invalidating the iterator.
2585 CurInstIterator = BB->begin();
2586 // Avoid processing instructions out of order, which could cause
2587 // reuse before a value is defined.
2588 SunkAddrs.clear();
2589 return true;
2590 }
2591 // Sink address computing for memory operands into the block.
2592 if (optimizeInlineAsmInst(CI))
2593 return true;
2594 }
2595
2596 // Align the pointer arguments to this call if the target thinks it's a good
2597 // idea
2598 unsigned MinSize;
2599 Align PrefAlign;
2600 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2601 for (auto &Arg : CI->args()) {
2602 // We want to align both objects whose address is used directly and
2603 // objects whose address is used in casts and GEPs, though it only makes
2604 // sense for GEPs if the offset is a multiple of the desired alignment and
2605 // if size - offset meets the size threshold.
2606 if (!Arg->getType()->isPointerTy())
2607 continue;
2608 APInt Offset(DL->getIndexSizeInBits(
2609 cast<PointerType>(Arg->getType())->getAddressSpace()),
2610 0);
2612 uint64_t Offset2 = Offset.getLimitedValue();
2613 if (!isAligned(PrefAlign, Offset2))
2614 continue;
2615 AllocaInst *AI;
2616 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2617 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2618 AI->setAlignment(PrefAlign);
2619 // Global variables can only be aligned if they are defined in this
2620 // object (i.e. they are uniquely initialized in this object), and
2621 // over-aligning global variables that have an explicit section is
2622 // forbidden.
2623 GlobalVariable *GV;
2624 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2625 GV->getPointerAlignment(*DL) < PrefAlign &&
2626 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2627 GV->setAlignment(PrefAlign);
2628 }
2629 }
2630 // If this is a memcpy (or similar) then we may be able to improve the
2631 // alignment.
2632 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2633 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2634 MaybeAlign MIDestAlign = MI->getDestAlign();
2635 if (!MIDestAlign || DestAlign > *MIDestAlign)
2636 MI->setDestAlignment(DestAlign);
2637 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2638 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2639 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2640 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2641 MTI->setSourceAlignment(SrcAlign);
2642 }
2643 }
2644
2645 // If we have a cold call site, try to sink addressing computation into the
2646 // cold block. This interacts with our handling for loads and stores to
2647 // ensure that we can fold all uses of a potential addressing computation
2648 // into their uses. TODO: generalize this to work over profiling data
2649 if (CI->hasFnAttr(Attribute::Cold) &&
2650 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2651 for (auto &Arg : CI->args()) {
2652 if (!Arg->getType()->isPointerTy())
2653 continue;
2654 unsigned AS = Arg->getType()->getPointerAddressSpace();
2655 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2656 return true;
2657 }
2658
2659 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2660 if (II) {
2661 switch (II->getIntrinsicID()) {
2662 default:
2663 break;
2664 case Intrinsic::assume:
2665 llvm_unreachable("llvm.assume should have been removed already");
2666 case Intrinsic::allow_runtime_check:
2667 case Intrinsic::allow_ubsan_check:
2668 case Intrinsic::experimental_widenable_condition: {
2669 // Give up on future widening opportunities so that we can fold away dead
2670 // paths and merge blocks before going into block-local instruction
2671 // selection.
2672 if (II->use_empty()) {
2673 II->eraseFromParent();
2674 return true;
2675 }
2676 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2677 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2678 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2679 });
2680 return true;
2681 }
2682 case Intrinsic::objectsize:
2683 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2684 case Intrinsic::is_constant:
2685 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2686 case Intrinsic::aarch64_stlxr:
2687 case Intrinsic::aarch64_stxr: {
2688 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2689 if (!ExtVal || !ExtVal->hasOneUse() ||
2690 ExtVal->getParent() == CI->getParent())
2691 return false;
2692 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2693 ExtVal->moveBefore(CI->getIterator());
2694 // Mark this instruction as "inserted by CGP", so that other
2695 // optimizations don't touch it.
2696 InsertedInsts.insert(ExtVal);
2697 return true;
2698 }
2699
2700 case Intrinsic::launder_invariant_group:
2701 case Intrinsic::strip_invariant_group: {
2702 Value *ArgVal = II->getArgOperand(0);
2703 auto it = LargeOffsetGEPMap.find(II);
2704 if (it != LargeOffsetGEPMap.end()) {
2705 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2706 // Make sure not to have to deal with iterator invalidation
2707 // after possibly adding ArgVal to LargeOffsetGEPMap.
2708 auto GEPs = std::move(it->second);
2709 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2710 LargeOffsetGEPMap.erase(II);
2711 }
2712
2713 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2714 II->eraseFromParent();
2715 return true;
2716 }
2717 case Intrinsic::cttz:
2718 case Intrinsic::ctlz:
2719 // If counting zeros is expensive, try to avoid it.
2720 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2721 IsHugeFunc);
2722 case Intrinsic::fshl:
2723 case Intrinsic::fshr:
2724 return optimizeFunnelShift(II);
2725 case Intrinsic::dbg_assign:
2726 case Intrinsic::dbg_value:
2727 return fixupDbgValue(II);
2728 case Intrinsic::masked_gather:
2729 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2730 case Intrinsic::masked_scatter:
2731 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2732 }
2733
2735 Type *AccessTy;
2736 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2737 while (!PtrOps.empty()) {
2738 Value *PtrVal = PtrOps.pop_back_val();
2739 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2740 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2741 return true;
2742 }
2743 }
2744
2745 // From here on out we're working with named functions.
2746 auto *Callee = CI->getCalledFunction();
2747 if (!Callee)
2748 return false;
2749
2750 // Lower all default uses of _chk calls. This is very similar
2751 // to what InstCombineCalls does, but here we are only lowering calls
2752 // to fortified library functions (e.g. __memcpy_chk) that have the default
2753 // "don't know" as the objectsize. Anything else should be left alone.
2754 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2755 IRBuilder<> Builder(CI);
2756 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2757 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2758 CI->eraseFromParent();
2759 return true;
2760 }
2761
2762 // SCCP may have propagated, among other things, C++ static variables across
2763 // calls. If this happens to be the case, we may want to undo it in order to
2764 // avoid redundant pointer computation of the constant, as the function method
2765 // returning the constant needs to be executed anyways.
2766 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2767 if (!F->getReturnType()->isPointerTy())
2768 return nullptr;
2769
2770 GlobalVariable *UniformValue = nullptr;
2771 for (auto &BB : *F) {
2772 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2773 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2774 if (!UniformValue)
2775 UniformValue = V;
2776 else if (V != UniformValue)
2777 return nullptr;
2778 } else {
2779 return nullptr;
2780 }
2781 }
2782 }
2783
2784 return UniformValue;
2785 };
2786
2787 if (Callee->hasExactDefinition()) {
2788 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2789 bool MadeChange = false;
2790 for (Use &U : make_early_inc_range(RV->uses())) {
2791 auto *I = dyn_cast<Instruction>(U.getUser());
2792 if (!I || I->getParent() != CI->getParent()) {
2793 // Limit to the same basic block to avoid extending the call-site live
2794 // range, which otherwise could increase register pressure.
2795 continue;
2796 }
2797 if (CI->comesBefore(I)) {
2798 U.set(CI);
2799 MadeChange = true;
2800 }
2801 }
2802
2803 return MadeChange;
2804 }
2805 }
2806
2807 return false;
2808}
2809
2811 const CallInst *CI) {
2812 assert(CI && CI->use_empty());
2813
2814 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2815 switch (II->getIntrinsicID()) {
2816 case Intrinsic::memset:
2817 case Intrinsic::memcpy:
2818 case Intrinsic::memmove:
2819 return true;
2820 default:
2821 return false;
2822 }
2823
2824 LibFunc LF;
2825 Function *Callee = CI->getCalledFunction();
2826 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2827 switch (LF) {
2828 case LibFunc_strcpy:
2829 case LibFunc_strncpy:
2830 case LibFunc_strcat:
2831 case LibFunc_strncat:
2832 return true;
2833 default:
2834 return false;
2835 }
2836
2837 return false;
2838}
2839
2840/// Look for opportunities to duplicate return instructions to the predecessor
2841/// to enable tail call optimizations. The case it is currently looking for is
2842/// the following one. Known intrinsics or library function that may be tail
2843/// called are taken into account as well.
2844/// @code
2845/// bb0:
2846/// %tmp0 = tail call i32 @f0()
2847/// br label %return
2848/// bb1:
2849/// %tmp1 = tail call i32 @f1()
2850/// br label %return
2851/// bb2:
2852/// %tmp2 = tail call i32 @f2()
2853/// br label %return
2854/// return:
2855/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2856/// ret i32 %retval
2857/// @endcode
2858///
2859/// =>
2860///
2861/// @code
2862/// bb0:
2863/// %tmp0 = tail call i32 @f0()
2864/// ret i32 %tmp0
2865/// bb1:
2866/// %tmp1 = tail call i32 @f1()
2867/// ret i32 %tmp1
2868/// bb2:
2869/// %tmp2 = tail call i32 @f2()
2870/// ret i32 %tmp2
2871/// @endcode
2872bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2873 ModifyDT &ModifiedDT) {
2874 if (!BB->getTerminator())
2875 return false;
2876
2877 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2878 if (!RetI)
2879 return false;
2880
2881 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2882
2883 PHINode *PN = nullptr;
2884 ExtractValueInst *EVI = nullptr;
2885 BitCastInst *BCI = nullptr;
2886 Value *V = RetI->getReturnValue();
2887 if (V) {
2888 BCI = dyn_cast<BitCastInst>(V);
2889 if (BCI)
2890 V = BCI->getOperand(0);
2891
2892 EVI = dyn_cast<ExtractValueInst>(V);
2893 if (EVI) {
2894 V = EVI->getOperand(0);
2895 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2896 return false;
2897 }
2898
2899 PN = dyn_cast<PHINode>(V);
2900 }
2901
2902 if (PN && PN->getParent() != BB)
2903 return false;
2904
2905 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2906 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2907 if (BC && BC->hasOneUse())
2908 Inst = BC->user_back();
2909
2910 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2911 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2912 return false;
2913 };
2914
2916
2917 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2918 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2919 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2920 // Record the instruction so it can be preserved when the exit block is
2921 // removed. Do not preserve the fake use that uses the result of the
2922 // PHI instruction.
2923 // Do not copy fake uses that use the result of a PHI node.
2924 // FIXME: If we do want to copy the fake use into the return blocks, we
2925 // have to figure out which of the PHI node operands to use for each
2926 // copy.
2927 if (!isa<PHINode>(II->getOperand(0))) {
2928 FakeUses.push_back(II);
2929 }
2930 return true;
2931 }
2932
2933 return false;
2934 };
2935
2936 // Make sure there are no instructions between the first instruction
2937 // and return.
2939 // Skip over debug and the bitcast.
2940 while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
2941 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(&*BI) ||
2942 isFakeUse(&*BI))
2943 BI = std::next(BI);
2944 if (&*BI != RetI)
2945 return false;
2946
2947 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2948 /// call.
2949 const Function *F = BB->getParent();
2950 SmallVector<BasicBlock *, 4> TailCallBBs;
2951 // Record the call instructions so we can insert any fake uses
2952 // that need to be preserved before them.
2954 if (PN) {
2955 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2956 // Look through bitcasts.
2957 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2958 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2959 BasicBlock *PredBB = PN->getIncomingBlock(I);
2960 // Make sure the phi value is indeed produced by the tail call.
2961 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2962 TLI->mayBeEmittedAsTailCall(CI) &&
2963 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2964 TailCallBBs.push_back(PredBB);
2965 CallInsts.push_back(CI);
2966 } else {
2967 // Consider the cases in which the phi value is indirectly produced by
2968 // the tail call, for example when encountering memset(), memmove(),
2969 // strcpy(), whose return value may have been optimized out. In such
2970 // cases, the value needs to be the first function argument.
2971 //
2972 // bb0:
2973 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2974 // br label %return
2975 // return:
2976 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2977 if (PredBB && PredBB->getSingleSuccessor() == BB)
2978 CI = dyn_cast_or_null<CallInst>(
2979 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2980
2981 if (CI && CI->use_empty() &&
2982 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2983 IncomingVal == CI->getArgOperand(0) &&
2984 TLI->mayBeEmittedAsTailCall(CI) &&
2985 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2986 TailCallBBs.push_back(PredBB);
2987 CallInsts.push_back(CI);
2988 }
2989 }
2990 }
2991 } else {
2993 for (BasicBlock *Pred : predecessors(BB)) {
2994 if (!VisitedBBs.insert(Pred).second)
2995 continue;
2996 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2997 CallInst *CI = dyn_cast<CallInst>(I);
2998 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2999 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3000 // Either we return void or the return value must be the first
3001 // argument of a known intrinsic or library function.
3002 if (!V || isa<UndefValue>(V) ||
3003 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3004 V == CI->getArgOperand(0))) {
3005 TailCallBBs.push_back(Pred);
3006 CallInsts.push_back(CI);
3007 }
3008 }
3009 }
3010 }
3011 }
3012
3013 bool Changed = false;
3014 for (auto const &TailCallBB : TailCallBBs) {
3015 // Make sure the call instruction is followed by an unconditional branch to
3016 // the return block.
3017 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3018 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3019 continue;
3020
3021 // Duplicate the return into TailCallBB.
3022 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3024 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3025 BFI->setBlockFreq(BB,
3026 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3027 ModifiedDT = ModifyDT::ModifyBBDT;
3028 Changed = true;
3029 ++NumRetsDup;
3030 }
3031
3032 // If we eliminated all predecessors of the block, delete the block now.
3033 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3034 // Copy the fake uses found in the original return block to all blocks
3035 // that contain tail calls.
3036 for (auto *CI : CallInsts) {
3037 for (auto const *FakeUse : FakeUses) {
3038 auto *ClonedInst = FakeUse->clone();
3039 ClonedInst->insertBefore(CI->getIterator());
3040 }
3041 }
3042 BB->eraseFromParent();
3043 }
3044
3045 return Changed;
3046}
3047
3048//===----------------------------------------------------------------------===//
3049// Memory Optimization
3050//===----------------------------------------------------------------------===//
3051
3052namespace {
3053
3054/// This is an extended version of TargetLowering::AddrMode
3055/// which holds actual Value*'s for register values.
3056struct ExtAddrMode : public TargetLowering::AddrMode {
3057 Value *BaseReg = nullptr;
3058 Value *ScaledReg = nullptr;
3059 Value *OriginalValue = nullptr;
3060 bool InBounds = true;
3061
3062 enum FieldName {
3063 NoField = 0x00,
3064 BaseRegField = 0x01,
3065 BaseGVField = 0x02,
3066 BaseOffsField = 0x04,
3067 ScaledRegField = 0x08,
3068 ScaleField = 0x10,
3069 MultipleFields = 0xff
3070 };
3071
3072 ExtAddrMode() = default;
3073
3074 void print(raw_ostream &OS) const;
3075 void dump() const;
3076
3077 // Replace From in ExtAddrMode with To.
3078 // E.g., SExt insts may be promoted and deleted. We should replace them with
3079 // the promoted values.
3080 void replaceWith(Value *From, Value *To) {
3081 if (ScaledReg == From)
3082 ScaledReg = To;
3083 }
3084
3085 FieldName compare(const ExtAddrMode &other) {
3086 // First check that the types are the same on each field, as differing types
3087 // is something we can't cope with later on.
3088 if (BaseReg && other.BaseReg &&
3089 BaseReg->getType() != other.BaseReg->getType())
3090 return MultipleFields;
3091 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3092 return MultipleFields;
3093 if (ScaledReg && other.ScaledReg &&
3094 ScaledReg->getType() != other.ScaledReg->getType())
3095 return MultipleFields;
3096
3097 // Conservatively reject 'inbounds' mismatches.
3098 if (InBounds != other.InBounds)
3099 return MultipleFields;
3100
3101 // Check each field to see if it differs.
3102 unsigned Result = NoField;
3103 if (BaseReg != other.BaseReg)
3104 Result |= BaseRegField;
3105 if (BaseGV != other.BaseGV)
3106 Result |= BaseGVField;
3107 if (BaseOffs != other.BaseOffs)
3108 Result |= BaseOffsField;
3109 if (ScaledReg != other.ScaledReg)
3110 Result |= ScaledRegField;
3111 // Don't count 0 as being a different scale, because that actually means
3112 // unscaled (which will already be counted by having no ScaledReg).
3113 if (Scale && other.Scale && Scale != other.Scale)
3114 Result |= ScaleField;
3115
3116 if (llvm::popcount(Result) > 1)
3117 return MultipleFields;
3118 else
3119 return static_cast<FieldName>(Result);
3120 }
3121
3122 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3123 // with no offset.
3124 bool isTrivial() {
3125 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3126 // trivial if at most one of these terms is nonzero, except that BaseGV and
3127 // BaseReg both being zero actually means a null pointer value, which we
3128 // consider to be 'non-zero' here.
3129 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3130 }
3131
3132 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3133 switch (Field) {
3134 default:
3135 return nullptr;
3136 case BaseRegField:
3137 return BaseReg;
3138 case BaseGVField:
3139 return BaseGV;
3140 case ScaledRegField:
3141 return ScaledReg;
3142 case BaseOffsField:
3143 return ConstantInt::get(IntPtrTy, BaseOffs);
3144 }
3145 }
3146
3147 void SetCombinedField(FieldName Field, Value *V,
3148 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3149 switch (Field) {
3150 default:
3151 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3152 break;
3153 case ExtAddrMode::BaseRegField:
3154 BaseReg = V;
3155 break;
3156 case ExtAddrMode::BaseGVField:
3157 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3158 // in the BaseReg field.
3159 assert(BaseReg == nullptr);
3160 BaseReg = V;
3161 BaseGV = nullptr;
3162 break;
3163 case ExtAddrMode::ScaledRegField:
3164 ScaledReg = V;
3165 // If we have a mix of scaled and unscaled addrmodes then we want scale
3166 // to be the scale and not zero.
3167 if (!Scale)
3168 for (const ExtAddrMode &AM : AddrModes)
3169 if (AM.Scale) {
3170 Scale = AM.Scale;
3171 break;
3172 }
3173 break;
3174 case ExtAddrMode::BaseOffsField:
3175 // The offset is no longer a constant, so it goes in ScaledReg with a
3176 // scale of 1.
3177 assert(ScaledReg == nullptr);
3178 ScaledReg = V;
3179 Scale = 1;
3180 BaseOffs = 0;
3181 break;
3182 }
3183 }
3184};
3185
3186#ifndef NDEBUG
3187static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3188 AM.print(OS);
3189 return OS;
3190}
3191#endif
3192
3193#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3194void ExtAddrMode::print(raw_ostream &OS) const {
3195 bool NeedPlus = false;
3196 OS << "[";
3197 if (InBounds)
3198 OS << "inbounds ";
3199 if (BaseGV) {
3200 OS << "GV:";
3201 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3202 NeedPlus = true;
3203 }
3204
3205 if (BaseOffs) {
3206 OS << (NeedPlus ? " + " : "") << BaseOffs;
3207 NeedPlus = true;
3208 }
3209
3210 if (BaseReg) {
3211 OS << (NeedPlus ? " + " : "") << "Base:";
3212 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3213 NeedPlus = true;
3214 }
3215 if (Scale) {
3216 OS << (NeedPlus ? " + " : "") << Scale << "*";
3217 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3218 }
3219
3220 OS << ']';
3221}
3222
3223LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3224 print(dbgs());
3225 dbgs() << '\n';
3226}
3227#endif
3228
3229} // end anonymous namespace
3230
3231namespace {
3232
3233/// This class provides transaction based operation on the IR.
3234/// Every change made through this class is recorded in the internal state and
3235/// can be undone (rollback) until commit is called.
3236/// CGP does not check if instructions could be speculatively executed when
3237/// moved. Preserving the original location would pessimize the debugging
3238/// experience, as well as negatively impact the quality of sample PGO.
3239class TypePromotionTransaction {
3240 /// This represents the common interface of the individual transaction.
3241 /// Each class implements the logic for doing one specific modification on
3242 /// the IR via the TypePromotionTransaction.
3243 class TypePromotionAction {
3244 protected:
3245 /// The Instruction modified.
3246 Instruction *Inst;
3247
3248 public:
3249 /// Constructor of the action.
3250 /// The constructor performs the related action on the IR.
3251 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3252
3253 virtual ~TypePromotionAction() = default;
3254
3255 /// Undo the modification done by this action.
3256 /// When this method is called, the IR must be in the same state as it was
3257 /// before this action was applied.
3258 /// \pre Undoing the action works if and only if the IR is in the exact same
3259 /// state as it was directly after this action was applied.
3260 virtual void undo() = 0;
3261
3262 /// Advocate every change made by this action.
3263 /// When the results on the IR of the action are to be kept, it is important
3264 /// to call this function, otherwise hidden information may be kept forever.
3265 virtual void commit() {
3266 // Nothing to be done, this action is not doing anything.
3267 }
3268 };
3269
3270 /// Utility to remember the position of an instruction.
3271 class InsertionHandler {
3272 /// Position of an instruction.
3273 /// Either an instruction:
3274 /// - Is the first in a basic block: BB is used.
3275 /// - Has a previous instruction: PrevInst is used.
3276 struct {
3277 BasicBlock::iterator PrevInst;
3278 BasicBlock *BB;
3279 } Point;
3280 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3281
3282 /// Remember whether or not the instruction had a previous instruction.
3283 bool HasPrevInstruction;
3284
3285 public:
3286 /// Record the position of \p Inst.
3287 InsertionHandler(Instruction *Inst) {
3288 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3289 BasicBlock *BB = Inst->getParent();
3290
3291 // Record where we would have to re-insert the instruction in the sequence
3292 // of DbgRecords, if we ended up reinserting.
3293 if (BB->IsNewDbgInfoFormat)
3294 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3295
3296 if (HasPrevInstruction) {
3297 Point.PrevInst = std::prev(Inst->getIterator());
3298 } else {
3299 Point.BB = BB;
3300 }
3301 }
3302
3303 /// Insert \p Inst at the recorded position.
3304 void insert(Instruction *Inst) {
3305 if (HasPrevInstruction) {
3306 if (Inst->getParent())
3307 Inst->removeFromParent();
3308 Inst->insertAfter(Point.PrevInst);
3309 } else {
3310 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3311 if (Inst->getParent())
3312 Inst->moveBefore(*Point.BB, Position);
3313 else
3314 Inst->insertBefore(*Point.BB, Position);
3315 }
3316
3317 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3318 }
3319 };
3320
3321 /// Move an instruction before another.
3322 class InstructionMoveBefore : public TypePromotionAction {
3323 /// Original position of the instruction.
3324 InsertionHandler Position;
3325
3326 public:
3327 /// Move \p Inst before \p Before.
3328 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3329 : TypePromotionAction(Inst), Position(Inst) {
3330 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3331 << "\n");
3332 Inst->moveBefore(Before);
3333 }
3334
3335 /// Move the instruction back to its original position.
3336 void undo() override {
3337 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3338 Position.insert(Inst);
3339 }
3340 };
3341
3342 /// Set the operand of an instruction with a new value.
3343 class OperandSetter : public TypePromotionAction {
3344 /// Original operand of the instruction.
3345 Value *Origin;
3346
3347 /// Index of the modified instruction.
3348 unsigned Idx;
3349
3350 public:
3351 /// Set \p Idx operand of \p Inst with \p NewVal.
3352 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3353 : TypePromotionAction(Inst), Idx(Idx) {
3354 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3355 << "for:" << *Inst << "\n"
3356 << "with:" << *NewVal << "\n");
3357 Origin = Inst->getOperand(Idx);
3358 Inst->setOperand(Idx, NewVal);
3359 }
3360
3361 /// Restore the original value of the instruction.
3362 void undo() override {
3363 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3364 << "for: " << *Inst << "\n"
3365 << "with: " << *Origin << "\n");
3366 Inst->setOperand(Idx, Origin);
3367 }
3368 };
3369
3370 /// Hide the operands of an instruction.
3371 /// Do as if this instruction was not using any of its operands.
3372 class OperandsHider : public TypePromotionAction {
3373 /// The list of original operands.
3374 SmallVector<Value *, 4> OriginalValues;
3375
3376 public:
3377 /// Remove \p Inst from the uses of the operands of \p Inst.
3378 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3379 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3380 unsigned NumOpnds = Inst->getNumOperands();
3381 OriginalValues.reserve(NumOpnds);
3382 for (unsigned It = 0; It < NumOpnds; ++It) {
3383 // Save the current operand.
3384 Value *Val = Inst->getOperand(It);
3385 OriginalValues.push_back(Val);
3386 // Set a dummy one.
3387 // We could use OperandSetter here, but that would imply an overhead
3388 // that we are not willing to pay.
3389 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3390 }
3391 }
3392
3393 /// Restore the original list of uses.
3394 void undo() override {
3395 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3396 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3397 Inst->setOperand(It, OriginalValues[It]);
3398 }
3399 };
3400
3401 /// Build a truncate instruction.
3402 class TruncBuilder : public TypePromotionAction {
3403 Value *Val;
3404
3405 public:
3406 /// Build a truncate instruction of \p Opnd producing a \p Ty
3407 /// result.
3408 /// trunc Opnd to Ty.
3409 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3410 IRBuilder<> Builder(Opnd);
3411 Builder.SetCurrentDebugLocation(DebugLoc());
3412 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3413 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3414 }
3415
3416 /// Get the built value.
3417 Value *getBuiltValue() { return Val; }
3418
3419 /// Remove the built instruction.
3420 void undo() override {
3421 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3422 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3423 IVal->eraseFromParent();
3424 }
3425 };
3426
3427 /// Build a sign extension instruction.
3428 class SExtBuilder : public TypePromotionAction {
3429 Value *Val;
3430
3431 public:
3432 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3433 /// result.
3434 /// sext Opnd to Ty.
3435 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3436 : TypePromotionAction(InsertPt) {
3437 IRBuilder<> Builder(InsertPt);
3438 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3439 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3440 }
3441
3442 /// Get the built value.
3443 Value *getBuiltValue() { return Val; }
3444
3445 /// Remove the built instruction.
3446 void undo() override {
3447 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3448 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3449 IVal->eraseFromParent();
3450 }
3451 };
3452
3453 /// Build a zero extension instruction.
3454 class ZExtBuilder : public TypePromotionAction {
3455 Value *Val;
3456
3457 public:
3458 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3459 /// result.
3460 /// zext Opnd to Ty.
3461 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3462 : TypePromotionAction(InsertPt) {
3463 IRBuilder<> Builder(InsertPt);
3464 Builder.SetCurrentDebugLocation(DebugLoc());
3465 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3466 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3467 }
3468
3469 /// Get the built value.
3470 Value *getBuiltValue() { return Val; }
3471
3472 /// Remove the built instruction.
3473 void undo() override {
3474 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3475 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3476 IVal->eraseFromParent();
3477 }
3478 };
3479
3480 /// Mutate an instruction to another type.
3481 class TypeMutator : public TypePromotionAction {
3482 /// Record the original type.
3483 Type *OrigTy;
3484
3485 public:
3486 /// Mutate the type of \p Inst into \p NewTy.
3487 TypeMutator(Instruction *Inst, Type *NewTy)
3488 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3489 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3490 << "\n");
3491 Inst->mutateType(NewTy);
3492 }
3493
3494 /// Mutate the instruction back to its original type.
3495 void undo() override {
3496 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3497 << "\n");
3498 Inst->mutateType(OrigTy);
3499 }
3500 };
3501
3502 /// Replace the uses of an instruction by another instruction.
3503 class UsesReplacer : public TypePromotionAction {
3504 /// Helper structure to keep track of the replaced uses.
3505 struct InstructionAndIdx {
3506 /// The instruction using the instruction.
3507 Instruction *Inst;
3508
3509 /// The index where this instruction is used for Inst.
3510 unsigned Idx;
3511
3512 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3513 : Inst(Inst), Idx(Idx) {}
3514 };
3515
3516 /// Keep track of the original uses (pair Instruction, Index).
3518 /// Keep track of the debug users.
3520 /// And non-instruction debug-users too.
3521 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3522
3523 /// Keep track of the new value so that we can undo it by replacing
3524 /// instances of the new value with the original value.
3525 Value *New;
3526
3528
3529 public:
3530 /// Replace all the use of \p Inst by \p New.
3531 UsesReplacer(Instruction *Inst, Value *New)
3532 : TypePromotionAction(Inst), New(New) {
3533 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3534 << "\n");
3535 // Record the original uses.
3536 for (Use &U : Inst->uses()) {
3537 Instruction *UserI = cast<Instruction>(U.getUser());
3538 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3539 }
3540 // Record the debug uses separately. They are not in the instruction's
3541 // use list, but they are replaced by RAUW.
3542 findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3543
3544 // Now, we can replace the uses.
3545 Inst->replaceAllUsesWith(New);
3546 }
3547
3548 /// Reassign the original uses of Inst to Inst.
3549 void undo() override {
3550 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3551 for (InstructionAndIdx &Use : OriginalUses)
3552 Use.Inst->setOperand(Use.Idx, Inst);
3553 // RAUW has replaced all original uses with references to the new value,
3554 // including the debug uses. Since we are undoing the replacements,
3555 // the original debug uses must also be reinstated to maintain the
3556 // correctness and utility of debug value instructions.
3557 for (auto *DVI : DbgValues)
3558 DVI->replaceVariableLocationOp(New, Inst);
3559 // Similar story with DbgVariableRecords, the non-instruction
3560 // representation of dbg.values.
3561 for (DbgVariableRecord *DVR : DbgVariableRecords)
3562 DVR->replaceVariableLocationOp(New, Inst);
3563 }
3564 };
3565
3566 /// Remove an instruction from the IR.
3567 class InstructionRemover : public TypePromotionAction {
3568 /// Original position of the instruction.
3569 InsertionHandler Inserter;
3570
3571 /// Helper structure to hide all the link to the instruction. In other
3572 /// words, this helps to do as if the instruction was removed.
3573 OperandsHider Hider;
3574
3575 /// Keep track of the uses replaced, if any.
3576 UsesReplacer *Replacer = nullptr;
3577
3578 /// Keep track of instructions removed.
3579 SetOfInstrs &RemovedInsts;
3580
3581 public:
3582 /// Remove all reference of \p Inst and optionally replace all its
3583 /// uses with New.
3584 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3585 /// \pre If !Inst->use_empty(), then New != nullptr
3586 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3587 Value *New = nullptr)
3588 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3589 RemovedInsts(RemovedInsts) {
3590 if (New)
3591 Replacer = new UsesReplacer(Inst, New);
3592 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3593 RemovedInsts.insert(Inst);
3594 /// The instructions removed here will be freed after completing
3595 /// optimizeBlock() for all blocks as we need to keep track of the
3596 /// removed instructions during promotion.
3597 Inst->removeFromParent();
3598 }
3599
3600 ~InstructionRemover() override { delete Replacer; }
3601
3602 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3603 InstructionRemover(const InstructionRemover &other) = delete;
3604
3605 /// Resurrect the instruction and reassign it to the proper uses if
3606 /// new value was provided when build this action.
3607 void undo() override {
3608 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3609 Inserter.insert(Inst);
3610 if (Replacer)
3611 Replacer->undo();
3612 Hider.undo();
3613 RemovedInsts.erase(Inst);
3614 }
3615 };
3616
3617public:
3618 /// Restoration point.
3619 /// The restoration point is a pointer to an action instead of an iterator
3620 /// because the iterator may be invalidated but not the pointer.
3621 using ConstRestorationPt = const TypePromotionAction *;
3622
3623 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3624 : RemovedInsts(RemovedInsts) {}
3625
3626 /// Advocate every changes made in that transaction. Return true if any change
3627 /// happen.
3628 bool commit();
3629
3630 /// Undo all the changes made after the given point.
3631 void rollback(ConstRestorationPt Point);
3632
3633 /// Get the current restoration point.
3634 ConstRestorationPt getRestorationPoint() const;
3635
3636 /// \name API for IR modification with state keeping to support rollback.
3637 /// @{
3638 /// Same as Instruction::setOperand.
3639 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3640
3641 /// Same as Instruction::eraseFromParent.
3642 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3643
3644 /// Same as Value::replaceAllUsesWith.
3645 void replaceAllUsesWith(Instruction *Inst, Value *New);
3646
3647 /// Same as Value::mutateType.
3648 void mutateType(Instruction *Inst, Type *NewTy);
3649
3650 /// Same as IRBuilder::createTrunc.
3651 Value *createTrunc(Instruction *Opnd, Type *Ty);
3652
3653 /// Same as IRBuilder::createSExt.
3654 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3655
3656 /// Same as IRBuilder::createZExt.
3657 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3658
3659private:
3660 /// The ordered list of actions made so far.
3662
3663 using CommitPt =
3665
3666 SetOfInstrs &RemovedInsts;
3667};
3668
3669} // end anonymous namespace
3670
3671void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3672 Value *NewVal) {
3673 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3674 Inst, Idx, NewVal));
3675}
3676
3677void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3678 Value *NewVal) {
3679 Actions.push_back(
3680 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3681 Inst, RemovedInsts, NewVal));
3682}
3683
3684void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3685 Value *New) {
3686 Actions.push_back(
3687 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3688}
3689
3690void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3691 Actions.push_back(
3692 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3693}
3694
3695Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3696 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3697 Value *Val = Ptr->getBuiltValue();
3698 Actions.push_back(std::move(Ptr));
3699 return Val;
3700}
3701
3702Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3703 Type *Ty) {
3704 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3705 Value *Val = Ptr->getBuiltValue();
3706 Actions.push_back(std::move(Ptr));
3707 return Val;
3708}
3709
3710Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3711 Type *Ty) {
3712 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3713 Value *Val = Ptr->getBuiltValue();
3714 Actions.push_back(std::move(Ptr));
3715 return Val;
3716}
3717
3718TypePromotionTransaction::ConstRestorationPt
3719TypePromotionTransaction::getRestorationPoint() const {
3720 return !Actions.empty() ? Actions.back().get() : nullptr;
3721}
3722
3723bool TypePromotionTransaction::commit() {
3724 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3725 Action->commit();
3726 bool Modified = !Actions.empty();
3727 Actions.clear();
3728 return Modified;
3729}
3730
3731void TypePromotionTransaction::rollback(
3732 TypePromotionTransaction::ConstRestorationPt Point) {
3733 while (!Actions.empty() && Point != Actions.back().get()) {
3734 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3735 Curr->undo();
3736 }
3737}
3738
3739namespace {
3740
3741/// A helper class for matching addressing modes.
3742///
3743/// This encapsulates the logic for matching the target-legal addressing modes.
3744class AddressingModeMatcher {
3745 SmallVectorImpl<Instruction *> &AddrModeInsts;
3746 const TargetLowering &TLI;
3747 const TargetRegisterInfo &TRI;
3748 const DataLayout &DL;
3749 const LoopInfo &LI;
3750 const std::function<const DominatorTree &()> getDTFn;
3751
3752 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3753 /// the memory instruction that we're computing this address for.
3754 Type *AccessTy;
3755 unsigned AddrSpace;
3756 Instruction *MemoryInst;
3757
3758 /// This is the addressing mode that we're building up. This is
3759 /// part of the return value of this addressing mode matching stuff.
3761
3762 /// The instructions inserted by other CodeGenPrepare optimizations.
3763 const SetOfInstrs &InsertedInsts;
3764
3765 /// A map from the instructions to their type before promotion.
3766 InstrToOrigTy &PromotedInsts;
3767
3768 /// The ongoing transaction where every action should be registered.
3769 TypePromotionTransaction &TPT;
3770
3771 // A GEP which has too large offset to be folded into the addressing mode.
3772 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3773
3774 /// This is set to true when we should not do profitability checks.
3775 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3776 bool IgnoreProfitability;
3777
3778 /// True if we are optimizing for size.
3779 bool OptSize = false;
3780
3781 ProfileSummaryInfo *PSI;
3783
3784 AddressingModeMatcher(
3786 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3787 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3788 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3789 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3790 TypePromotionTransaction &TPT,
3791 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3792 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3793 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3794 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3795 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3796 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3797 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3798 IgnoreProfitability = false;
3799 }
3800
3801public:
3802 /// Find the maximal addressing mode that a load/store of V can fold,
3803 /// give an access type of AccessTy. This returns a list of involved
3804 /// instructions in AddrModeInsts.
3805 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3806 /// optimizations.
3807 /// \p PromotedInsts maps the instructions to their type before promotion.
3808 /// \p The ongoing transaction where every action should be registered.
3809 static ExtAddrMode
3810 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3811 SmallVectorImpl<Instruction *> &AddrModeInsts,
3812 const TargetLowering &TLI, const LoopInfo &LI,
3813 const std::function<const DominatorTree &()> getDTFn,
3814 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3815 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3816 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3817 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3819
3820 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3821 AccessTy, AS, MemoryInst, Result,
3822 InsertedInsts, PromotedInsts, TPT,
3823 LargeOffsetGEP, OptSize, PSI, BFI)
3824 .matchAddr(V, 0);
3825 (void)Success;
3826 assert(Success && "Couldn't select *anything*?");
3827 return Result;
3828 }
3829
3830private:
3831 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3832 bool matchAddr(Value *Addr, unsigned Depth);
3833 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3834 bool *MovedAway = nullptr);
3835 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3836 ExtAddrMode &AMBefore,
3837 ExtAddrMode &AMAfter);
3838 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3839 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3840 Value *PromotedOperand) const;
3841};
3842
3843class PhiNodeSet;
3844
3845/// An iterator for PhiNodeSet.
3846class PhiNodeSetIterator {
3847 PhiNodeSet *const Set;
3848 size_t CurrentIndex = 0;
3849
3850public:
3851 /// The constructor. Start should point to either a valid element, or be equal
3852 /// to the size of the underlying SmallVector of the PhiNodeSet.
3853 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3854 PHINode *operator*() const;
3855 PhiNodeSetIterator &operator++();
3856 bool operator==(const PhiNodeSetIterator &RHS) const;
3857 bool operator!=(const PhiNodeSetIterator &RHS) const;
3858};
3859
3860/// Keeps a set of PHINodes.
3861///
3862/// This is a minimal set implementation for a specific use case:
3863/// It is very fast when there are very few elements, but also provides good
3864/// performance when there are many. It is similar to SmallPtrSet, but also
3865/// provides iteration by insertion order, which is deterministic and stable
3866/// across runs. It is also similar to SmallSetVector, but provides removing
3867/// elements in O(1) time. This is achieved by not actually removing the element
3868/// from the underlying vector, so comes at the cost of using more memory, but
3869/// that is fine, since PhiNodeSets are used as short lived objects.
3870class PhiNodeSet {
3871 friend class PhiNodeSetIterator;
3872
3874 using iterator = PhiNodeSetIterator;
3875
3876 /// Keeps the elements in the order of their insertion in the underlying
3877 /// vector. To achieve constant time removal, it never deletes any element.
3879
3880 /// Keeps the elements in the underlying set implementation. This (and not the
3881 /// NodeList defined above) is the source of truth on whether an element
3882 /// is actually in the collection.
3883 MapType NodeMap;
3884
3885 /// Points to the first valid (not deleted) element when the set is not empty
3886 /// and the value is not zero. Equals to the size of the underlying vector
3887 /// when the set is empty. When the value is 0, as in the beginning, the
3888 /// first element may or may not be valid.
3889 size_t FirstValidElement = 0;
3890
3891public:
3892 /// Inserts a new element to the collection.
3893 /// \returns true if the element is actually added, i.e. was not in the
3894 /// collection before the operation.
3895 bool insert(PHINode *Ptr) {
3896 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3897 NodeList.push_back(Ptr);
3898 return true;
3899 }
3900 return false;
3901 }
3902
3903 /// Removes the element from the collection.
3904 /// \returns whether the element is actually removed, i.e. was in the
3905 /// collection before the operation.
3906 bool erase(PHINode *Ptr) {
3907 if (NodeMap.erase(Ptr)) {
3908 SkipRemovedElements(FirstValidElement);
3909 return true;
3910 }
3911 return false;
3912 }
3913
3914 /// Removes all elements and clears the collection.
3915 void clear() {
3916 NodeMap.clear();
3917 NodeList.clear();
3918 FirstValidElement = 0;
3919 }
3920
3921 /// \returns an iterator that will iterate the elements in the order of
3922 /// insertion.
3923 iterator begin() {
3924 if (FirstValidElement == 0)
3925 SkipRemovedElements(FirstValidElement);
3926 return PhiNodeSetIterator(this, FirstValidElement);
3927 }
3928
3929 /// \returns an iterator that points to the end of the collection.
3930 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3931
3932 /// Returns the number of elements in the collection.
3933 size_t size() const { return NodeMap.size(); }
3934
3935 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3936 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3937
3938private:
3939 /// Updates the CurrentIndex so that it will point to a valid element.
3940 ///
3941 /// If the element of NodeList at CurrentIndex is valid, it does not
3942 /// change it. If there are no more valid elements, it updates CurrentIndex
3943 /// to point to the end of the NodeList.
3944 void SkipRemovedElements(size_t &CurrentIndex) {
3945 while (CurrentIndex < NodeList.size()) {
3946 auto it = NodeMap.find(NodeList[CurrentIndex]);
3947 // If the element has been deleted and added again later, NodeMap will
3948 // point to a different index, so CurrentIndex will still be invalid.
3949 if (it != NodeMap.end() && it->second == CurrentIndex)
3950 break;
3951 ++CurrentIndex;
3952 }
3953 }
3954};
3955
3956PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3957 : Set(Set), CurrentIndex(Start) {}
3958
3959PHINode *PhiNodeSetIterator::operator*() const {
3960 assert(CurrentIndex < Set->NodeList.size() &&
3961 "PhiNodeSet access out of range");
3962 return Set->NodeList[CurrentIndex];
3963}
3964
3965PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3966 assert(CurrentIndex < Set->NodeList.size() &&
3967 "PhiNodeSet access out of range");
3968 ++CurrentIndex;
3969 Set->SkipRemovedElements(CurrentIndex);
3970 return *this;
3971}
3972
3973bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3974 return CurrentIndex == RHS.CurrentIndex;
3975}
3976
3977bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3978 return !((*this) == RHS);
3979}
3980
3981/// Keep track of simplification of Phi nodes.
3982/// Accept the set of all phi nodes and erase phi node from this set
3983/// if it is simplified.
3984class SimplificationTracker {
3986 const SimplifyQuery &SQ;
3987 // Tracks newly created Phi nodes. The elements are iterated by insertion
3988 // order.
3989 PhiNodeSet AllPhiNodes;
3990 // Tracks newly created Select nodes.
3991 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3992
3993public:
3994 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3995
3996 Value *Get(Value *V) {
3997 do {
3998 auto SV = Storage.find(V);
3999 if (SV == Storage.end())
4000 return V;
4001 V = SV->second;
4002 } while (true);
4003 }
4004
4005 Value *Simplify(Value *Val) {
4006 SmallVector<Value *, 32> WorkList;
4008 WorkList.push_back(Val);
4009 while (!WorkList.empty()) {
4010 auto *P = WorkList.pop_back_val();
4011 if (!Visited.insert(P).second)
4012 continue;
4013 if (auto *PI = dyn_cast<Instruction>(P))
4014 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
4015 for (auto *U : PI->users())
4016 WorkList.push_back(cast<Value>(U));
4017 Put(PI, V);
4018 PI->replaceAllUsesWith(V);
4019 if (auto *PHI = dyn_cast<PHINode>(PI))
4020 AllPhiNodes.erase(PHI);
4021 if (auto *Select = dyn_cast<SelectInst>(PI))
4022 AllSelectNodes.erase(Select);
4023 PI->eraseFromParent();
4024 }
4025 }
4026 return Get(Val);
4027 }
4028
4029 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4030
4031 void ReplacePhi(PHINode *From, PHINode *To) {
4032 Value *OldReplacement = Get(From);
4033 while (OldReplacement != From) {
4034 From = To;
4035 To = dyn_cast<PHINode>(OldReplacement);
4036 OldReplacement = Get(From);
4037 }
4038 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4039 Put(From, To);
4040 From->replaceAllUsesWith(To);
4041 AllPhiNodes.erase(From);
4042 From->eraseFromParent();
4043 }
4044
4045 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4046
4047 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4048
4049 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4050
4051 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4052
4053 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4054
4055 void destroyNewNodes(Type *CommonType) {
4056 // For safe erasing, replace the uses with dummy value first.
4057 auto *Dummy = PoisonValue::get(CommonType);
4058 for (auto *I : AllPhiNodes) {
4059 I->replaceAllUsesWith(Dummy);
4060 I->eraseFromParent();
4061 }
4062 AllPhiNodes.clear();
4063 for (auto *I : AllSelectNodes) {
4064 I->replaceAllUsesWith(Dummy);
4065 I->eraseFromParent();
4066 }
4067 AllSelectNodes.clear();
4068 }
4069};
4070
4071/// A helper class for combining addressing modes.
4072class AddressingModeCombiner {
4073 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4074 typedef std::pair<PHINode *, PHINode *> PHIPair;
4075
4076private:
4077 /// The addressing modes we've collected.
4079
4080 /// The field in which the AddrModes differ, when we have more than one.
4081 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4082
4083 /// Are the AddrModes that we have all just equal to their original values?
4084 bool AllAddrModesTrivial = true;
4085
4086 /// Common Type for all different fields in addressing modes.
4087 Type *CommonType = nullptr;
4088
4089 /// SimplifyQuery for simplifyInstruction utility.
4090 const SimplifyQuery &SQ;
4091
4092 /// Original Address.
4093 Value *Original;
4094
4095 /// Common value among addresses
4096 Value *CommonValue = nullptr;
4097
4098public:
4099 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
4100 : SQ(_SQ), Original(OriginalValue) {}
4101
4102 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4103
4104 /// Get the combined AddrMode
4105 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4106
4107 /// Add a new AddrMode if it's compatible with the AddrModes we already
4108 /// have.
4109 /// \return True iff we succeeded in doing so.
4110 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4111 // Take note of if we have any non-trivial AddrModes, as we need to detect
4112 // when all AddrModes are trivial as then we would introduce a phi or select
4113 // which just duplicates what's already there.
4114 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4115
4116 // If this is the first addrmode then everything is fine.
4117 if (AddrModes.empty()) {
4118 AddrModes.emplace_back(NewAddrMode);
4119 return true;
4120 }
4121
4122 // Figure out how different this is from the other address modes, which we
4123 // can do just by comparing against the first one given that we only care
4124 // about the cumulative difference.
4125 ExtAddrMode::FieldName ThisDifferentField =
4126 AddrModes[0].compare(NewAddrMode);
4127 if (DifferentField == ExtAddrMode::NoField)
4128 DifferentField = ThisDifferentField;
4129 else if (DifferentField != ThisDifferentField)
4130 DifferentField = ExtAddrMode::MultipleFields;
4131
4132 // If NewAddrMode differs in more than one dimension we cannot handle it.
4133 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4134
4135 // If Scale Field is different then we reject.
4136 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4137
4138 // We also must reject the case when base offset is different and
4139 // scale reg is not null, we cannot handle this case due to merge of
4140 // different offsets will be used as ScaleReg.
4141 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4142 !NewAddrMode.ScaledReg);
4143
4144 // We also must reject the case when GV is different and BaseReg installed
4145 // due to we want to use base reg as a merge of GV values.
4146 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4147 !NewAddrMode.HasBaseReg);
4148
4149 // Even if NewAddMode is the same we still need to collect it due to
4150 // original value is different. And later we will need all original values
4151 // as anchors during finding the common Phi node.
4152 if (CanHandle)
4153 AddrModes.emplace_back(NewAddrMode);
4154 else
4155 AddrModes.clear();
4156
4157 return CanHandle;
4158 }
4159
4160 /// Combine the addressing modes we've collected into a single
4161 /// addressing mode.
4162 /// \return True iff we successfully combined them or we only had one so
4163 /// didn't need to combine them anyway.
4164 bool combineAddrModes() {
4165 // If we have no AddrModes then they can't be combined.
4166 if (AddrModes.size() == 0)
4167 return false;
4168
4169 // A single AddrMode can trivially be combined.
4170 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4171 return true;
4172
4173 // If the AddrModes we collected are all just equal to the value they are
4174 // derived from then combining them wouldn't do anything useful.
4175 if (AllAddrModesTrivial)
4176 return false;
4177
4178 if (!addrModeCombiningAllowed())
4179 return false;
4180
4181 // Build a map between <original value, basic block where we saw it> to
4182 // value of base register.
4183 // Bail out if there is no common type.
4184 FoldAddrToValueMapping Map;
4185 if (!initializeMap(Map))
4186 return false;
4187
4188 CommonValue = findCommon(Map);
4189 if (CommonValue)
4190 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4191 return CommonValue != nullptr;
4192 }
4193
4194private:
4195 /// `CommonValue` may be a placeholder inserted by us.
4196 /// If the placeholder is not used, we should remove this dead instruction.
4197 void eraseCommonValueIfDead() {
4198 if (CommonValue && CommonValue->getNumUses() == 0)
4199 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4200 CommonInst->eraseFromParent();
4201 }
4202
4203 /// Initialize Map with anchor values. For address seen
4204 /// we set the value of different field saw in this address.
4205 /// At the same time we find a common type for different field we will
4206 /// use to create new Phi/Select nodes. Keep it in CommonType field.
4207 /// Return false if there is no common type found.
4208 bool initializeMap(FoldAddrToValueMapping &Map) {
4209 // Keep track of keys where the value is null. We will need to replace it
4210 // with constant null when we know the common type.
4211 SmallVector<Value *, 2> NullValue;
4212 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4213 for (auto &AM : AddrModes) {
4214 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4215 if (DV) {
4216 auto *Type = DV->getType();
4217 if (CommonType && CommonType != Type)
4218 return false;
4219 CommonType = Type;
4220 Map[AM.OriginalValue] = DV;
4221 } else {
4222 NullValue.push_back(AM.OriginalValue);
4223 }
4224 }
4225 assert(CommonType && "At least one non-null value must be!");
4226 for (auto *V : NullValue)
4227 Map[V] = Constant::getNullValue(CommonType);
4228 return true;
4229 }
4230
4231 /// We have mapping between value A and other value B where B was a field in
4232 /// addressing mode represented by A. Also we have an original value C
4233 /// representing an address we start with. Traversing from C through phi and
4234 /// selects we ended up with A's in a map. This utility function tries to find
4235 /// a value V which is a field in addressing mode C and traversing through phi
4236 /// nodes and selects we will end up in corresponded values B in a map.
4237 /// The utility will create a new Phi/Selects if needed.
4238 // The simple example looks as follows:
4239 // BB1:
4240 // p1 = b1 + 40
4241 // br cond BB2, BB3
4242 // BB2:
4243 // p2 = b2 + 40
4244 // br BB3
4245 // BB3:
4246 // p = phi [p1, BB1], [p2, BB2]
4247 // v = load p
4248 // Map is
4249 // p1 -> b1
4250 // p2 -> b2
4251 // Request is
4252 // p -> ?
4253 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4254 Value *findCommon(FoldAddrToValueMapping &Map) {
4255 // Tracks the simplification of newly created phi nodes. The reason we use
4256 // this mapping is because we will add new created Phi nodes in AddrToBase.
4257 // Simplification of Phi nodes is recursive, so some Phi node may
4258 // be simplified after we added it to AddrToBase. In reality this
4259 // simplification is possible only if original phi/selects were not
4260 // simplified yet.
4261 // Using this mapping we can find the current value in AddrToBase.
4262 SimplificationTracker ST(SQ);
4263
4264 // First step, DFS to create PHI nodes for all intermediate blocks.
4265 // Also fill traverse order for the second step.
4266 SmallVector<Value *, 32> TraverseOrder;
4267 InsertPlaceholders(Map, TraverseOrder, ST);
4268
4269 // Second Step, fill new nodes by merged values and simplify if possible.
4270 FillPlaceholders(Map, TraverseOrder, ST);
4271
4272 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4273 ST.destroyNewNodes(CommonType);
4274 return nullptr;
4275 }
4276
4277 // Now we'd like to match New Phi nodes to existed ones.
4278 unsigned PhiNotMatchedCount = 0;
4279 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4280 ST.destroyNewNodes(CommonType);
4281 return nullptr;
4282 }
4283
4284 auto *Result = ST.Get(Map.find(Original)->second);
4285 if (Result) {
4286 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4287 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4288 }
4289 return Result;
4290 }
4291
4292 /// Try to match PHI node to Candidate.
4293 /// Matcher tracks the matched Phi nodes.
4294 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4296 PhiNodeSet &PhiNodesToMatch) {
4297 SmallVector<PHIPair, 8> WorkList;
4298 Matcher.insert({PHI, Candidate});
4299 SmallSet<PHINode *, 8> MatchedPHIs;
4300 MatchedPHIs.insert(PHI);
4301 WorkList.push_back({PHI, Candidate});
4302 SmallSet<PHIPair, 8> Visited;
4303 while (!WorkList.empty()) {
4304 auto Item = WorkList.pop_back_val();
4305 if (!Visited.insert(Item).second)
4306 continue;
4307 // We iterate over all incoming values to Phi to compare them.
4308 // If values are different and both of them Phi and the first one is a
4309 // Phi we added (subject to match) and both of them is in the same basic
4310 // block then we can match our pair if values match. So we state that
4311 // these values match and add it to work list to verify that.
4312 for (auto *B : Item.first->blocks()) {
4313 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4314 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4315 if (FirstValue == SecondValue)
4316 continue;
4317
4318 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4319 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4320
4321 // One of them is not Phi or
4322 // The first one is not Phi node from the set we'd like to match or
4323 // Phi nodes from different basic blocks then
4324 // we will not be able to match.
4325 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4326 FirstPhi->getParent() != SecondPhi->getParent())
4327 return false;
4328
4329 // If we already matched them then continue.
4330 if (Matcher.count({FirstPhi, SecondPhi}))
4331 continue;
4332 // So the values are different and does not match. So we need them to
4333 // match. (But we register no more than one match per PHI node, so that
4334 // we won't later try to replace them twice.)
4335 if (MatchedPHIs.insert(FirstPhi).second)
4336 Matcher.insert({FirstPhi, SecondPhi});
4337 // But me must check it.
4338 WorkList.push_back({FirstPhi, SecondPhi});
4339 }
4340 }
4341 return true;
4342 }
4343
4344 /// For the given set of PHI nodes (in the SimplificationTracker) try
4345 /// to find their equivalents.
4346 /// Returns false if this matching fails and creation of new Phi is disabled.
4347 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4348 unsigned &PhiNotMatchedCount) {
4349 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4350 // order, so the replacements (ReplacePhi) are also done in a deterministic
4351 // order.
4353 SmallPtrSet<PHINode *, 8> WillNotMatch;
4354 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4355 while (PhiNodesToMatch.size()) {
4356 PHINode *PHI = *PhiNodesToMatch.begin();
4357
4358 // Add us, if no Phi nodes in the basic block we do not match.
4359 WillNotMatch.clear();
4360 WillNotMatch.insert(PHI);
4361
4362 // Traverse all Phis until we found equivalent or fail to do that.
4363 bool IsMatched = false;
4364 for (auto &P : PHI->getParent()->phis()) {
4365 // Skip new Phi nodes.
4366 if (PhiNodesToMatch.count(&P))
4367 continue;
4368 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4369 break;
4370 // If it does not match, collect all Phi nodes from matcher.
4371 // if we end up with no match, them all these Phi nodes will not match
4372 // later.
4373 for (auto M : Matched)
4374 WillNotMatch.insert(M.first);
4375 Matched.clear();
4376 }
4377 if (IsMatched) {
4378 // Replace all matched values and erase them.
4379 for (auto MV : Matched)
4380 ST.ReplacePhi(MV.first, MV.second);
4381 Matched.clear();
4382 continue;
4383 }
4384 // If we are not allowed to create new nodes then bail out.
4385 if (!AllowNewPhiNodes)
4386 return false;
4387 // Just remove all seen values in matcher. They will not match anything.
4388 PhiNotMatchedCount += WillNotMatch.size();
4389 for (auto *P : WillNotMatch)
4390 PhiNodesToMatch.erase(P);
4391 }
4392 return true;
4393 }
4394 /// Fill the placeholders with values from predecessors and simplify them.
4395 void FillPlaceholders(FoldAddrToValueMapping &Map,
4396 SmallVectorImpl<Value *> &TraverseOrder,
4397 SimplificationTracker &ST) {
4398 while (!TraverseOrder.empty()) {
4399 Value *Current = TraverseOrder.pop_back_val();
4400 assert(Map.contains(Current) && "No node to fill!!!");
4401 Value *V = Map[Current];
4402
4403 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4404 // CurrentValue also must be Select.
4405 auto *CurrentSelect = cast<SelectInst>(Current);
4406 auto *TrueValue = CurrentSelect->getTrueValue();
4407 assert(Map.contains(TrueValue) && "No True Value!");
4408 Select->setTrueValue(ST.Get(Map[TrueValue]));
4409 auto *FalseValue = CurrentSelect->getFalseValue();
4410 assert(Map.contains(FalseValue) && "No False Value!");
4411 Select->setFalseValue(ST.Get(Map[FalseValue]));
4412 } else {
4413 // Must be a Phi node then.
4414 auto *PHI = cast<PHINode>(V);
4415 // Fill the Phi node with values from predecessors.
4416 for (auto *B : predecessors(PHI->getParent())) {
4417 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4418 assert(Map.contains(PV) && "No predecessor Value!");
4419 PHI->addIncoming(ST.Get(Map[PV]), B);
4420 }
4421 }
4422 Map[Current] = ST.Simplify(V);
4423 }
4424 }
4425
4426 /// Starting from original value recursively iterates over def-use chain up to
4427 /// known ending values represented in a map. For each traversed phi/select
4428 /// inserts a placeholder Phi or Select.
4429 /// Reports all new created Phi/Select nodes by adding them to set.
4430 /// Also reports and order in what values have been traversed.
4431 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4432 SmallVectorImpl<Value *> &TraverseOrder,
4433 SimplificationTracker &ST) {
4434 SmallVector<Value *, 32> Worklist;
4435 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4436 "Address must be a Phi or Select node");
4437 auto *Dummy = PoisonValue::get(CommonType);
4438 Worklist.push_back(Original);
4439 while (!Worklist.empty()) {
4440 Value *Current = Worklist.pop_back_val();
4441 // if it is already visited or it is an ending value then skip it.
4442 if (Map.contains(Current))
4443 continue;
4444 TraverseOrder.push_back(Current);
4445
4446 // CurrentValue must be a Phi node or select. All others must be covered
4447 // by anchors.
4448 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4449 // Is it OK to get metadata from OrigSelect?!
4450 // Create a Select placeholder with dummy value.
4452 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4453 CurrentSelect->getName(),
4454 CurrentSelect->getIterator(), CurrentSelect);
4455 Map[Current] = Select;
4456 ST.insertNewSelect(Select);
4457 // We are interested in True and False values.
4458 Worklist.push_back(CurrentSelect->getTrueValue());
4459 Worklist.push_back(CurrentSelect->getFalseValue());
4460 } else {
4461 // It must be a Phi node then.
4462 PHINode *CurrentPhi = cast<PHINode>(Current);
4463 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4464 PHINode *PHI =
4465 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4466 Map[Current] = PHI;
4467 ST.insertNewPhi(PHI);
4468 append_range(Worklist, CurrentPhi->incoming_values());
4469 }
4470 }
4471 }
4472
4473 bool addrModeCombiningAllowed() {
4475 return false;
4476 switch (DifferentField) {
4477 default:
4478 return false;
4479 case ExtAddrMode::BaseRegField:
4481 case ExtAddrMode::BaseGVField:
4482 return AddrSinkCombineBaseGV;
4483 case ExtAddrMode::BaseOffsField:
4485 case ExtAddrMode::ScaledRegField:
4487 }
4488 }
4489};
4490} // end anonymous namespace
4491
4492/// Try adding ScaleReg*Scale to the current addressing mode.
4493/// Return true and update AddrMode if this addr mode is legal for the target,
4494/// false if not.
4495bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4496 unsigned Depth) {
4497 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4498 // mode. Just process that directly.
4499 if (Scale == 1)
4500 return matchAddr(ScaleReg, Depth);
4501
4502 // If the scale is 0, it takes nothing to add this.
4503 if (Scale == 0)
4504 return true;
4505
4506 // If we already have a scale of this value, we can add to it, otherwise, we
4507 // need an available scale field.
4508 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4509 return false;
4510
4511 ExtAddrMode TestAddrMode = AddrMode;
4512
4513 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4514 // [A+B + A*7] -> [B+A*8].
4515 TestAddrMode.Scale += Scale;
4516 TestAddrMode.ScaledReg = ScaleReg;
4517
4518 // If the new address isn't legal, bail out.
4519 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4520 return false;
4521
4522 // It was legal, so commit it.
4523 AddrMode = TestAddrMode;
4524
4525 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4526 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4527 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4528 // go any further: we can reuse it and cannot eliminate it.
4529 ConstantInt *CI = nullptr;
4530 Value *AddLHS = nullptr;
4531 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4532 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4533 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4534 TestAddrMode.InBounds = false;
4535 TestAddrMode.ScaledReg = AddLHS;
4536 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4537
4538 // If this addressing mode is legal, commit it and remember that we folded
4539 // this instruction.
4540 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4541 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4542 AddrMode = TestAddrMode;
4543 return true;
4544 }
4545 // Restore status quo.
4546 TestAddrMode = AddrMode;
4547 }
4548
4549 // If this is an add recurrence with a constant step, return the increment
4550 // instruction and the canonicalized step.
4551 auto GetConstantStep =
4552 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4553 auto *PN = dyn_cast<PHINode>(V);
4554 if (!PN)
4555 return std::nullopt;
4556 auto IVInc = getIVIncrement(PN, &LI);
4557 if (!IVInc)
4558 return std::nullopt;
4559 // TODO: The result of the intrinsics above is two-complement. However when
4560 // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4561 // If it has nuw or nsw flags, we need to make sure that these flags are
4562 // inferrable at the point of memory instruction. Otherwise we are replacing
4563 // well-defined two-complement computation with poison. Currently, to avoid
4564 // potentially complex analysis needed to prove this, we reject such cases.
4565 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4566 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4567 return std::nullopt;
4568 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4569 return std::make_pair(IVInc->first, ConstantStep->getValue());
4570 return std::nullopt;
4571 };
4572
4573 // Try to account for the following special case:
4574 // 1. ScaleReg is an inductive variable;
4575 // 2. We use it with non-zero offset;
4576 // 3. IV's increment is available at the point of memory instruction.
4577 //
4578 // In this case, we may reuse the IV increment instead of the IV Phi to
4579 // achieve the following advantages:
4580 // 1. If IV step matches the offset, we will have no need in the offset;
4581 // 2. Even if they don't match, we will reduce the overlap of living IV
4582 // and IV increment, that will potentially lead to better register
4583 // assignment.
4584 if (AddrMode.BaseOffs) {
4585 if (auto IVStep = GetConstantStep(ScaleReg)) {
4586 Instruction *IVInc = IVStep->first;
4587 // The following assert is important to ensure a lack of infinite loops.
4588 // This transforms is (intentionally) the inverse of the one just above.
4589 // If they don't agree on the definition of an increment, we'd alternate
4590 // back and forth indefinitely.
4591 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4592 APInt Step = IVStep->second;
4593 APInt Offset = Step * AddrMode.Scale;
4594 if (Offset.isSignedIntN(64)) {
4595 TestAddrMode.InBounds = false;
4596 TestAddrMode.ScaledReg = IVInc;
4597 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4598 // If this addressing mode is legal, commit it..
4599 // (Note that we defer the (expensive) domtree base legality check
4600 // to the very last possible point.)
4601 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4602 getDTFn().dominates(IVInc, MemoryInst)) {
4603 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4604 AddrMode = TestAddrMode;
4605 return true;
4606 }
4607 // Restore status quo.
4608 TestAddrMode = AddrMode;
4609 }
4610 }
4611 }
4612
4613 // Otherwise, just return what we have.
4614 return true;
4615}
4616
4617/// This is a little filter, which returns true if an addressing computation
4618/// involving I might be folded into a load/store accessing it.
4619/// This doesn't need to be perfect, but needs to accept at least
4620/// the set of instructions that matchOperationAddr can.
// NOTE(review): listing line 4621, which should hold this function's
// signature, is absent from this extract. From the body it takes an
// instruction `I` and returns bool -- confirm the exact declaration against
// the original file.
4622 switch (I->getOpcode()) {
4623 case Instruction::BitCast:
4624 case Instruction::AddrSpaceCast:
4625 // Don't touch identity bitcasts.
4626 if (I->getType() == I->getOperand(0)->getType())
4627 return false;
     // Only casts that produce an integer or pointer are candidates.
4628 return I->getType()->isIntOrPtrTy();
4629 case Instruction::PtrToInt:
4630 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4631 return true;
4632 case Instruction::IntToPtr:
4633 // We know the input is intptr_t, so this is foldable.
4634 return true;
4635 case Instruction::Add:
4636 return true;
4637 case Instruction::Mul:
4638 case Instruction::Shl:
4639 // Can only handle X*C and X << C.
4640 return isa<ConstantInt>(I->getOperand(1));
4641 case Instruction::GetElementPtr:
4642 return true;
4643 default:
     // Every other opcode is rejected.
4644 return false;
4645 }
4646}
4647
4648/// Check whether or not \p Val is a legal instruction for \p TLI.
4649/// \note \p Val is assumed to be the product of some type promotion.
4650/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4651/// to be legal, as the non-promoted value would have had the same state.
/// \return false when \p Val is not an instruction; true when its opcode has
/// no ISD equivalent; otherwise whether the operation is legal or custom for
/// the value type of \p Val.
// NOTE(review): listing line 4652, the first line of the signature, is absent
// from this extract. The body uses a `TLI` parameter and the caller in
// isPromotionProfitable passes (TLI, DL, PromotedOperand) -- confirm the exact
// declaration against the original file.
4653 const DataLayout &DL, Value *Val) {
4654 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4655 if (!PromotedInst)
4656 return false;
4657 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4658 // If the ISDOpcode is undefined, it was undefined before the promotion.
4659 if (!ISDOpcode)
4660 return true;
4661 // Otherwise, check if the promoted instruction is legal or not.
4662 return TLI.isOperationLegalOrCustom(
4663 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4664}
4665
4666namespace {
4667
4668/// Helper class to perform type promotion.
///
/// Every member is static. The public surface is the Action function-pointer
/// type and getAction(), which selects one of the private promote* routines.
// NOTE(review): several parameter lines of the declarations below are absent
// from this extract (the listing numbering skips them); each gap is marked.
4669class TypePromotionHelper {
4670 /// Utility function to add a promoted instruction \p ExtOpnd to
4671 /// \p PromotedInsts and record the type of extension we have seen.
     /// The type stored is \p ExtOpnd's type at the time of the call.
4672 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4673 Instruction *ExtOpnd, bool IsSExt) {
4674 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4675 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4676 if (It != PromotedInsts.end()) {
4677 // If the new extension is same as original, the information in
4678 // PromotedInsts[ExtOpnd] is still correct.
4679 if (It->second.getInt() == ExtTy)
4680 return;
4681
4682 // Now the new extension is different from old extension, we make
4683 // the type information invalid by setting extension type to
4684 // BothExtension.
4685 ExtTy = BothExtension;
4686 }
4687 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4688 }
4689
4690 /// Utility function to query the original type of instruction \p Opnd
4691 /// with a matched extension type. If the extension doesn't match, we
4692 /// cannot use the information we had on the original type.
4693 /// BothExtension doesn't match any extension type.
     /// \return The recorded type, or nullptr when there is no entry or the
     /// recorded extension kind differs from the one requested.
4694 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4695 Instruction *Opnd, bool IsSExt) {
4696 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4697 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4698 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4699 return It->second.getPointer();
4700 return nullptr;
4701 }
4702
4703 /// Utility function to check whether or not a sign or zero extension
4704 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4705 /// either using the operands of \p Inst or promoting \p Inst.
4706 /// The type of the extension is defined by \p IsSExt.
4707 /// In other words, check if:
4708 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4709 /// #1 Promotion applies:
4710 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4711 /// #2 Operand reuses:
4712 /// ext opnd1 to ConsideredExtType.
4713 /// \p PromotedInsts maps the instructions to their type before promotion.
4714 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4715 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4716
4717 /// Utility function to determine if \p OpIdx should be promoted when
4718 /// promoting \p Inst.
     /// Only operand 0 of a select is excluded (presumably its condition,
     /// which has to keep its own type -- confirm).
4719 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4720 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4721 }
4722
4723 /// Utility function to promote the operand of \p Ext when this
4724 /// operand is a promotable trunc or sext or zext.
4725 /// \p PromotedInsts maps the instructions to their type before promotion.
4726 /// \p CreatedInstsCost[out] contains the cost of all instructions
4727 /// created to promote the operand of Ext.
4728 /// Newly added extensions are inserted in \p Exts.
4729 /// Newly added truncates are inserted in \p Truncs.
4730 /// Should never be called directly.
4731 /// \return The promoted value which is used instead of Ext.
4732 static Value *promoteOperandForTruncAndAnyExt(
4733 Instruction *Ext, TypePromotionTransaction &TPT,
4734 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
     // NOTE(review): listing lines 4735-4736 are absent from this extract; the
     // remaining parameters (presumably Exts, Truncs and TLI, as documented
     // above and used by the definition) and the closing `);` are not visible.
4737
4738 /// Utility function to promote the operand of \p Ext when this
4739 /// operand is promotable and is not a supported trunc or sext.
4740 /// \p PromotedInsts maps the instructions to their type before promotion.
4741 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4742 /// created to promote the operand of Ext.
4743 /// Newly added extensions are inserted in \p Exts.
4744 /// Newly added truncates are inserted in \p Truncs.
4745 /// Should never be called directly.
4746 /// \return The promoted value which is used instead of Ext.
4747 static Value *promoteOperandForOther(Instruction *Ext,
4748 TypePromotionTransaction &TPT,
4749 InstrToOrigTy &PromotedInsts,
4750 unsigned &CreatedInstsCost,
     // NOTE(review): listing lines 4751-4752 are absent from this extract. The
     // wrappers below pass `Exts, Truncs` at this position, so the missing
     // parameters are presumably those two -- confirm.
4753 const TargetLowering &TLI, bool IsSExt);
4754
4755 /// \see promoteOperandForOther.
     /// Forwards to promoteOperandForOther with IsSExt set to true.
4756 static Value *signExtendOperandForOther(
4757 Instruction *Ext, TypePromotionTransaction &TPT,
4758 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
     // NOTE(review): listing line 4759 (presumably the `Exts` parameter) is
     // absent from this extract.
4760 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4761 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4762 Exts, Truncs, TLI, true);
4763 }
4764
4765 /// \see promoteOperandForOther.
     /// Forwards to promoteOperandForOther with IsSExt set to false.
4766 static Value *zeroExtendOperandForOther(
4767 Instruction *Ext, TypePromotionTransaction &TPT,
4768 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
     // NOTE(review): listing line 4769 (presumably the `Exts` parameter) is
     // absent from this extract.
4770 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4771 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4772 Exts, Truncs, TLI, false);
4773 }
4774
4775public:
4776 /// Type for the utility function that promotes the operand of Ext.
4777 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4778 InstrToOrigTy &PromotedInsts,
4779 unsigned &CreatedInstsCost,
     // NOTE(review): listing lines 4780-4781 are absent from this extract;
     // presumably the `Exts` and `Truncs` parameters, matching the wrappers
     // above -- confirm.
4782 const TargetLowering &TLI);
4783
4784 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4785 /// action to promote the operand of \p Ext instead of using Ext.
4786 /// \return NULL if no promotable action is possible with the current
4787 /// sign extension.
4788 /// \p InsertedInsts keeps track of all the instructions inserted by the
4789 /// other CodeGenPrepare optimizations. This information is important
4790 /// because we do not want to promote these instructions as CodeGenPrepare
4791 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4792 /// \p PromotedInsts maps the instructions to their type before promotion.
4793 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4794 const TargetLowering &TLI,
4795 const InstrToOrigTy &PromotedInsts);
4796};
4797
4798} // end anonymous namespace
4799
4800bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4801 Type *ConsideredExtType,
4802 const InstrToOrigTy &PromotedInsts,
4803 bool IsSExt) {
4804 // The promotion helper does not know how to deal with vector types yet.
4805 // To be able to fix that, we would need to fix the places where we
4806 // statically extend, e.g., constants and such.
4807 if (Inst->getType()->isVectorTy())
4808 return false;
4809
4810 // We can always get through zext.
4811 if (isa<ZExtInst>(Inst))
4812 return true;
4813
4814 // sext(sext) is ok too.
4815 if (IsSExt && isa<SExtInst>(Inst))
4816 return true;
4817
4818 // We can get through binary operator, if it is legal. In other words, the
4819 // binary operator must have a nuw or nsw flag.
4820 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4821 if (isa<OverflowingBinaryOperator>(BinOp) &&
4822 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4823 (IsSExt && BinOp->hasNoSignedWrap())))
4824 return true;
4825
4826 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4827 if ((Inst->getOpcode() == Instruction::And ||
4828 Inst->getOpcode() == Instruction::Or))
4829 return true;
4830
4831 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4832 if (Inst->getOpcode() == Instruction::Xor) {
4833 // Make sure it is not a NOT.
4834 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4835 if (!Cst->getValue().isAllOnes())
4836 return true;
4837 }
4838
4839 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4840 // It may change a poisoned value into a regular value, like
4841 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4842 // poisoned value regular value
4843 // It should be OK since undef covers valid value.
4844 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4845 return true;
4846
4847 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4848 // It may change a poisoned value into a regular value, like
4849 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4850 // poisoned value regular value
4851 // It should be OK since undef covers valid value.
4852 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4853 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4854 if (ExtInst->hasOneUse()) {
4855 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4856 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4857 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4858 if (Cst &&
4859 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4860 return true;
4861 }
4862 }
4863 }
4864
4865 // Check if we can do the following simplification.
4866 // ext(trunc(opnd)) --> ext(opnd)
4867 if (!isa<TruncInst>(Inst))
4868 return false;
4869
4870 Value *OpndVal = Inst->getOperand(0);
4871 // Check if we can use this operand in the extension.
4872 // If the type is larger than the result type of the extension, we cannot.
4873 if (!OpndVal->getType()->isIntegerTy() ||
4874 OpndVal->getType()->getIntegerBitWidth() >
4875 ConsideredExtType->getIntegerBitWidth())
4876 return false;
4877
4878 // If the operand of the truncate is not an instruction, we will not have
4879 // any information on the dropped bits.
4880 // (Actually we could for constant but it is not worth the extra logic).
4881 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4882 if (!Opnd)
4883 return false;
4884
4885 // Check if the source of the type is narrow enough.
4886 // I.e., check that trunc just drops extended bits of the same kind of
4887 // the extension.
4888 // #1 get the type of the operand and check the kind of the extended bits.
4889 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4890 if (OpndType)
4891 ;
4892 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4893 OpndType = Opnd->getOperand(0)->getType();
4894 else
4895 return false;
4896
4897 // #2 check that the truncate just drops extended bits.
4898 return Inst->getType()->getIntegerBitWidth() >=
4899 OpndType->getIntegerBitWidth();
4900}
4901
4902TypePromotionHelper::Action TypePromotionHelper::getAction(
4903 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4904 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4905 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4906 "Unexpected instruction type");
4907 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4908 Type *ExtTy = Ext->getType();
4909 bool IsSExt = isa<SExtInst>(Ext);
4910 // If the operand of the extension is not an instruction, we cannot
4911 // get through.
4912 // If it, check we can get through.
4913 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4914 return nullptr;
4915
4916 // Do not promote if the operand has been added by codegenprepare.
4917 // Otherwise, it means we are undoing an optimization that is likely to be
4918 // redone, thus causing potential infinite loop.
4919 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4920 return nullptr;
4921
4922 // SExt or Trunc instructions.
4923 // Return the related handler.
4924 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4925 isa<ZExtInst>(ExtOpnd))
4926 return promoteOperandForTruncAndAnyExt;
4927
4928 // Regular instruction.
4929 // Abort early if we will have to insert non-free instructions.
4930 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4931 return nullptr;
4932 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4933}
4934
// Folds the extension `SExt` with its operand when that operand is itself a
// zext, sext or trunc, and returns the value that now stands for the
// extension. Despite its name, `SExt` may be a zero extension as well (see
// the s|zext / z|sext patterns below). CreatedInstsCost is written with 0 or
// 1. PromotedInsts and Truncs are not used in the visible body.
4935Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4936 Instruction *SExt, TypePromotionTransaction &TPT,
4937 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
     // NOTE(review): listing line 4938 is absent from this extract; it
     // presumably declares the `Exts` parameter used further down -- confirm.
4939 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4940 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4941 // get through it and this method should not be called.
4942 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4943 Value *ExtVal = SExt;
4944 bool HasMergedNonFreeExt = false;
4945 if (isa<ZExtInst>(SExtOpnd)) {
4946 // Replace s|zext(zext(opnd))
4947 // => zext(opnd).
     // Remember whether the inner zext had a cost, so that the merged
     // extension is not charged again below.
4948 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4949 Value *ZExt =
4950 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4951 TPT.replaceAllUsesWith(SExt, ZExt);
4952 TPT.eraseInstruction(SExt);
4953 ExtVal = ZExt;
4954 } else {
4955 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4956 // => z|sext(opnd).
4957 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4958 }
4959 CreatedInstsCost = 0;
4960
4961 // Remove dead code.
4962 if (SExtOpnd->use_empty())
4963 TPT.eraseInstruction(SExtOpnd);
4964
4965 // Check if the extension is still needed.
     // It is still needed unless it now converts a value to its own type.
     // ExtVal may also no longer be an instruction at all.
4966 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4967 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4968 if (ExtInst) {
4969 if (Exts)
4970 Exts->push_back(ExtInst);
     // Cost is 1 only when the extension is not free and no non-free
     // extension was merged into it.
4971 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4972 }
4973 return ExtVal;
4974 }
4975
4976 // At this point we have: ext ty opnd to ty.
4977 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4978 Value *NextVal = ExtInst->getOperand(0);
4979 TPT.eraseInstruction(ExtInst, NextVal);
4980 return NextVal;
4981}
4982
// Promotes the instruction feeding `Ext` to Ext's type: the operand's type is
// mutated in place, its operands are extended (statically for constants and
// undef, with a new sext/zext otherwise), users of `Ext` are redirected to
// the promoted operand, and `Ext` is erased. CreatedInstsCost is reset to 0
// and then incremented once per created extension that is not free.
// Returns the promoted operand.
4983Value *TypePromotionHelper::promoteOperandForOther(
4984 Instruction *Ext, TypePromotionTransaction &TPT,
4985 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
     // NOTE(review): listing lines 4986-4987 are absent from this extract; they
     // presumably declare the `Exts`, `Truncs` and `TLI` parameters used in the
     // body -- confirm.
4988 bool IsSExt) {
4989 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4990 // get through it and this method should not be called.
4991 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4992 CreatedInstsCost = 0;
4993 if (!ExtOpnd->hasOneUse()) {
4994 // ExtOpnd will be promoted.
4995 // All its uses, but Ext, will need to use a truncated value of the
4996 // promoted version.
4997 // Create the truncate now.
4998 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4999 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5000 // Insert it just after the definition.
5001 ITrunc->moveAfter(ExtOpnd);
5002 if (Truncs)
5003 Truncs->push_back(ITrunc);
5004 }
5005
5006 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5007 // Restore the operand of Ext (which has been replaced by the previous call
5008 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5009 TPT.setOperand(Ext, 0, ExtOpnd);
5010 }
5011
5012 // Get through the Instruction:
5013 // 1. Update its type.
5014 // 2. Replace the uses of Ext by Inst.
5015 // 3. Extend each operand that needs to be extended.
5016
5017 // Remember the original type of the instruction before promotion.
5018 // This is useful to know that the high bits are sign extended bits.
     // This must happen before the type is mutated in step #1, because
     // addPromotedInst records ExtOpnd's current type.
5019 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5020 // Step #1.
5021 TPT.mutateType(ExtOpnd, Ext->getType());
5022 // Step #2.
5023 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5024 // Step #3.
5025 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5026 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5027 ++OpIdx) {
5028 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
     // Skip operands that already have the wide type and operands that
     // shouldExtOperand excludes.
5029 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5030 !shouldExtOperand(ExtOpnd, OpIdx)) {
5031 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5032 continue;
5033 }
5034 // Check if we can statically extend the operand.
5035 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5036 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5037 LLVM_DEBUG(dbgs() << "Statically extend\n");
5038 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5039 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5040 : Cst->getValue().zext(BitWidth);
5041 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5042 continue;
5043 }
5044 // UndefValue are typed, so we have to statically extend them.
5045 if (isa<UndefValue>(Opnd)) {
5046 LLVM_DEBUG(dbgs() << "Statically extend\n");
5047 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5048 continue;
5049 }
5050
5051 // Otherwise we have to explicitly sign or zero extend the operand.
5052 Value *ValForExtOpnd = IsSExt
5053 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5054 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5055 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
     // The created value may not be an instruction; in that case there is
     // nothing to record and nothing to charge.
5056 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5057 if (!InstForExtOpnd)
5058 continue;
5059
5060 if (Exts)
5061 Exts->push_back(InstForExtOpnd);
5062
5063 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5064 }
5065 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5066 TPT.eraseInstruction(Ext);
5067 return ExtOpnd;
5068}
5069
5070/// Check whether or not promoting an instruction to a wider type is profitable.
5071/// \p NewCost gives the cost of extension instructions created by the
5072/// promotion.
5073/// \p OldCost gives the cost of extension instructions before the promotion
5074/// plus the number of instructions that have been
5075/// matched in the addressing mode the promotion.
5076/// \p PromotedOperand is the value that has been promoted.
5077/// \return True if the promotion is profitable, false otherwise.
5078bool AddressingModeMatcher::isPromotionProfitable(
5079 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5080 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5081 << '\n');
5082 // The cost of the new extensions is greater than the cost of the
5083 // old extension plus what we folded.
5084 // This is not profitable.
5085 if (NewCost > OldCost)
5086 return false;
5087 if (NewCost < OldCost)
5088 return true;
5089 // The promotion is neutral but it may help folding the sign extension in
5090 // loads for instance.
5091 // Check that we did not create an illegal instruction.
5092 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5093}
5094
5095/// Given an instruction or constant expr, see if we can fold the operation
5096/// into the addressing mode. If so, update the addressing mode and return
5097/// true, otherwise return false without modifying AddrMode.
5098/// If \p MovedAway is not NULL, it contains the information of whether or
5099/// not AddrInst has to be folded into the addressing mode on success.
5100/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5101/// because it has been moved away.
5102/// Thus AddrInst must not be added in the matched instructions.
5103/// This state can happen when AddrInst is a sext, since it may be moved away.
5104/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5105/// not be referenced anymore.
5106bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5107 unsigned Depth,
5108 bool *MovedAway) {
5109 // Avoid exponential behavior on extremely deep expression trees.
5110 if (Depth >= 5)
5111 return false;
5112
5113 // By default, all matched instructions stay in place.
5114 if (MovedAway)
5115 *MovedAway = false;
5116
5117 switch (Opcode) {
5118 case Instruction::PtrToInt:
5119 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5120 return matchAddr(AddrInst->getOperand(0), Depth);
5121 case Instruction::IntToPtr: {
5122 auto AS = AddrInst->getType()->getPointerAddressSpace();
5123 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5124 // This inttoptr is a no-op if the integer type is pointer sized.
5125 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5126 return matchAddr(AddrInst->getOperand(0), Depth);
5127 return false;
5128 }
5129 case Instruction::BitCast:
5130 // BitCast is always a noop, and we can handle it as long as it is
5131 // int->int or pointer->pointer (we don't want int<->fp or something).
5132 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5133 // Don't touch identity bitcasts. These were probably put here by LSR,
5134 // and we don't want to mess around with them. Assume it knows what it
5135 // is doing.
5136 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5137 return matchAddr(AddrInst->getOperand(0), Depth);
5138 return false;
5139 case Instruction::AddrSpaceCast: {
5140 unsigned SrcAS =
5141 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5142 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5143 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5144 return matchAddr(AddrInst->getOperand(0), Depth);
5145 return false;
5146 }
5147 case Instruction::Add: {
5148 // Check to see if we can merge in one operand, then the other. If so, we
5149 // win.
5150 ExtAddrMode BackupAddrMode = AddrMode;
5151 unsigned OldSize = AddrModeInsts.size();
5152 // Start a transaction at this point.
5153 // The LHS may match but not the RHS.
5154 // Therefore, we need a higher level restoration point to undo partially
5155 // matched operation.
5156 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5157 TPT.getRestorationPoint();
5158
5159 // Try to match an integer constant second to increase its chance of ending
5160 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5161 int First = 0, Second = 1;
5162 if (isa<ConstantInt>(AddrInst->getOperand(First))
5163 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5164 std::swap(First, Second);
5165 AddrMode.InBounds = false;
5166 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5167 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5168 return true;
5169
5170 // Restore the old addr mode info.
5171 AddrMode = BackupAddrMode;
5172 AddrModeInsts.resize(OldSize);
5173 TPT.rollback(LastKnownGood);
5174
5175 // Otherwise this was over-aggressive. Try merging operands in the opposite
5176 // order.
5177 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5178 matchAddr(AddrInst->getOperand(First), Depth + 1))
5179 return true;
5180
5181 // Otherwise we definitely can't merge the ADD in.
5182 AddrMode = BackupAddrMode;
5183 AddrModeInsts.resize(OldSize);
5184 TPT.rollback(LastKnownGood);
5185 break;
5186 }
5187 // case Instruction::Or:
5188 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5189 // break;
5190 case Instruction::Mul:
5191 case Instruction::Shl: {
5192 // Can only handle X*C and X << C.
5193 AddrMode.InBounds = false;
5194 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5195 if (!RHS || RHS->getBitWidth() > 64)
5196 return false;
5197 int64_t Scale = Opcode == Instruction::Shl
5198 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5199 : RHS->getSExtValue();
5200
5201 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5202 }
5203 case Instruction::GetElementPtr: {
5204 // Scan the GEP. We check it if it contains constant offsets and at most
5205 // one variable offset.
5206 int VariableOperand = -1;
5207 unsigned VariableScale = 0;
5208
5209 int64_t ConstantOffset = 0;
5210 gep_type_iterator GTI = gep_type_begin(AddrInst);
5211 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5212 if (StructType *STy = GTI.getStructTypeOrNull()) {
5213 const StructLayout *SL = DL.getStructLayout(STy);
5214 unsigned Idx =
5215 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5216 ConstantOffset += SL->getElementOffset(Idx);
5217 } else {
5219 if (TS.isNonZero()) {
5220 // The optimisations below currently only work for fixed offsets.
5221 if (TS.isScalable())
5222 return false;
5223 int64_t TypeSize = TS.getFixedValue();
5224 if (ConstantInt *CI =
5225 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5226 const APInt &CVal = CI->getValue();
5227 if (CVal.getSignificantBits() <= 64) {
5228 ConstantOffset += CVal.getSExtValue() * TypeSize;
5229 continue;
5230 }
5231 }
5232 // We only allow one variable index at the moment.
5233 if (VariableOperand != -1)
5234 return false;
5235
5236 // Remember the variable index.
5237 VariableOperand = i;
5238 VariableScale = TypeSize;
5239 }
5240 }
5241 }
5242
5243 // A common case is for the GEP to only do a constant offset. In this case,
5244 // just add it to the disp field and check validity.
5245 if (VariableOperand == -1) {
5246 AddrMode.BaseOffs += ConstantOffset;
5247 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5248 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5249 AddrMode.InBounds = false;
5250 return true;
5251 }
5252 AddrMode.BaseOffs -= ConstantOffset;
5253
5254 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5255 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5256 ConstantOffset > 0) {
5257 // Record GEPs with non-zero offsets as candidates for splitting in
5258 // the event that the offset cannot fit into the r+i addressing mode.
5259 // Simple and common case that only one GEP is used in calculating the
5260 // address for the memory access.
5261 Value *Base = AddrInst->getOperand(0);
5262 auto *BaseI = dyn_cast<Instruction>(Base);
5263 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5264 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5265 (BaseI && !isa<CastInst>(BaseI) &&
5266 !isa<GetElementPtrInst>(BaseI))) {
5267 // Make sure the parent block allows inserting non-PHI instructions
5268 // before the terminator.
5269 BasicBlock *Parent = BaseI ? BaseI->getParent()
5270 : &GEP->getFunction()->getEntryBlock();
5271 if (!Parent->getTerminator()->isEHPad())
5272 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5273 }
5274 }
5275
5276 return false;
5277 }
5278
5279 // Save the valid addressing mode in case we can't match.
5280 ExtAddrMode BackupAddrMode = AddrMode;
5281 unsigned OldSize = AddrModeInsts.size();
5282
5283 // See if the scale and offset amount is valid for this target.
5284 AddrMode.BaseOffs += ConstantOffset;
5285 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5286 AddrMode.InBounds = false;
5287
5288 // Match the base operand of the GEP.
5289 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5290 // If it couldn't be matched, just stuff the value in a register.
5291 if (AddrMode.HasBaseReg) {
5292 AddrMode = BackupAddrMode;
5293 AddrModeInsts.resize(OldSize);
5294 return false;
5295 }
5296 AddrMode.HasBaseReg = true;
5297 AddrMode.BaseReg = AddrInst->getOperand(0);
5298 }
5299
5300 // Match the remaining variable portion of the GEP.
5301 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5302 Depth)) {
5303 // If it couldn't be matched, try stuffing the base into a register
5304 // instead of matching it, and retrying the match of the scale.
5305 AddrMode = BackupAddrMode;
5306 AddrModeInsts.resize(OldSize);
5307 if (AddrMode.HasBaseReg)
5308 return false;
5309 AddrMode.HasBaseReg = true;
5310 AddrMode.BaseReg = AddrInst->getOperand(0);
5311 AddrMode.BaseOffs += ConstantOffset;
5312 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5313 VariableScale, Depth)) {
5314 // If even that didn't work, bail.
5315 AddrMode = BackupAddrMode;
5316 AddrModeInsts.resize(OldSize);
5317 return false;
5318 }
5319 }
5320
5321 return true;
5322 }
5323 case Instruction::SExt:
5324 case Instruction::ZExt: {
5325 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5326 if (!Ext)
5327 return false;
5328
5329 // Try to move this ext out of the way of the addressing mode.
5330 // Ask for a method for doing so.
5331 TypePromotionHelper::Action TPH =
5332 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5333 if (!TPH)
5334 return false;
5335
5336 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5337 TPT.getRestorationPoint();
5338 unsigned CreatedInstsCost = 0;
5339 unsigned ExtCost = !TLI.isExtFree(Ext);
5340 Value *PromotedOperand =
5341 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5342 // SExt has been moved away.
5343 // Thus either it will be rematched later in the recursive calls or it is
5344 // gone. Anyway, we must not fold it into the addressing mode at this point.
5345 // E.g.,
5346 // op = add opnd, 1
5347 // idx = ext op
5348 // addr = gep base, idx
5349 // is now:
5350 // promotedOpnd = ext opnd <- no match here
5351 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5352 // addr = gep base, op <- match
5353 if (MovedAway)
5354 *MovedAway = true;
5355
5356 assert(PromotedOperand &&
5357 "TypePromotionHelper should have filtered out those cases");
5358
5359 ExtAddrMode BackupAddrMode = AddrMode;
5360 unsigned OldSize = AddrModeInsts.size();
5361
5362 if (!matchAddr(PromotedOperand, Depth) ||
5363 // The total of the new cost is equal to the cost of the created
5364 // instructions.
5365 // The total of the old cost is equal to the cost of the extension plus
5366 // what we have saved in the addressing mode.
5367 !isPromotionProfitable(CreatedInstsCost,
5368 ExtCost + (AddrModeInsts.size() - OldSize),
5369 PromotedOperand)) {
5370 AddrMode = BackupAddrMode;
5371 AddrModeInsts.resize(OldSize);
5372 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5373 TPT.rollback(LastKnownGood);
5374 return false;
5375 }
5376
5377 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5378 AddrMode.replaceWith(Ext, PromotedOperand);
5379 return true;
5380 }
5381 case Instruction::Call:
5382 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5383 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5384 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5385 if (TLI.addressingModeSupportsTLS(GV))
5386 return matchAddr(AddrInst->getOperand(0), Depth);
5387 }
5388 }
5389 break;
5390 }
5391 return false;
5392}
5393
5394/// If we can, try to add the value of 'Addr' into the current addressing mode.
5395/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5396/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5397/// for the target.
5398///
5399bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5400 // Start a transaction at this point that we will rollback if the matching
5401 // fails.
5402 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5403 TPT.getRestorationPoint();
5404 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5405 if (CI->getValue().isSignedIntN(64)) {
5406 // Fold in immediates if legal for the target.
5407 AddrMode.BaseOffs += CI->getSExtValue();
5408 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5409 return true;
5410 AddrMode.BaseOffs -= CI->getSExtValue();
5411 }
5412 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5413 // If this is a global variable, try to fold it into the addressing mode.
5414 if (!AddrMode.BaseGV) {
5415 AddrMode.BaseGV = GV;
5416 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5417 return true;
5418 AddrMode.BaseGV = nullptr;
5419 }
5420 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5421 ExtAddrMode BackupAddrMode = AddrMode;
5422 unsigned OldSize = AddrModeInsts.size();
5423
5424 // Check to see if it is possible to fold this operation.
5425 bool MovedAway = false;
5426 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5427 // This instruction may have been moved away. If so, there is nothing
5428 // to check here.
5429 if (MovedAway)
5430 return true;
5431 // Okay, it's possible to fold this. Check to see if it is actually
5432 // *profitable* to do so. We use a simple cost model to avoid increasing
5433 // register pressure too much.
5434 if (I->hasOneUse() ||
5435 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5436 AddrModeInsts.push_back(I);
5437 return true;
5438 }
5439
5440 // It isn't profitable to do this, roll back.
5441 AddrMode = BackupAddrMode;
5442 AddrModeInsts.resize(OldSize);
5443 TPT.rollback(LastKnownGood);
5444 }
5445 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5446 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5447 return true;
5448 TPT.rollback(LastKnownGood);
5449 } else if (isa<ConstantPointerNull>(Addr)) {
5450 // Null pointer gets folded without affecting the addressing mode.
5451 return true;
5452 }
5453
5454 // Worse case, the target should support [reg] addressing modes. :)
5455 if (!AddrMode.HasBaseReg) {
5456 AddrMode.HasBaseReg = true;
5457 AddrMode.BaseReg = Addr;
5458 // Still check for legality in case the target supports [imm] but not [i+r].
5459 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5460 return true;
5461 AddrMode.HasBaseReg = false;
5462 AddrMode.BaseReg = nullptr;
5463 }
5464
5465 // If the base register is already taken, see if we can do [r+r].
5466 if (AddrMode.Scale == 0) {
5467 AddrMode.Scale = 1;
5468 AddrMode.ScaledReg = Addr;
5469 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5470 return true;
5471 AddrMode.Scale = 0;
5472 AddrMode.ScaledReg = nullptr;
5473 }
5474 // Couldn't match.
5475 TPT.rollback(LastKnownGood);
5476 return false;
5477}
5478
5479/// Check to see if all uses of OpVal by the specified inline asm call are due
5480/// to memory operands. If so, return true, otherwise return false.
5482 const TargetLowering &TLI,
5483 const TargetRegisterInfo &TRI) {
5484 const Function *F = CI->getFunction();
5485 TargetLowering::AsmOperandInfoVector TargetConstraints =
5486 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5487
5488 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5489 // Compute the constraint code and ConstraintType to use.
5490 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5491
5492 // If this asm operand is our Value*, and if it isn't an indirect memory
5493 // operand, we can't fold it! TODO: Also handle C_Address?
5494 if (OpInfo.CallOperandVal == OpVal &&
5495 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5496 !OpInfo.isIndirect))
5497 return false;
5498 }
5499
5500 return true;
5501}
5502
5503/// Recursively walk all the uses of I until we find a memory use.
5504/// If we find an obviously non-foldable instruction, return true.
5505/// Add accessed addresses and types to MemoryUses.
5507 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5508 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5509 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5510 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5511 // If we already considered this instruction, we're done.
5512 if (!ConsideredInsts.insert(I).second)
5513 return false;
5514
5515 // If this is an obviously unfoldable instruction, bail out.
5516 if (!MightBeFoldableInst(I))
5517 return true;
5518
5519 // Loop over all the uses, recursively processing them.
5520 for (Use &U : I->uses()) {
5521 // Conservatively return true if we're seeing a large number or a deep chain
5522 // of users. This avoids excessive compilation times in pathological cases.
5523 if (SeenInsts++ >= MaxAddressUsersToScan)
5524 return true;
5525
5526 Instruction *UserI = cast<Instruction>(U.getUser());
5527 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5528 MemoryUses.push_back({&U, LI->getType()});
5529 continue;
5530 }
5531
5532 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5533 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5534 return true; // Storing addr, not into addr.
5535 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5536 continue;
5537 }
5538
5539 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5540 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5541 return true; // Storing addr, not into addr.
5542 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5543 continue;
5544 }
5545
5546 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5547 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5548 return true; // Storing addr, not into addr.
5549 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5550 continue;
5551 }
5552
5553 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5554 if (CI->hasFnAttr(Attribute::Cold)) {
5555 // If this is a cold call, we can sink the addressing calculation into
5556 // the cold path. See optimizeCallInst
5557 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5558 continue;
5559 }
5560
5561 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5562 if (!IA)
5563 return true;
5564
5565 // If this is a memory operand, we're cool, otherwise bail out.
5566 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5567 return true;
5568 continue;
5569 }
5570
5571 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5572 PSI, BFI, SeenInsts))
5573 return true;
5574 }
5575
5576 return false;
5577}
5578
5580 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5581 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5583 unsigned SeenInsts = 0;
5584 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5585 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5586 PSI, BFI, SeenInsts);
5587}
5588
5589
5590/// Return true if Val is already known to be live at the use site that we're
5591/// folding it into. If so, there is no cost to include it in the addressing
5592/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5593/// instruction already.
5594bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5595 Value *KnownLive1,
5596 Value *KnownLive2) {
5597 // If Val is either of the known-live values, we know it is live!
5598 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5599 return true;
5600
5601 // All values other than instructions and arguments (e.g. constants) are live.
5602 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5603 return true;
5604
5605 // If Val is a constant sized alloca in the entry block, it is live, this is
5606 // true because it is just a reference to the stack/frame pointer, which is
5607 // live for the whole function.
5608 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5609 if (AI->isStaticAlloca())
5610 return true;
5611
5612 // Check to see if this value is already used in the memory instruction's
5613 // block. If so, it's already live into the block at the very least, so we
5614 // can reasonably fold it.
5615 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5616}
5617
5618/// It is possible for the addressing mode of the machine to fold the specified
5619/// instruction into a load or store that ultimately uses it.
5620/// However, the specified instruction has multiple uses.
5621/// Given this, it may actually increase register pressure to fold it
5622/// into the load. For example, consider this code:
5623///
5624/// X = ...
5625/// Y = X+1
5626/// use(Y) -> nonload/store
5627/// Z = Y+1
5628/// load Z
5629///
5630/// In this case, Y has multiple uses, and can be folded into the load of Z
5631/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5632/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5633/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5634/// number of computations either.
5635///
5636/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5637/// X was live across 'load Z' for other reasons, we actually *would* want to
5638/// fold the addressing mode in the Z case. This would make Y die earlier.
5639bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5640 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5641 if (IgnoreProfitability)
5642 return true;
5643
5644 // AMBefore is the addressing mode before this instruction was folded into it,
5645 // and AMAfter is the addressing mode after the instruction was folded. Get
5646 // the set of registers referenced by AMAfter and subtract out those
5647 // referenced by AMBefore: this is the set of values which folding in this
5648 // address extends the lifetime of.
5649 //
5650 // Note that there are only two potential values being referenced here,
5651 // BaseReg and ScaleReg (global addresses are always available, as are any
5652 // folded immediates).
5653 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5654
5655 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5656 // lifetime wasn't extended by adding this instruction.
5657 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5658 BaseReg = nullptr;
5659 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5660 ScaledReg = nullptr;
5661
5662 // If folding this instruction (and it's subexprs) didn't extend any live
5663 // ranges, we're ok with it.
5664 if (!BaseReg && !ScaledReg)
5665 return true;
5666
5667 // If all uses of this instruction can have the address mode sunk into them,
5668 // we can remove the addressing mode and effectively trade one live register
5669 // for another (at worst.) In this context, folding an addressing mode into
5670 // the use is just a particularly nice way of sinking it.
5672 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5673 return false; // Has a non-memory, non-foldable use!
5674
5675 // Now that we know that all uses of this instruction are part of a chain of
5676 // computation involving only operations that could theoretically be folded
5677 // into a memory use, loop over each of these memory operation uses and see
5678 // if they could *actually* fold the instruction. The assumption is that
5679 // addressing modes are cheap and that duplicating the computation involved
5680 // many times is worthwhile, even on a fastpath. For sinking candidates
5681 // (i.e. cold call sites), this serves as a way to prevent excessive code
5682 // growth since most architectures have some reasonable small and fast way to
5683 // compute an effective address. (i.e LEA on x86)
5684 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5685 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5686 Value *Address = Pair.first->get();
5687 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5688 Type *AddressAccessTy = Pair.second;
5689 unsigned AS = Address->getType()->getPointerAddressSpace();
5690
5691 // Do a match against the root of this address, ignoring profitability. This
5692 // will tell us if the addressing mode for the memory operation will
5693 // *actually* cover the shared instruction.
5695 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5696 0);
5697 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5698 TPT.getRestorationPoint();
5699 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5700 AddressAccessTy, AS, UserI, Result,
5701 InsertedInsts, PromotedInsts, TPT,
5702 LargeOffsetGEP, OptSize, PSI, BFI);
5703 Matcher.IgnoreProfitability = true;
5704 bool Success = Matcher.matchAddr(Address, 0);
5705 (void)Success;
5706 assert(Success && "Couldn't select *anything*?");
5707
5708 // The match was to check the profitability, the changes made are not
5709 // part of the original matcher. Therefore, they should be dropped
5710 // otherwise the original matcher will not present the right state.
5711 TPT.rollback(LastKnownGood);
5712
5713 // If the match didn't cover I, then it won't be shared by it.
5714 if (!is_contained(MatchedAddrModeInsts, I))
5715 return false;
5716
5717 MatchedAddrModeInsts.clear();
5718 }
5719
5720 return true;
5721}
5722
5723/// Return true if the specified values are defined in a
5724/// different basic block than BB.
5725static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5726 if (Instruction *I = dyn_cast<Instruction>(V))
5727 return I->getParent() != BB;
5728 return false;
5729}
5730
5731/// Sink addressing mode computation immediately before MemoryInst if doing so
5732/// can be done without increasing register pressure. The need for the
5733/// register pressure constraint means this can end up being an all or nothing
5734/// decision for all uses of the same addressing computation.
5735///
5736/// Load and Store Instructions often have addressing modes that can do
5737/// significant amounts of computation. As such, instruction selection will try
5738/// to get the load or store to do as much computation as possible for the
5739/// program. The problem is that isel can only see within a single block. As
5740/// such, we sink as much legal addressing mode work into the block as possible.
5741///
5742/// This method is used to optimize both load/store and inline asms with memory
5743/// operands. It's also used to sink addressing computations feeding into cold
5744/// call sites into their (cold) basic block.
5745///
5746/// The motivation for handling sinking into cold blocks is that doing so can
5747/// both enable other address mode sinking (by satisfying the register pressure
5748/// constraint above), and reduce register pressure globally (by removing the
5749/// addressing mode computation from the fast path entirely.).
5750bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5751 Type *AccessTy, unsigned AddrSpace) {
5752 Value *Repl = Addr;
5753
5754 // Try to collapse single-value PHI nodes. This is necessary to undo
5755 // unprofitable PRE transformations.
5756 SmallVector<Value *, 8> worklist;
5758 worklist.push_back(Addr);
5759
5760 // Use a worklist to iteratively look through PHI and select nodes, and
5761 // ensure that the addressing mode obtained from the non-PHI/select roots of
5762 // the graph are compatible.
5763 bool PhiOrSelectSeen = false;
5764 SmallVector<Instruction *, 16> AddrModeInsts;
5765 const SimplifyQuery SQ(*DL, TLInfo);
5766 AddressingModeCombiner AddrModes(SQ, Addr);
5767 TypePromotionTransaction TPT(RemovedInsts);
5768 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5769 TPT.getRestorationPoint();
5770 while (!worklist.empty()) {
5771 Value *V = worklist.pop_back_val();
5772
5773 // We allow traversing cyclic Phi nodes.
5774 // In case of success after this loop we ensure that traversing through
5775 // Phi nodes ends up with all cases to compute address of the form
5776 // BaseGV + Base + Scale * Index + Offset
5777 // where Scale and Offset are constants and BaseGV, Base and Index
5778 // are exactly the same Values in all cases.
5779 // It means that BaseGV, Scale and Offset dominate our memory instruction
5780 // and have the same value as they had in address computation represented
5781 // as Phi. So we can safely sink address computation to memory instruction.
5782 if (!Visited.insert(V).second)
5783 continue;
5784
5785 // For a PHI node, push all of its incoming values.
5786 if (PHINode *P = dyn_cast<PHINode>(V)) {
5787 append_range(worklist, P->incoming_values());
5788 PhiOrSelectSeen = true;
5789 continue;
5790 }
5791 // Similar for select.
5792 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5793 worklist.push_back(SI->getFalseValue());
5794 worklist.push_back(SI->getTrueValue());
5795 PhiOrSelectSeen = true;
5796 continue;
5797 }
5798
5799 // For non-PHIs, determine the addressing mode being computed. Note that
5800 // the result may differ depending on what other uses our candidate
5801 // addressing instructions might have.
5802 AddrModeInsts.clear();
5803 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5804 0);
5805 // Defer the query (and possible computation of) the dom tree to point of
5806 // actual use. It's expected that most address matches don't actually need
5807 // the domtree.
5808 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5809 Function *F = MemoryInst->getParent()->getParent();
5810 return this->getDT(*F);
5811 };
5812 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5813 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5814 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5815 BFI.get());
5816
5817 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5818 if (GEP && !NewGEPBases.count(GEP)) {
5819 // If splitting the underlying data structure can reduce the offset of a
5820 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5821 // previously split data structures.
5822 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5823 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5824 }
5825
5826 NewAddrMode.OriginalValue = V;
5827 if (!AddrModes.addNewAddrMode(NewAddrMode))
5828 break;
5829 }
5830
5831 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5832 // or we have multiple but either couldn't combine them or combining them
5833 // wouldn't do anything useful, bail out now.
5834 if (!AddrModes.combineAddrModes()) {
5835 TPT.rollback(LastKnownGood);
5836 return false;
5837 }
5838 bool Modified = TPT.commit();
5839
5840 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5841 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5842
5843 // If all the instructions matched are already in this BB, don't do anything.
5844 // If we saw a Phi node then it is not local definitely, and if we saw a
5845 // select then we want to push the address calculation past it even if it's
5846 // already in this BB.
5847 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5848 return IsNonLocalValue(V, MemoryInst->getParent());
5849 })) {
5850 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5851 << "\n");
5852 return Modified;
5853 }
5854
5855 // Insert this computation right after this user. Since our caller is
5856 // scanning from the top of the BB to the bottom, reuse of the expr are
5857 // guaranteed to happen later.
5858 IRBuilder<> Builder(MemoryInst);
5859
5860 // Now that we determined the addressing expression we want to use and know
5861 // that we have to sink it into this block. Check to see if we have already
5862 // done this for some other load/store instr in this block. If so, reuse
5863 // the computation. Before attempting reuse, check if the address is valid
5864 // as it may have been erased.
5865
5866 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5867
5868 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5869 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5870 if (SunkAddr) {
5871 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5872 << " for " << *MemoryInst << "\n");
5873 if (SunkAddr->getType() != Addr->getType()) {
5874 if (SunkAddr->getType()->getPointerAddressSpace() !=
5875 Addr->getType()->getPointerAddressSpace() &&
5876 !DL->isNonIntegralPointerType(Addr->getType())) {
5877 // There are two reasons the address spaces might not match: a no-op
5878 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5879 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5880 // TODO: allow bitcast between different address space pointers with the
5881 // same size.
5882 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5883 SunkAddr =
5884 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5885 } else
5886 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5887 }
5889 SubtargetInfo->addrSinkUsingGEPs())) {
5890 // By default, we use the GEP-based method when AA is used later. This
5891 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5892 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5893 << " for " << *MemoryInst << "\n");
5894 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5895
5896 // First, find the pointer.
5897 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5898 ResultPtr = AddrMode.BaseReg;
5899 AddrMode.BaseReg = nullptr;
5900 }
5901
5902 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5903 // We can't add more than one pointer together, nor can we scale a
5904 // pointer (both of which seem meaningless).
5905 if (ResultPtr || AddrMode.Scale != 1)
5906 return Modified;
5907
5908 ResultPtr = AddrMode.ScaledReg;
5909 AddrMode.Scale = 0;
5910 }
5911
5912 // It is only safe to sign extend the BaseReg if we know that the math
5913 // required to create it did not overflow before we extend it. Since
5914 // the original IR value was tossed in favor of a constant back when
5915 // the AddrMode was created we need to bail out gracefully if widths
5916 // do not match instead of extending it.
5917 //
5918 // (See below for code to add the scale.)
5919 if (AddrMode.Scale) {
5920 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5921 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5922 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5923 return Modified;
5924 }
5925
5926 GlobalValue *BaseGV = AddrMode.BaseGV;
5927 if (BaseGV != nullptr) {
5928 if (ResultPtr)
5929 return Modified;
5930
5931 if (BaseGV->isThreadLocal()) {
5932 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5933 } else {
5934 ResultPtr = BaseGV;
5935 }
5936 }
5937
5938 // If the real base value actually came from an inttoptr, then the matcher
5939 // will look through it and provide only the integer value. In that case,
5940 // use it here.
5941 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5942 if (!ResultPtr && AddrMode.BaseReg) {
5943 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5944 "sunkaddr");
5945 AddrMode.BaseReg = nullptr;
5946 } else if (!ResultPtr && AddrMode.Scale == 1) {
5947 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5948 "sunkaddr");
5949 AddrMode.Scale = 0;
5950 }
5951 }
5952
5953 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5954 !AddrMode.BaseOffs) {
5955 SunkAddr = Constant::getNullValue(Addr->getType());
5956 } else if (!ResultPtr) {
5957 return Modified;
5958 } else {
5959 Type *I8PtrTy =
5960 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5961
5962 // Start with the base register. Do this first so that subsequent address
5963 // matching finds it last, which will prevent it from trying to match it
5964 // as the scaled value in case it happens to be a mul. That would be
5965 // problematic if we've sunk a different mul for the scale, because then
5966 // we'd end up sinking both muls.
5967 if (AddrMode.BaseReg) {
5968 Value *V = AddrMode.BaseReg;
5969 if (V->getType() != IntPtrTy)
5970 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5971
5972 ResultIndex = V;
5973 }
5974
5975 // Add the scale value.
5976 if (AddrMode.Scale) {
5977 Value *V = AddrMode.ScaledReg;
5978 if (V->getType() == IntPtrTy) {
5979 // done.
5980 } else {
5981 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5982 cast<IntegerType>(V->getType())->getBitWidth() &&
5983 "We can't transform if ScaledReg is too narrow");
5984 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5985 }
5986
5987 if (AddrMode.Scale != 1)
5988 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5989 "sunkaddr");
5990 if (ResultIndex)
5991 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5992 else
5993 ResultIndex = V;
5994 }
5995
5996 // Add in the Base Offset if present.
5997 if (AddrMode.BaseOffs) {
5998 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5999 if (ResultIndex) {
6000 // We need to add this separately from the scale above to help with
6001 // SDAG consecutive load/store merging.
6002 if (ResultPtr->getType() != I8PtrTy)
6003 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6004 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6005 AddrMode.InBounds);
6006 }
6007
6008 ResultIndex = V;
6009 }
6010
6011 if (!ResultIndex) {
6012 SunkAddr = ResultPtr;
6013 } else {
6014 if (ResultPtr->getType() != I8PtrTy)
6015 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6016 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6017 AddrMode.InBounds);
6018 }
6019
6020 if (SunkAddr->getType() != Addr->getType()) {
6021 if (SunkAddr->getType()->getPointerAddressSpace() !=
6022 Addr->getType()->getPointerAddressSpace() &&
6023 !DL->isNonIntegralPointerType(Addr->getType())) {
6024 // There are two reasons the address spaces might not match: a no-op
6025 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6026 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6027 // TODO: allow bitcast between different address space pointers with
6028 // the same size.
6029 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6030 SunkAddr =
6031 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6032 } else
6033 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6034 }
6035 }
6036 } else {
6037 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6038 // non-integral pointers, so in that case bail out now.
6039 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6040 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6041 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6042 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6043 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6044 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6045 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6046 (AddrMode.BaseGV &&
6047 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6048 return Modified;
6049
6050 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6051 << " for " << *MemoryInst << "\n");
6052 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6053 Value *Result = nullptr;
6054
6055 // Start with the base register. Do this first so that subsequent address
6056 // matching finds it last, which will prevent it from trying to match it
6057 // as the scaled value in case it happens to be a mul. That would be
6058 // problematic if we've sunk a different mul for the scale, because then
6059 // we'd end up sinking both muls.
6060 if (AddrMode.BaseReg) {
6061 Value *V = AddrMode.BaseReg;
6062 if (V->getType()->isPointerTy())
6063 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6064 if (V->getType() != IntPtrTy)
6065 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6066 Result = V;
6067 }
6068
6069 // Add the scale value.
6070 if (AddrMode.Scale) {
6071 Value *V = AddrMode.ScaledReg;
6072 if (V->getType() == IntPtrTy) {
6073 // done.
6074 } else if (V->getType()->isPointerTy()) {
6075 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6076 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6077 cast<IntegerType>(V->getType())->getBitWidth()) {
6078 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6079 } else {
6080 // It is only safe to sign extend the BaseReg if we know that the math
6081 // required to create it did not overflow before we extend it. Since
6082 // the original IR value was tossed in favor of a constant back when
6083 // the AddrMode was created we need to bail out gracefully if widths
6084 // do not match instead of extending it.
6085 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6086 if (I && (Result != AddrMode.BaseReg))
6087 I->eraseFromParent();
6088 return Modified;
6089 }
6090 if (AddrMode.Scale != 1)
6091 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6092 "sunkaddr");
6093 if (Result)
6094 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6095 else
6096 Result = V;
6097 }
6098
6099 // Add in the BaseGV if present.
6100 GlobalValue *BaseGV = AddrMode.BaseGV;
6101 if (BaseGV != nullptr) {
6102 Value *BaseGVPtr;
6103 if (BaseGV->isThreadLocal()) {
6104 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6105 } else {
6106 BaseGVPtr = BaseGV;
6107 }
6108 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6109 if (Result)
6110 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6111 else
6112 Result = V;
6113 }
6114
6115 // Add in the Base Offset if present.
6116 if (AddrMode.BaseOffs) {
6117 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6118 if (Result)
6119 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6120 else
6121 Result = V;
6122 }
6123
6124 if (!Result)
6125 SunkAddr = Constant::getNullValue(Addr->getType());
6126 else
6127 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6128 }
6129
6130 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6131 // Store the newly computed address into the cache. In the case we reused a
6132 // value, this should be idempotent.
6133 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6134
6135 // If we have no uses, recursively delete the value and all dead instructions
6136 // using it.
6137 if (Repl->use_empty()) {
6138 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6139 RecursivelyDeleteTriviallyDeadInstructions(
6140 Repl, TLInfo, nullptr,
6141 [&](Value *V) { removeAllAssertingVHReferences(V); });
6142 });
6143 }
6144 ++NumMemoryInsts;
6145 return true;
6146}
6147
6148/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6149/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6150/// only handle a 2 operand GEP in the same basic block or a splat constant
6151/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6152/// index.
6153///
6154/// If the existing GEP has a vector base pointer that is splat, we can look
6155/// through the splat to find the scalar pointer. If we can't find a scalar
6156/// pointer there's nothing we can do.
6157///
6158/// If we have a GEP with more than 2 indices where the middle indices are all
6159/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6160///
6161/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6162/// followed by a GEP with an all zeroes vector index. This will enable
6163/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6164/// zero index.
6165bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6166 Value *Ptr) {
6167 Value *NewAddr;
6168
6169 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6170 // Don't optimize GEPs that don't have indices.
6171 if (!GEP->hasIndices())
6172 return false;
6173
6174 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6175 // FIXME: We should support this by sinking the GEP.
6176 if (MemoryInst->getParent() != GEP->getParent())
6177 return false;
6178
6179 SmallVector<Value *, 2> Ops(GEP->operands());
6180
6181 bool RewriteGEP = false;
6182
6183 if (Ops[0]->getType()->isVectorTy()) {
6184 Ops[0] = getSplatValue(Ops[0]);
6185 if (!Ops[0])
6186 return false;
6187 RewriteGEP = true;
6188 }
6189
6190 unsigned FinalIndex = Ops.size() - 1;
6191
6192 // Ensure all but the last index is 0.
6193 // FIXME: This isn't strictly required. All that's required is that they are
6194 // all scalars or splats.
6195 for (unsigned i = 1; i < FinalIndex; ++i) {
6196 auto *C = dyn_cast<Constant>(Ops[i]);
6197 if (!C)
6198 return false;
6199 if (isa<VectorType>(C->getType()))
6200 C = C->getSplatValue();
6201 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6202 if (!CI || !CI->isZero())
6203 return false;
6204 // Scalarize the index if needed.
6205 Ops[i] = CI;
6206 }
6207
6208 // Try to scalarize the final index.
6209 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6210 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6211 auto *C = dyn_cast<ConstantInt>(V);
6212 // Don't scalarize all zeros vector.
6213 if (!C || !C->isZero()) {
6214 Ops[FinalIndex] = V;
6215 RewriteGEP = true;
6216 }
6217 }
6218 }
6219
6220 // If we made any changes or the we have extra operands, we need to generate
6221 // new instructions.
6222 if (!RewriteGEP && Ops.size() == 2)
6223 return false;
6224
6225 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6226
6227 IRBuilder<> Builder(MemoryInst);
6228
6229 Type *SourceTy = GEP->getSourceElementType();
6230 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6231
6232 // If the final index isn't a vector, emit a scalar GEP containing all ops
6233 // and a vector GEP with all zeroes final index.
6234 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6235 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6236 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6237 auto *SecondTy = GetElementPtrInst::getIndexedType(
6238 SourceTy, ArrayRef(Ops).drop_front());
6239 NewAddr =
6240 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6241 } else {
6242 Value *Base = Ops[0];
6243 Value *Index = Ops[FinalIndex];
6244
6245 // Create a scalar GEP if there are more than 2 operands.
6246 if (Ops.size() != 2) {
6247 // Replace the last index with 0.
6248 Ops[FinalIndex] =
6249 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6250 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6252 SourceTy, ArrayRef(Ops).drop_front());
6253 }
6254
6255 // Now create the GEP with scalar pointer and vector index.
6256 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6257 }
6258 } else if (!isa<Constant>(Ptr)) {
6259 // Not a GEP, maybe its a splat and we can create a GEP to enable
6260 // SelectionDAGBuilder to use it as a uniform base.
6262 if (!V)
6263 return false;
6264
6265 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6266
6267 IRBuilder<> Builder(MemoryInst);
6268
6269 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6270 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6271 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6272 Type *ScalarTy;
6273 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6274 Intrinsic::masked_gather) {
6275 ScalarTy = MemoryInst->getType()->getScalarType();
6276 } else {
6277 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6278 Intrinsic::masked_scatter);
6279 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6280 }
6281 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6282 } else {
6283 // Constant, SelectionDAGBuilder knows to check if its a splat.
6284 return false;
6285 }
6286
6287 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6288
6289 // If we have no uses, recursively delete the value and all dead instructions
6290 // using it.
6291 if (Ptr->use_empty())
6293 Ptr, TLInfo, nullptr,
6294 [&](Value *V) { removeAllAssertingVHReferences(V); });
6295
6296 return true;
6297}
6298
6299/// If there are any memory operands, use OptimizeMemoryInst to sink their
6300/// address computing into the block when possible / profitable.
6301bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6302 bool MadeChange = false;
6303
6304 const TargetRegisterInfo *TRI =
6305 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6306 TargetLowering::AsmOperandInfoVector TargetConstraints =
6307 TLI->ParseConstraints(*DL, TRI, *CS);
6308 unsigned ArgNo = 0;
6309 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6310 // Compute the constraint code and ConstraintType to use.
6311 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6312
6313 // TODO: Also handle C_Address?
6314 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6315 OpInfo.isIndirect) {
6316 Value *OpVal = CS->getArgOperand(ArgNo++);
6317 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6318 } else if (OpInfo.Type == InlineAsm::isInput)
6319 ArgNo++;
6320 }
6321
6322 return MadeChange;
6323}
6324
6325/// Check if all the uses of \p Val are equivalent (or free) zero or
6326/// sign extensions.
6327static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6328 assert(!Val->use_empty() && "Input must have at least one use");
6329 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6330 bool IsSExt = isa<SExtInst>(FirstUser);
6331 Type *ExtTy = FirstUser->getType();
6332 for (const User *U : Val->users()) {
6333 const Instruction *UI = cast<Instruction>(U);
6334 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6335 return false;
6336 Type *CurTy = UI->getType();
6337 // Same input and output types: Same instruction after CSE.
6338 if (CurTy == ExtTy)
6339 continue;
6340
6341 // If IsSExt is true, we are in this situation:
6342 // a = Val
6343 // b = sext ty1 a to ty2
6344 // c = sext ty1 a to ty3
6345 // Assuming ty2 is shorter than ty3, this could be turned into:
6346 // a = Val
6347 // b = sext ty1 a to ty2
6348 // c = sext ty2 b to ty3
6349 // However, the last sext is not free.
6350 if (IsSExt)
6351 return false;
6352
6353 // This is a ZExt, maybe this is free to extend from one type to another.
6354 // In that case, we would not account for a different use.
6355 Type *NarrowTy;
6356 Type *LargeTy;
6357 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6358 CurTy->getScalarType()->getIntegerBitWidth()) {
6359 NarrowTy = CurTy;
6360 LargeTy = ExtTy;
6361 } else {
6362 NarrowTy = ExtTy;
6363 LargeTy = CurTy;
6364 }
6365
6366 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6367 return false;
6368 }
6369 // All uses are the same or can be derived from one another for free.
6370 return true;
6371}
6372
6373/// Try to speculatively promote extensions in \p Exts and continue
6374/// promoting through newly promoted operands recursively as far as doing so is
6375/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6376/// When some promotion happened, \p TPT contains the proper state to revert
6377/// them.
6378///
6379/// \return true if some promotion happened, false otherwise.
6380bool CodeGenPrepare::tryToPromoteExts(
6381 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6382 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6383 unsigned CreatedInstsCost) {
6384 bool Promoted = false;
6385
6386 // Iterate over all the extensions to try to promote them.
6387 for (auto *I : Exts) {
6388 // Early check if we directly have ext(load).
6389 if (isa<LoadInst>(I->getOperand(0))) {
6390 ProfitablyMovedExts.push_back(I);
6391 continue;
6392 }
6393
6394 // Check whether or not we want to do any promotion. The reason we have
6395 // this check inside the for loop is to catch the case where an extension
6396 // is directly fed by a load because in such case the extension can be moved
6397 // up without any promotion on its operands.
6399 return false;
6400
6401 // Get the action to perform the promotion.
6402 TypePromotionHelper::Action TPH =
6403 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6404 // Check if we can promote.
6405 if (!TPH) {
6406 // Save the current extension as we cannot move up through its operand.
6407 ProfitablyMovedExts.push_back(I);
6408 continue;
6409 }
6410
6411 // Save the current state.
6412 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6413 TPT.getRestorationPoint();
6415 unsigned NewCreatedInstsCost = 0;
6416 unsigned ExtCost = !TLI->isExtFree(I);
6417 // Promote.
6418 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6419 &NewExts, nullptr, *TLI);
6420 assert(PromotedVal &&
6421 "TypePromotionHelper should have filtered out those cases");
6422
6423 // We would be able to merge only one extension in a load.
6424 // Therefore, if we have more than 1 new extension we heuristically
6425 // cut this search path, because it means we degrade the code quality.
6426 // With exactly 2, the transformation is neutral, because we will merge
6427 // one extension but leave one. However, we optimistically keep going,
6428 // because the new extension may be removed too. Also avoid replacing a
6429 // single free extension with multiple extensions, as this increases the
6430 // number of IR instructions while not providing any savings.
6431 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6432 // FIXME: It would be possible to propagate a negative value instead of
6433 // conservatively ceiling it to 0.
6434 TotalCreatedInstsCost =
6435 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6436 if (!StressExtLdPromotion &&
6437 (TotalCreatedInstsCost > 1 ||
6438 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6439 (ExtCost == 0 && NewExts.size() > 1))) {
6440 // This promotion is not profitable, rollback to the previous state, and
6441 // save the current extension in ProfitablyMovedExts as the latest
6442 // speculative promotion turned out to be unprofitable.
6443 TPT.rollback(LastKnownGood);
6444 ProfitablyMovedExts.push_back(I);
6445 continue;
6446 }
6447 // Continue promoting NewExts as far as doing so is profitable.
6448 SmallVector<Instruction *, 2> NewlyMovedExts;
6449 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6450 bool NewPromoted = false;
6451 for (auto *ExtInst : NewlyMovedExts) {
6452 Instruction *MovedExt = cast<Instruction>(ExtInst);
6453 Value *ExtOperand = MovedExt->getOperand(0);
6454 // If we have reached to a load, we need this extra profitability check
6455 // as it could potentially be merged into an ext(load).
6456 if (isa<LoadInst>(ExtOperand) &&
6457 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6458 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6459 continue;
6460
6461 ProfitablyMovedExts.push_back(MovedExt);
6462 NewPromoted = true;
6463 }
6464
6465 // If none of speculative promotions for NewExts is profitable, rollback
6466 // and save the current extension (I) as the last profitable extension.
6467 if (!NewPromoted) {
6468 TPT.rollback(LastKnownGood);
6469 ProfitablyMovedExts.push_back(I);
6470 continue;
6471 }
6472 // The promotion is profitable.
6473 Promoted = true;
6474 }
6475 return Promoted;
6476}
6477
6478/// Merging redundant sexts when one is dominating the other.
6479bool CodeGenPrepare::mergeSExts(Function &F) {
6480 bool Changed = false;
6481 for (auto &Entry : ValToSExtendedUses) {
6482 SExts &Insts = Entry.second;
6483 SExts CurPts;
6484 for (Instruction *Inst : Insts) {
6485 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6486 Inst->getOperand(0) != Entry.first)
6487 continue;
6488 bool inserted = false;
6489 for (auto &Pt : CurPts) {
6490 if (getDT(F).dominates(Inst, Pt)) {
6491 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6492 RemovedInsts.insert(Pt);
6493 Pt->removeFromParent();
6494 Pt = Inst;
6495 inserted = true;
6496 Changed = true;
6497 break;
6498 }
6499 if (!getDT(F).dominates(Pt, Inst))
6500 // Give up if we need to merge in a common dominator as the
6501 // experiments show it is not profitable.
6502 continue;
6503 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6504 RemovedInsts.insert(Inst);
6505 Inst->removeFromParent();
6506 inserted = true;
6507 Changed = true;
6508 break;
6509 }
6510 if (!inserted)
6511 CurPts.push_back(Inst);
6512 }
6513 }
6514 return Changed;
6515}
6516
6517// Splitting large data structures so that the GEPs accessing them can have
6518// smaller offsets so that they can be sunk to the same blocks as their users.
6519// For example, a large struct starting from %base is split into two parts
6520// where the second part starts from %new_base.
6521//
6522// Before:
6523// BB0:
6524// %base =
6525//
6526// BB1:
6527// %gep0 = gep %base, off0
6528// %gep1 = gep %base, off1
6529// %gep2 = gep %base, off2
6530//
6531// BB2:
6532// %load1 = load %gep0
6533// %load2 = load %gep1
6534// %load3 = load %gep2
6535//
6536// After:
6537// BB0:
6538// %base =
6539// %new_base = gep %base, off0
6540//
6541// BB1:
6542// %new_gep0 = %new_base
6543// %new_gep1 = gep %new_base, off1 - off0
6544// %new_gep2 = gep %new_base, off2 - off0
6545//
6546// BB2:
6547// %load1 = load i32, i32* %new_gep0
6548// %load2 = load i32, i32* %new_gep1
6549// %load3 = load i32, i32* %new_gep2
6550//
6551// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6552// their offsets are smaller enough to fit into the addressing mode.
6553bool CodeGenPrepare::splitLargeGEPOffsets() {
6554 bool Changed = false;
6555 for (auto &Entry : LargeOffsetGEPMap) {
6556 Value *OldBase = Entry.first;
6558 &LargeOffsetGEPs = Entry.second;
6559 auto compareGEPOffset =
6560 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6561 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6562 if (LHS.first == RHS.first)
6563 return false;
6564 if (LHS.second != RHS.second)
6565 return LHS.second < RHS.second;
6566 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6567 };
6568 // Sorting all the GEPs of the same data structures based on the offsets.
6569 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6570 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6571 // Skip if all the GEPs have the same offsets.
6572 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6573 continue;
6574 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6575 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6576 Value *NewBaseGEP = nullptr;
6577
6578 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6580 LLVMContext &Ctx = GEP->getContext();
6581 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6582 Type *I8PtrTy =
6583 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6584
6585 BasicBlock::iterator NewBaseInsertPt;
6586 BasicBlock *NewBaseInsertBB;
6587 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6588 // If the base of the struct is an instruction, the new base will be
6589 // inserted close to it.
6590 NewBaseInsertBB = BaseI->getParent();
6591 if (isa<PHINode>(BaseI))
6592 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6593 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6594 NewBaseInsertBB =
6595 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6596 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6597 } else
6598 NewBaseInsertPt = std::next(BaseI->getIterator());
6599 } else {
6600 // If the current base is an argument or global value, the new base
6601 // will be inserted to the entry block.
6602 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6603 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6604 }
6605 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6606 // Create a new base.
6607 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6608 NewBaseGEP = OldBase;
6609 if (NewBaseGEP->getType() != I8PtrTy)
6610 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6611 NewBaseGEP =
6612 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6613 NewGEPBases.insert(NewBaseGEP);
6614 return;
6615 };
6616
6617 // Check whether all the offsets can be encoded with prefered common base.
6618 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6619 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6620 BaseOffset = PreferBase;
6621 // Create a new base if the offset of the BaseGEP can be decoded with one
6622 // instruction.
6623 createNewBase(BaseOffset, OldBase, BaseGEP);
6624 }
6625
6626 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6627 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6628 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6629 int64_t Offset = LargeOffsetGEP->second;
6630 if (Offset != BaseOffset) {
6632 AddrMode.HasBaseReg = true;
6633 AddrMode.BaseOffs = Offset - BaseOffset;
6634 // The result type of the GEP might not be the type of the memory
6635 // access.
6636 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6637 GEP->getResultElementType(),
6638 GEP->getAddressSpace())) {
6639 // We need to create a new base if the offset to the current base is
6640 // too large to fit into the addressing mode. So, a very large struct
6641 // may be split into several parts.
6642 BaseGEP = GEP;
6643 BaseOffset = Offset;
6644 NewBaseGEP = nullptr;
6645 }
6646 }
6647
6648 // Generate a new GEP to replace the current one.
6649 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6650
6651 if (!NewBaseGEP) {
6652 // Create a new base if we don't have one yet. Find the insertion
6653 // pointer for the new base first.
6654 createNewBase(BaseOffset, OldBase, GEP);
6655 }
6656
6657 IRBuilder<> Builder(GEP);
6658 Value *NewGEP = NewBaseGEP;
6659 if (Offset != BaseOffset) {
6660 // Calculate the new offset for the new GEP.
6661 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6662 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6663 }
6664 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6665 LargeOffsetGEPID.erase(GEP);
6666 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6667 GEP->eraseFromParent();
6668 Changed = true;
6669 }
6670 }
6671 return Changed;
6672}
6673
6674bool CodeGenPrepare::optimizePhiType(
6676 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6677 // We are looking for a collection on interconnected phi nodes that together
6678 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6679 // are of the same type. Convert the whole set of nodes to the type of the
6680 // bitcast.
6681 Type *PhiTy = I->getType();
6682 Type *ConvertTy = nullptr;
6683 if (Visited.count(I) ||
6684 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6685 return false;
6686
6688 Worklist.push_back(cast<Instruction>(I));
6691 PhiNodes.insert(I);
6692 Visited.insert(I);
6695 // This works by adding extra bitcasts between load/stores and removing
6696 // existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6697 // we can get in the situation where we remove a bitcast in one iteration
6698 // just to add it again in the next. We need to ensure that at least one
6699 // bitcast we remove are anchored to something that will not change back.
6700 bool AnyAnchored = false;
6701
6702 while (!Worklist.empty()) {
6703 Instruction *II = Worklist.pop_back_val();
6704
6705 if (auto *Phi = dyn_cast<PHINode>(II)) {
6706 // Handle Defs, which might also be PHI's
6707 for (Value *V : Phi->incoming_values()) {
6708 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6709 if (!PhiNodes.count(OpPhi)) {
6710 if (!Visited.insert(OpPhi).second)
6711 return false;
6712 PhiNodes.insert(OpPhi);
6713 Worklist.push_back(OpPhi);
6714 }
6715 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6716 if (!OpLoad->isSimple())
6717 return false;
6718 if (Defs.insert(OpLoad).second)
6719 Worklist.push_back(OpLoad);
6720 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6721 if (Defs.insert(OpEx).second)
6722 Worklist.push_back(OpEx);
6723 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6724 if (!ConvertTy)
6725 ConvertTy = OpBC->getOperand(0)->getType();
6726 if (OpBC->getOperand(0)->getType() != ConvertTy)
6727 return false;
6728 if (Defs.insert(OpBC).second) {
6729 Worklist.push_back(OpBC);
6730 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6731 !isa<ExtractElementInst>(OpBC->getOperand(0));
6732 }
6733 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6734 Constants.insert(OpC);
6735 else
6736 return false;
6737 }
6738 }
6739
6740 // Handle uses which might also be phi's
6741 for (User *V : II->users()) {
6742 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6743 if (!PhiNodes.count(OpPhi)) {
6744 if (Visited.count(OpPhi))
6745 return false;
6746 PhiNodes.insert(OpPhi);
6747 Visited.insert(OpPhi);
6748 Worklist.push_back(OpPhi);
6749 }
6750 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6751 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6752 return false;
6753 Uses.insert(OpStore);
6754 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6755 if (!ConvertTy)
6756 ConvertTy = OpBC->getType();
6757 if (OpBC->getType() != ConvertTy)
6758 return false;
6759 Uses.insert(OpBC);
6760 AnyAnchored |=
6761 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6762 } else {
6763 return false;
6764 }
6765 }
6766 }
6767
6768 if (!ConvertTy || !AnyAnchored ||
6769 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6770 return false;
6771
6772 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6773 << *ConvertTy << "\n");
6774
6775 // Create all the new phi nodes of the new type, and bitcast any loads to the
6776 // correct type.
6777 ValueToValueMap ValMap;
6778 for (ConstantData *C : Constants)
6779 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6780 for (Instruction *D : Defs) {
6781 if (isa<BitCastInst>(D)) {
6782 ValMap[D] = D->getOperand(0);
6783 DeletedInstrs.insert(D);
6784 } else {
6785 BasicBlock::iterator insertPt = std::next(D->getIterator());
6786 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6787 }
6788 }
6789 for (PHINode *Phi : PhiNodes)
6790 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6791 Phi->getName() + ".tc", Phi->getIterator());
6792 // Pipe together all the PhiNodes.
6793 for (PHINode *Phi : PhiNodes) {
6794 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6795 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6796 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6797 Phi->getIncomingBlock(i));
6798 Visited.insert(NewPhi);
6799 }
6800 // And finally pipe up the stores and bitcasts
6801 for (Instruction *U : Uses) {
6802 if (isa<BitCastInst>(U)) {
6803 DeletedInstrs.insert(U);
6804 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6805 } else {
6806 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6807 U->getIterator()));
6808 }
6809 }
6810
6811 // Save the removed phis to be deleted later.
6812 for (PHINode *Phi : PhiNodes)
6813 DeletedInstrs.insert(Phi);
6814 return true;
6815}
6816
6817bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6818 if (!OptimizePhiTypes)
6819 return false;
6820
6821 bool Changed = false;
6823 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6824
6825 // Attempt to optimize all the phis in the functions to the correct type.
6826 for (auto &BB : F)
6827 for (auto &Phi : BB.phis())
6828 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6829
6830 // Remove any old phi's that have been converted.
6831 for (auto *I : DeletedInstrs) {
6832 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6833 I->eraseFromParent();
6834 }
6835
6836 return Changed;
6837}
6838
6839/// Return true, if an ext(load) can be formed from an extension in
6840/// \p MovedExts.
6841bool CodeGenPrepare::canFormExtLd(
6842 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6843 Instruction *&Inst, bool HasPromoted) {
6844 for (auto *MovedExtInst : MovedExts) {
6845 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6846 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6847 Inst = MovedExtInst;
6848 break;
6849 }
6850 }
6851 if (!LI)
6852 return false;
6853
6854 // If they're already in the same block, there's nothing to do.
6855 // Make the cheap checks first if we did not promote.
6856 // If we promoted, we need to check if it is indeed profitable.
6857 if (!HasPromoted && LI->getParent() == Inst->getParent())
6858 return false;
6859
6860 return TLI->isExtLoad(LI, Inst, *DL);
6861}
6862
6863/// Move a zext or sext fed by a load into the same basic block as the load,
6864/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6865/// extend into the load.
6866///
6867/// E.g.,
6868/// \code
6869/// %ld = load i32* %addr
6870/// %add = add nuw i32 %ld, 4
6871/// %zext = zext i32 %add to i64
6872// \endcode
6873/// =>
6874/// \code
6875/// %ld = load i32* %addr
6876/// %zext = zext i32 %ld to i64
6877/// %add = add nuw i64 %zext, 4
6878/// \encode
6879/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
6880/// allow us to match zext(load i32*) to i64.
6881///
6882/// Also, try to promote the computations used to obtain a sign extended
6883/// value used into memory accesses.
6884/// E.g.,
6885/// \code
6886/// a = add nsw i32 b, 3
6887/// d = sext i32 a to i64
6888/// e = getelementptr ..., i64 d
6889/// \endcode
6890/// =>
6891/// \code
6892/// f = sext i32 b to i64
6893/// a = add nsw i64 f, 3
6894/// e = getelementptr ..., i64 a
6895/// \endcode
6896///
6897/// \p Inst[in/out] the extension may be modified during the process if some
6898/// promotions apply.
6899bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6900 bool AllowPromotionWithoutCommonHeader = false;
6901 /// See if it is an interesting sext operations for the address type
6902 /// promotion before trying to promote it, e.g., the ones with the right
6903 /// type and used in memory accesses.
6904 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6905 *Inst, AllowPromotionWithoutCommonHeader);
6906 TypePromotionTransaction TPT(RemovedInsts);
6907 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6908 TPT.getRestorationPoint();
6910 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6911 Exts.push_back(Inst);
6912
6913 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6914
6915 // Look for a load being extended.
6916 LoadInst *LI = nullptr;
6917 Instruction *ExtFedByLoad;
6918
6919 // Try to promote a chain of computation if it allows to form an extended
6920 // load.
6921 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6922 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6923 TPT.commit();
6924 // Move the extend into the same block as the load.
6925 ExtFedByLoad->moveAfter(LI);
6926 ++NumExtsMoved;
6927 Inst = ExtFedByLoad;
6928 return true;
6929 }
6930
6931 // Continue promoting SExts if known as considerable depending on targets.
6932 if (ATPConsiderable &&
6933 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6934 HasPromoted, TPT, SpeculativelyMovedExts))
6935 return true;
6936
6937 TPT.rollback(LastKnownGood);
6938 return false;
6939}
6940
6941// Perform address type promotion if doing so is profitable.
6942// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6943// instructions that sign extended the same initial value. However, if
6944// AllowPromotionWithoutCommonHeader == true, we expect promoting the
6945// extension is just profitable.
6946bool CodeGenPrepare::performAddressTypePromotion(
6947 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6948 bool HasPromoted, TypePromotionTransaction &TPT,
6949 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6950 bool Promoted = false;
6951 SmallPtrSet<Instruction *, 1> UnhandledExts;
6952 bool AllSeenFirst = true;
6953 for (auto *I : SpeculativelyMovedExts) {
6954 Value *HeadOfChain = I->getOperand(0);
6956 SeenChainsForSExt.find(HeadOfChain);
6957 // If there is an unhandled SExt which has the same header, try to promote
6958 // it as well.
6959 if (AlreadySeen != SeenChainsForSExt.end()) {
6960 if (AlreadySeen->second != nullptr)
6961 UnhandledExts.insert(AlreadySeen->second);
6962 AllSeenFirst = false;
6963 }
6964 }
6965
6966 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6967 SpeculativelyMovedExts.size() == 1)) {
6968 TPT.commit();
6969 if (HasPromoted)
6970 Promoted = true;
6971 for (auto *I : SpeculativelyMovedExts) {
6972 Value *HeadOfChain = I->getOperand(0);
6973 SeenChainsForSExt[HeadOfChain] = nullptr;
6974 ValToSExtendedUses[HeadOfChain].push_back(I);
6975 }
6976 // Update Inst as promotion happen.
6977 Inst = SpeculativelyMovedExts.pop_back_val();
6978 } else {
6979 // This is the first chain visited from the header, keep the current chain
6980 // as unhandled. Defer to promote this until we encounter another SExt
6981 // chain derived from the same header.
6982 for (auto *I : SpeculativelyMovedExts) {
6983 Value *HeadOfChain = I->getOperand(0);
6984 SeenChainsForSExt[HeadOfChain] = Inst;
6985 }
6986 return false;
6987 }
6988
6989 if (!AllSeenFirst && !UnhandledExts.empty())
6990 for (auto *VisitedSExt : UnhandledExts) {
6991 if (RemovedInsts.count(VisitedSExt))
6992 continue;
6993 TypePromotionTransaction TPT(RemovedInsts);
6996 Exts.push_back(VisitedSExt);
6997 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6998 TPT.commit();
6999 if (HasPromoted)
7000 Promoted = true;
7001 for (auto *I : Chains) {
7002 Value *HeadOfChain = I->getOperand(0);
7003 // Mark this as handled.
7004 SeenChainsForSExt[HeadOfChain] = nullptr;
7005 ValToSExtendedUses[HeadOfChain].push_back(I);
7006 }
7007 }
7008 return Promoted;
7009}
7010
7011bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7012 BasicBlock *DefBB = I->getParent();
7013
7014 // If the result of a {s|z}ext and its source are both live out, rewrite all
7015 // other uses of the source with result of extension.
7016 Value *Src = I->getOperand(0);
7017 if (Src->hasOneUse())
7018 return false;
7019
7020 // Only do this xform if truncating is free.
7021 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7022 return false;
7023
7024 // Only safe to perform the optimization if the source is also defined in
7025 // this block.
7026 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7027 return false;
7028
7029 bool DefIsLiveOut = false;
7030 for (User *U : I->users()) {
7031 Instruction *UI = cast<Instruction>(U);
7032
7033 // Figure out which BB this ext is used in.
7034 BasicBlock *UserBB = UI->getParent();
7035 if (UserBB == DefBB)
7036 continue;
7037 DefIsLiveOut = true;
7038 break;
7039 }
7040 if (!DefIsLiveOut)
7041 return false;
7042
7043 // Make sure none of the uses are PHI nodes.
7044 for (User *U : Src->users()) {
7045 Instruction *UI = cast<Instruction>(U);
7046 BasicBlock *UserBB = UI->getParent();
7047 if (UserBB == DefBB)
7048 continue;
7049 // Be conservative. We don't want this xform to end up introducing
7050 // reloads just before load / store instructions.
7051 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7052 return false;
7053 }
7054
7055 // InsertedTruncs - Only insert one trunc in each block once.
7057
7058 bool MadeChange = false;
7059 for (Use &U : Src->uses()) {
7060 Instruction *User = cast<Instruction>(U.getUser());
7061
7062 // Figure out which BB this ext is used in.
7063 BasicBlock *UserBB = User->getParent();
7064 if (UserBB == DefBB)
7065 continue;
7066
7067 // Both src and def are live in this block. Rewrite the use.
7068 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7069
7070 if (!InsertedTrunc) {
7071 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7072 assert(InsertPt != UserBB->end());
7073 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7074 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7075 InsertedInsts.insert(InsertedTrunc);
7076 }
7077
7078 // Replace a use of the {s|z}ext source with a use of the result.
7079 U = InsertedTrunc;
7080 ++NumExtUses;
7081 MadeChange = true;
7082 }
7083
7084 return MadeChange;
7085}
7086
7087// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7088// just after the load if the target can fold this into one extload instruction,
7089// with the hope of eliminating some of the other later "and" instructions using
7090// the loaded value. "and"s that are made trivially redundant by the insertion
7091// of the new "and" are removed by this function, while others (e.g. those whose
7092// path from the load goes through a phi) are left for isel to potentially
7093// remove.
7094//
7095// For example:
7096//
7097// b0:
7098// x = load i32
7099// ...
7100// b1:
7101// y = and x, 0xff
7102// z = use y
7103//
7104// becomes:
7105//
7106// b0:
7107// x = load i32
7108// x' = and x, 0xff
7109// ...
7110// b1:
7111// z = use x'
7112//
7113// whereas:
7114//
7115// b0:
7116// x1 = load i32
7117// ...
7118// b1:
7119// x2 = load i32
7120// ...
7121// b2:
7122// x = phi x1, x2
7123// y = and x, 0xff
7124//
7125// becomes (after a call to optimizeLoadExt for each load):
7126//
7127// b0:
7128// x1 = load i32
7129// x1' = and x1, 0xff
7130// ...
7131// b1:
7132// x2 = load i32
7133// x2' = and x2, 0xff
7134// ...
7135// b2:
7136// x = phi x1', x2'
7137// y = and x, 0xff
7138bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7139 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7140 return false;
7141
7142 // Skip loads we've already transformed.
7143 if (Load->hasOneUse() &&
7144 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7145 return false;
7146
7147 // Look at all uses of Load, looking through phis, to determine how many bits
7148 // of the loaded value are needed.
7151 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7153 for (auto *U : Load->users())
7154 WorkList.push_back(cast<Instruction>(U));
7155
7156 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7157 unsigned BitWidth = LoadResultVT.getSizeInBits();
7158 // If the BitWidth is 0, do not try to optimize the type
7159 if (BitWidth == 0)
7160 return false;
7161
7162 APInt DemandBits(BitWidth, 0);
7163 APInt WidestAndBits(BitWidth, 0);
7164
7165 while (!WorkList.empty()) {
7166 Instruction *I = WorkList.pop_back_val();
7167
7168 // Break use-def graph loops.
7169 if (!Visited.insert(I).second)
7170 continue;
7171
7172 // For a PHI node, push all of its users.
7173 if (auto *Phi = dyn_cast<PHINode>(I)) {
7174 for (auto *U : Phi->users())
7175 WorkList.push_back(cast<Instruction>(U));
7176 continue;
7177 }
7178
7179 switch (I->getOpcode()) {
7180 case Instruction::And: {
7181 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7182 if (!AndC)
7183 return false;
7184 APInt AndBits = AndC->getValue();
7185 DemandBits |= AndBits;
7186 // Keep track of the widest and mask we see.
7187 if (AndBits.ugt(WidestAndBits))
7188 WidestAndBits = AndBits;
7189 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7190 AndsToMaybeRemove.push_back(I);
7191 break;
7192 }
7193
7194 case Instruction::Shl: {
7195 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7196 if (!ShlC)
7197 return false;
7198 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7199 DemandBits.setLowBits(BitWidth - ShiftAmt);
7200 DropFlags.push_back(I);
7201 break;
7202 }
7203
7204 case Instruction::Trunc: {
7205 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7206 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7207 DemandBits.setLowBits(TruncBitWidth);
7208 DropFlags.push_back(I);
7209 break;
7210 }
7211
7212 default:
7213 return false;
7214 }
7215 }
7216
7217 uint32_t ActiveBits = DemandBits.getActiveBits();
7218 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7219 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7220 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7221 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7222 // followed by an AND.
7223 // TODO: Look into removing this restriction by fixing backends to either
7224 // return false for isLoadExtLegal for i1 or have them select this pattern to
7225 // a single instruction.
7226 //
7227 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7228 // mask, since these are the only ands that will be removed by isel.
7229 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7230 WidestAndBits != DemandBits)
7231 return false;
7232
7233 LLVMContext &Ctx = Load->getType()->getContext();
7234 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7235 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7236
7237 // Reject cases that won't be matched as extloads.
7238 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7239 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7240 return false;
7241
7242 IRBuilder<> Builder(Load->getNextNonDebugInstruction());
7243 auto *NewAnd = cast<Instruction>(
7244 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7245 // Mark this instruction as "inserted by CGP", so that other
7246 // optimizations don't touch it.
7247 InsertedInsts.insert(NewAnd);
7248
7249 // Replace all uses of load with new and (except for the use of load in the
7250 // new and itself).
7251 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7252 NewAnd->setOperand(0, Load);
7253
7254 // Remove any and instructions that are now redundant.
7255 for (auto *And : AndsToMaybeRemove)
7256 // Check that the and mask is the same as the one we decided to put on the
7257 // new and.
7258 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7259 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7260 if (&*CurInstIterator == And)
7261 CurInstIterator = std::next(And->getIterator());
7262 And->eraseFromParent();
7263 ++NumAndUses;
7264 }
7265
7266 // NSW flags may not longer hold.
7267 for (auto *Inst : DropFlags)
7268 Inst->setHasNoSignedWrap(false);
7269
7270 ++NumAndsAdded;
7271 return true;
7272}
7273
7274/// Check if V (an operand of a select instruction) is an expensive instruction
7275/// that is only used once.
7277 auto *I = dyn_cast<Instruction>(V);
7278 // If it's safe to speculatively execute, then it should not have side
7279 // effects; therefore, it's safe to sink and possibly *not* execute.
7280 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7282}
7283
7284/// Returns true if a SelectInst should be turned into an explicit branch.
7286 const TargetLowering *TLI,
7287 SelectInst *SI) {
7288 // If even a predictable select is cheap, then a branch can't be cheaper.
7289 if (!TLI->isPredictableSelectExpensive())
7290 return false;
7291
7292 // FIXME: This should use the same heuristics as IfConversion to determine
7293 // whether a select is better represented as a branch.
7294
7295 // If metadata tells us that the select condition is obviously predictable,
7296 // then we want to replace the select with a branch.
7297 uint64_t TrueWeight, FalseWeight;
7298 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7299 uint64_t Max = std::max(TrueWeight, FalseWeight);
7300 uint64_t Sum = TrueWeight + FalseWeight;
7301 if (Sum != 0) {
7302 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7303 if (Probability > TTI->getPredictableBranchThreshold())
7304 return true;
7305 }
7306 }
7307
7308 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7309
7310 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7311 // comparison condition. If the compare has more than one use, there's
7312 // probably another cmov or setcc around, so it's not worth emitting a branch.
7313 if (!Cmp || !Cmp->hasOneUse())
7314 return false;
7315
7316 // If either operand of the select is expensive and only needed on one side
7317 // of the select, we should form a branch.
7318 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7319 sinkSelectOperand(TTI, SI->getFalseValue()))
7320 return true;
7321
7322 return false;
7323}
7324
7325/// If \p isTrue is true, return the true value of \p SI, otherwise return
7326/// false value of \p SI. If the true/false value of \p SI is defined by any
7327/// select instructions in \p Selects, look through the defining select
7328/// instruction until the true/false value is not defined in \p Selects.
7329static Value *
7331 const SmallPtrSet<const Instruction *, 2> &Selects) {
7332 Value *V = nullptr;
7333
7334 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7335 DefSI = dyn_cast<SelectInst>(V)) {
7336 assert(DefSI->getCondition() == SI->getCondition() &&
7337 "The condition of DefSI does not match with SI");
7338 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7339 }
7340
7341 assert(V && "Failed to get select true/false value");
7342 return V;
7343}
7344
7345bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7346 assert(Shift->isShift() && "Expected a shift");
7347
7348 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7349 // general vector shifts, and (3) the shift amount is a select-of-splatted
7350 // values, hoist the shifts before the select:
7351 // shift Op0, (select Cond, TVal, FVal) -->
7352 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7353 //
7354 // This is inverting a generic IR transform when we know that the cost of a
7355 // general vector shift is more than the cost of 2 shift-by-scalars.
7356 // We can't do this effectively in SDAG because we may not be able to
7357 // determine if the select operands are splats from within a basic block.
7358 Type *Ty = Shift->getType();
7359 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7360 return false;
7361 Value *Cond, *TVal, *FVal;
7362 if (!match(Shift->getOperand(1),
7363 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7364 return false;
7365 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7366 return false;
7367
7368 IRBuilder<> Builder(Shift);
7369 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7370 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7371 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7372 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7373 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7374 Shift->eraseFromParent();
7375 return true;
7376}
7377
7378bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7379 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7380 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7381 "Expected a funnel shift");
7382
7383 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7384 // than general vector shifts, and (3) the shift amount is select-of-splatted
7385 // values, hoist the funnel shifts before the select:
7386 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7387 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7388 //
7389 // This is inverting a generic IR transform when we know that the cost of a
7390 // general vector shift is more than the cost of 2 shift-by-scalars.
7391 // We can't do this effectively in SDAG because we may not be able to
7392 // determine if the select operands are splats from within a basic block.
7393 Type *Ty = Fsh->getType();
7394 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7395 return false;
7396 Value *Cond, *TVal, *FVal;
7397 if (!match(Fsh->getOperand(2),
7398 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7399 return false;
7400 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7401 return false;
7402
7403 IRBuilder<> Builder(Fsh);
7404 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7405 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7406 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7407 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7408 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7409 Fsh->eraseFromParent();
7410 return true;
7411}
7412
7413/// If we have a SelectInst that will likely profit from branch prediction,
7414/// turn it into a branch.
7415bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7417 return false;
7418
7419 // If the SelectOptimize pass is enabled, selects have already been optimized.
7421 return false;
7422
7423 // Find all consecutive select instructions that share the same condition.
7425 ASI.push_back(SI);
7427 It != SI->getParent()->end(); ++It) {
7428 SelectInst *I = dyn_cast<SelectInst>(&*It);
7429 if (I && SI->getCondition() == I->getCondition()) {
7430 ASI.push_back(I);
7431 } else {
7432 break;
7433 }
7434 }
7435
7436 SelectInst *LastSI = ASI.back();
7437 // Increment the current iterator to skip all the rest of select instructions
7438 // because they will be either "not lowered" or "all lowered" to branch.
7439 CurInstIterator = std::next(LastSI->getIterator());
7440 // Examine debug-info attached to the consecutive select instructions. They
7441 // won't be individually optimised by optimizeInst, so we need to perform
7442 // DbgVariableRecord maintenence here instead.
7443 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7444 fixupDbgVariableRecordsOnInst(*SI);
7445
7446 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7447
7448 // Can we convert the 'select' to CF ?
7449 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7450 return false;
7451
7453 if (SI->getType()->isVectorTy())
7454 SelectKind = TargetLowering::ScalarCondVectorVal;
7455 else
7456 SelectKind = TargetLowering::ScalarValSelect;
7457
7458 if (TLI->isSelectSupported(SelectKind) &&
7460 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7461 return false;
7462
7463 // The DominatorTree needs to be rebuilt by any consumers after this
7464 // transformation. We simply reset here rather than setting the ModifiedDT
7465 // flag to avoid restarting the function walk in runOnFunction for each
7466 // select optimized.
7467 DT.reset();
7468
7469 // Transform a sequence like this:
7470 // start:
7471 // %cmp = cmp uge i32 %a, %b
7472 // %sel = select i1 %cmp, i32 %c, i32 %d
7473 //
7474 // Into:
7475 // start:
7476 // %cmp = cmp uge i32 %a, %b
7477 // %cmp.frozen = freeze %cmp
7478 // br i1 %cmp.frozen, label %select.true, label %select.false
7479 // select.true:
7480 // br label %select.end
7481 // select.false:
7482 // br label %select.end
7483 // select.end:
7484 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7485 //
7486 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7487 // In addition, we may sink instructions that produce %c or %d from
7488 // the entry block into the destination(s) of the new branch.
7489 // If the true or false blocks do not contain a sunken instruction, that
7490 // block and its branch may be optimized away. In that case, one side of the
7491 // first branch will point directly to select.end, and the corresponding PHI
7492 // predecessor block will be the start block.
7493
7494 // Collect values that go on the true side and the values that go on the false
7495 // side.
7496 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7497 for (SelectInst *SI : ASI) {
7498 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7499 TrueInstrs.push_back(cast<Instruction>(V));
7500 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7501 FalseInstrs.push_back(cast<Instruction>(V));
7502 }
7503
7504 // Split the select block, according to how many (if any) values go on each
7505 // side.
7506 BasicBlock *StartBlock = SI->getParent();
7507 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7508 // We should split before any debug-info.
7509 SplitPt.setHeadBit(true);
7510
7511 IRBuilder<> IB(SI);
7512 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7513
7514 BasicBlock *TrueBlock = nullptr;
7515 BasicBlock *FalseBlock = nullptr;
7516 BasicBlock *EndBlock = nullptr;
7517 BranchInst *TrueBranch = nullptr;
7518 BranchInst *FalseBranch = nullptr;
7519 if (TrueInstrs.size() == 0) {
7520 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7521 CondFr, SplitPt, false, nullptr, nullptr, LI));
7522 FalseBlock = FalseBranch->getParent();
7523 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7524 } else if (FalseInstrs.size() == 0) {
7525 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7526 CondFr, SplitPt, false, nullptr, nullptr, LI));
7527 TrueBlock = TrueBranch->getParent();
7528 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7529 } else {
7530 Instruction *ThenTerm = nullptr;
7531 Instruction *ElseTerm = nullptr;
7532 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7533 nullptr, nullptr, LI);
7534 TrueBranch = cast<BranchInst>(ThenTerm);
7535 FalseBranch = cast<BranchInst>(ElseTerm);
7536 TrueBlock = TrueBranch->getParent();
7537 FalseBlock = FalseBranch->getParent();
7538 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7539 }
7540
7541 EndBlock->setName("select.end");
7542 if (TrueBlock)
7543 TrueBlock->setName("select.true.sink");
7544 if (FalseBlock)
7545 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7546 : "select.false.sink");
7547
7548 if (IsHugeFunc) {
7549 if (TrueBlock)
7550 FreshBBs.insert(TrueBlock);
7551 if (FalseBlock)
7552 FreshBBs.insert(FalseBlock);
7553 FreshBBs.insert(EndBlock);
7554 }
7555
7556 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7557
7558 static const unsigned MD[] = {
7559 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7560 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7561 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7562
7563 // Sink expensive instructions into the conditional blocks to avoid executing
7564 // them speculatively.
7565 for (Instruction *I : TrueInstrs)
7566 I->moveBefore(TrueBranch->getIterator());
7567 for (Instruction *I : FalseInstrs)
7568 I->moveBefore(FalseBranch->getIterator());
7569
7570 // If we did not create a new block for one of the 'true' or 'false' paths
7571 // of the condition, it means that side of the branch goes to the end block
7572 // directly and the path originates from the start block from the point of
7573 // view of the new PHI.
7574 if (TrueBlock == nullptr)
7575 TrueBlock = StartBlock;
7576 else if (FalseBlock == nullptr)
7577 FalseBlock = StartBlock;
7578
7580 INS.insert(ASI.begin(), ASI.end());
7581 // Use reverse iterator because later select may use the value of the
7582 // earlier select, and we need to propagate value through earlier select
7583 // to get the PHI operand.
7584 for (SelectInst *SI : llvm::reverse(ASI)) {
7585 // The select itself is replaced with a PHI Node.
7586 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7587 PN->insertBefore(EndBlock->begin());
7588 PN->takeName(SI);
7589 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7590 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7591 PN->setDebugLoc(SI->getDebugLoc());
7592
7593 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7594 SI->eraseFromParent();
7595 INS.erase(SI);
7596 ++NumSelectsExpanded;
7597 }
7598
7599 // Instruct OptimizeBlock to skip to the next block.
7600 CurInstIterator = StartBlock->end();
7601 return true;
7602}
7603
7604/// Some targets only accept certain types for splat inputs. For example a VDUP
7605/// in MVE takes a GPR (integer) register, and the instruction that incorporate
7606/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
7607bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7608 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7610 m_Undef(), m_ZeroMask())))
7611 return false;
7612 Type *NewType = TLI->shouldConvertSplatType(SVI);
7613 if (!NewType)
7614 return false;
7615
7616 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7617 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7618 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7619 "Expected a type of the same size!");
7620 auto *NewVecType =
7621 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7622
7623 // Create a bitcast (shuffle (insert (bitcast(..))))
7624 IRBuilder<> Builder(SVI->getContext());
7625 Builder.SetInsertPoint(SVI);
7626 Value *BC1 = Builder.CreateBitCast(
7627 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7628 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7629 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7630
7631 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7633 SVI, TLInfo, nullptr,
7634 [&](Value *V) { removeAllAssertingVHReferences(V); });
7635
7636 // Also hoist the bitcast up to its operand if it they are not in the same
7637 // block.
7638 if (auto *BCI = dyn_cast<Instruction>(BC1))
7639 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7640 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7641 !Op->isTerminator() && !Op->isEHPad())
7642 BCI->moveAfter(Op);
7643
7644 return true;
7645}
7646
7647bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7648 // If the operands of I can be folded into a target instruction together with
7649 // I, duplicate and sink them.
7650 SmallVector<Use *, 4> OpsToSink;
7651 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7652 return false;
7653
7654 // OpsToSink can contain multiple uses in a use chain (e.g.
7655 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7656 // uses must come first, so we process the ops in reverse order so as to not
7657 // create invalid IR.
7658 BasicBlock *TargetBB = I->getParent();
7659 bool Changed = false;
7660 SmallVector<Use *, 4> ToReplace;
7661 Instruction *InsertPoint = I;
7663 unsigned long InstNumber = 0;
7664 for (const auto &I : *TargetBB)
7665 InstOrdering[&I] = InstNumber++;
7666
7667 for (Use *U : reverse(OpsToSink)) {
7668 auto *UI = cast<Instruction>(U->get());
7669 if (isa<PHINode>(UI))
7670 continue;
7671 if (UI->getParent() == TargetBB) {
7672 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7673 InsertPoint = UI;
7674 continue;
7675 }
7676 ToReplace.push_back(U);
7677 }
7678
7679 SetVector<Instruction *> MaybeDead;
7681 for (Use *U : ToReplace) {
7682 auto *UI = cast<Instruction>(U->get());
7683 Instruction *NI = UI->clone();
7684
7685 if (IsHugeFunc) {
7686 // Now we clone an instruction, its operands' defs may sink to this BB
7687 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7688 for (Value *Op : NI->operands())
7689 if (auto *OpDef = dyn_cast<Instruction>(Op))
7690 FreshBBs.insert(OpDef->getParent());
7691 }
7692
7693 NewInstructions[UI] = NI;
7694 MaybeDead.insert(UI);
7695 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7696 NI->insertBefore(InsertPoint->getIterator());
7697 InsertPoint = NI;
7698 InsertedInsts.insert(NI);
7699
7700 // Update the use for the new instruction, making sure that we update the
7701 // sunk instruction uses, if it is part of a chain that has already been
7702 // sunk.
7703 Instruction *OldI = cast<Instruction>(U->getUser());
7704 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7705 It->second->setOperand(U->getOperandNo(), NI);
7706 else
7707 U->set(NI);
7708 Changed = true;
7709 }
7710
7711 // Remove instructions that are dead after sinking.
7712 for (auto *I : MaybeDead) {
7713 if (!I->hasNUsesOrMore(1)) {
7714 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7715 I->eraseFromParent();
7716 }
7717 }
7718
7719 return Changed;
7720}
7721
7722bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7723 Value *Cond = SI->getCondition();
7724 Type *OldType = Cond->getType();
7725 LLVMContext &Context = Cond->getContext();
7726 EVT OldVT = TLI->getValueType(*DL, OldType);
7727 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7728 unsigned RegWidth = RegType.getSizeInBits();
7729
7730 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7731 return false;
7732
7733 // If the register width is greater than the type width, expand the condition
7734 // of the switch instruction and each case constant to the width of the
7735 // register. By widening the type of the switch condition, subsequent
7736 // comparisons (for case comparisons) will not need to be extended to the
7737 // preferred register width, so we will potentially eliminate N-1 extends,
7738 // where N is the number of cases in the switch.
7739 auto *NewType = Type::getIntNTy(Context, RegWidth);
7740
7741 // Extend the switch condition and case constants using the target preferred
7742 // extend unless the switch condition is a function argument with an extend
7743 // attribute. In that case, we can avoid an unnecessary mask/extension by
7744 // matching the argument extension instead.
7745 Instruction::CastOps ExtType = Instruction::ZExt;
7746 // Some targets prefer SExt over ZExt.
7747 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7748 ExtType = Instruction::SExt;
7749
7750 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7751 if (Arg->hasSExtAttr())
7752 ExtType = Instruction::SExt;
7753 if (Arg->hasZExtAttr())
7754 ExtType = Instruction::ZExt;
7755 }
7756
7757 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7758 ExtInst->insertBefore(SI->getIterator());
7759 ExtInst->setDebugLoc(SI->getDebugLoc());
7760 SI->setCondition(ExtInst);
7761 for (auto Case : SI->cases()) {
7762 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7763 APInt WideConst = (ExtType == Instruction::ZExt)
7764 ? NarrowConst.zext(RegWidth)
7765 : NarrowConst.sext(RegWidth);
7766 Case.setValue(ConstantInt::get(Context, WideConst));
7767 }
7768
7769 return true;
7770}
7771
7772bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7773 // The SCCP optimization tends to produce code like this:
7774 // switch(x) { case 42: phi(42, ...) }
7775 // Materializing the constant for the phi-argument needs instructions; So we
7776 // change the code to:
7777 // switch(x) { case 42: phi(x, ...) }
7778
7779 Value *Condition = SI->getCondition();
7780 // Avoid endless loop in degenerate case.
7781 if (isa<ConstantInt>(*Condition))
7782 return false;
7783
7784 bool Changed = false;
7785 BasicBlock *SwitchBB = SI->getParent();
7786 Type *ConditionType = Condition->getType();
7787
7788 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7789 ConstantInt *CaseValue = Case.getCaseValue();
7790 BasicBlock *CaseBB = Case.getCaseSuccessor();
7791 // Set to true if we previously checked that `CaseBB` is only reached by
7792 // a single case from this switch.
7793 bool CheckedForSinglePred = false;
7794 for (PHINode &PHI : CaseBB->phis()) {
7795 Type *PHIType = PHI.getType();
7796 // If ZExt is free then we can also catch patterns like this:
7797 // switch((i32)x) { case 42: phi((i64)42, ...); }
7798 // and replace `(i64)42` with `zext i32 %x to i64`.
7799 bool TryZExt =
7800 PHIType->isIntegerTy() &&
7801 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7802 TLI->isZExtFree(ConditionType, PHIType);
7803 if (PHIType == ConditionType || TryZExt) {
7804 // Set to true to skip this case because of multiple preds.
7805 bool SkipCase = false;
7806 Value *Replacement = nullptr;
7807 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7808 Value *PHIValue = PHI.getIncomingValue(I);
7809 if (PHIValue != CaseValue) {
7810 if (!TryZExt)
7811 continue;
7812 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7813 if (!PHIValueInt ||
7814 PHIValueInt->getValue() !=
7815 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7816 continue;
7817 }
7818 if (PHI.getIncomingBlock(I) != SwitchBB)
7819 continue;
7820 // We cannot optimize if there are multiple case labels jumping to
7821 // this block. This check may get expensive when there are many
7822 // case labels so we test for it last.
7823 if (!CheckedForSinglePred) {
7824 CheckedForSinglePred = true;
7825 if (SI->findCaseDest(CaseBB) == nullptr) {
7826 SkipCase = true;
7827 break;
7828 }
7829 }
7830
7831 if (Replacement == nullptr) {
7832 if (PHIValue == CaseValue) {
7833 Replacement = Condition;
7834 } else {
7835 IRBuilder<> Builder(SI);
7836 Replacement = Builder.CreateZExt(Condition, PHIType);
7837 }
7838 }
7839 PHI.setIncomingValue(I, Replacement);
7840 Changed = true;
7841 }
7842 if (SkipCase)
7843 break;
7844 }
7845 }
7846 }
7847 return Changed;
7848}
7849
7850bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7851 bool Changed = optimizeSwitchType(SI);
7852 Changed |= optimizeSwitchPhiConstants(SI);
7853 return Changed;
7854}
7855
7856namespace {
7857
7858/// Helper class to promote a scalar operation to a vector one.
7859/// This class is used to move downward extractelement transition.
7860/// E.g.,
7861/// a = vector_op <2 x i32>
7862/// b = extractelement <2 x i32> a, i32 0
7863/// c = scalar_op b
7864/// store c
7865///
7866/// =>
7867/// a = vector_op <2 x i32>
7868/// c = vector_op a (equivalent to scalar_op on the related lane)
7869/// * d = extractelement <2 x i32> c, i32 0
7870/// * store d
7871/// Assuming both extractelement and store can be combined, we get rid of the
7872/// transition.
7873class VectorPromoteHelper {
7874 /// DataLayout associated with the current module.
7875 const DataLayout &DL;
7876
7877 /// Used to perform some checks on the legality of vector operations.
7878 const TargetLowering &TLI;
7879
7880 /// Used to estimate the cost of the promoted chain.
7881 const TargetTransformInfo &TTI;
7882
7883 /// The transition being moved downwards.
7884 Instruction *Transition;
7885
7886 /// The sequence of instructions to be promoted.
7887 SmallVector<Instruction *, 4> InstsToBePromoted;
7888
7889 /// Cost of combining a store and an extract.
7890 unsigned StoreExtractCombineCost;
7891
7892 /// Instruction that will be combined with the transition.
7893 Instruction *CombineInst = nullptr;
7894
7895 /// The instruction that represents the current end of the transition.
7896 /// Since we are faking the promotion until we reach the end of the chain
7897 /// of computation, we need a way to get the current end of the transition.
7898 Instruction *getEndOfTransition() const {
7899 if (InstsToBePromoted.empty())
7900 return Transition;
7901 return InstsToBePromoted.back();
7902 }
7903
7904 /// Return the index of the original value in the transition.
7905 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7906 /// c, is at index 0.
7907 unsigned getTransitionOriginalValueIdx() const {
7908 assert(isa<ExtractElementInst>(Transition) &&
7909 "Other kind of transitions are not supported yet");
7910 return 0;
7911 }
7912
7913 /// Return the index of the index in the transition.
7914 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7915 /// is at index 1.
7916 unsigned getTransitionIdx() const {
7917 assert(isa<ExtractElementInst>(Transition) &&
7918 "Other kind of transitions are not supported yet");
7919 return 1;
7920 }
7921
7922 /// Get the type of the transition.
7923 /// This is the type of the original value.
7924 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7925 /// transition is <2 x i32>.
7926 Type *getTransitionType() const {
7927 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7928 }
7929
7930 /// Promote \p ToBePromoted by moving \p Def downward through.
7931 /// I.e., we have the following sequence:
7932 /// Def = Transition <ty1> a to <ty2>
7933 /// b = ToBePromoted <ty2> Def, ...
7934 /// =>
7935 /// b = ToBePromoted <ty1> a, ...
7936 /// Def = Transition <ty1> ToBePromoted to <ty2>
7937 void promoteImpl(Instruction *ToBePromoted);
7938
7939 /// Check whether or not it is profitable to promote all the
7940 /// instructions enqueued to be promoted.
7941 bool isProfitableToPromote() {
7942 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7943 unsigned Index = isa<ConstantInt>(ValIdx)
7944 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7945 : -1;
7946 Type *PromotedType = getTransitionType();
7947
7948 StoreInst *ST = cast<StoreInst>(CombineInst);
7949 unsigned AS = ST->getPointerAddressSpace();
7950 // Check if this store is supported.
7952 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7953 ST->getAlign())) {
7954 // If this is not supported, there is no way we can combine
7955 // the extract with the store.
7956 return false;
7957 }
7958
7959 // The scalar chain of computation has to pay for the transition
7960 // scalar to vector.
7961 // The vector chain has to account for the combining cost.
7964 InstructionCost ScalarCost =
7965 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
7966 InstructionCost VectorCost = StoreExtractCombineCost;
7967 for (const auto &Inst : InstsToBePromoted) {
7968 // Compute the cost.
7969 // By construction, all instructions being promoted are arithmetic ones.
7970 // Moreover, one argument is a constant that can be viewed as a splat
7971 // constant.
7972 Value *Arg0 = Inst->getOperand(0);
7973 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7974 isa<ConstantFP>(Arg0);
7975 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7976 if (IsArg0Constant)
7978 else
7980
7981 ScalarCost += TTI.getArithmeticInstrCost(
7982 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
7983 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7984 CostKind, Arg0Info, Arg1Info);
7985 }
7986 LLVM_DEBUG(
7987 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7988 << ScalarCost << "\nVector: " << VectorCost << '\n');
7989 return ScalarCost > VectorCost;
7990 }
7991
7992 /// Generate a constant vector with \p Val with the same
7993 /// number of elements as the transition.
7994 /// \p UseSplat defines whether or not \p Val should be replicated
7995 /// across the whole vector.
7996 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7997 /// otherwise we generate a vector with as many poison as possible:
7998 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
7999 /// used at the index of the extract.
8000 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8001 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8002 if (!UseSplat) {
8003 // If we cannot determine where the constant must be, we have to
8004 // use a splat constant.
8005 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8006 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8007 ExtractIdx = CstVal->getSExtValue();
8008 else
8009 UseSplat = true;
8010 }
8011
8012 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8013 if (UseSplat)
8014 return ConstantVector::getSplat(EC, Val);
8015
8016 if (!EC.isScalable()) {
8018 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8019 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8020 if (Idx == ExtractIdx)
8021 ConstVec.push_back(Val);
8022 else
8023 ConstVec.push_back(PoisonVal);
8024 }
8025 return ConstantVector::get(ConstVec);
8026 } else
8028 "Generate scalable vector for non-splat is unimplemented");
8029 }
8030
8031 /// Check if promoting to a vector type an operand at \p OperandIdx
8032 /// in \p Use can trigger undefined behavior.
8033 static bool canCauseUndefinedBehavior(const Instruction *Use,
8034 unsigned OperandIdx) {
8035 // It is not safe to introduce undef when the operand is on
8036 // the right hand side of a division-like instruction.
8037 if (OperandIdx != 1)
8038 return false;
8039 switch (Use->getOpcode()) {
8040 default:
8041 return false;
8042 case Instruction::SDiv:
8043 case Instruction::UDiv:
8044 case Instruction::SRem:
8045 case Instruction::URem:
8046 return true;
8047 case Instruction::FDiv:
8048 case Instruction::FRem:
8049 return !Use->hasNoNaNs();
8050 }
8051 llvm_unreachable(nullptr);
8052 }
8053
8054public:
8055 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8056 const TargetTransformInfo &TTI, Instruction *Transition,
8057 unsigned CombineCost)
8058 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8059 StoreExtractCombineCost(CombineCost) {
8060 assert(Transition && "Do not know how to promote null");
8061 }
8062
8063 /// Check if we can promote \p ToBePromoted to \p Type.
8064 bool canPromote(const Instruction *ToBePromoted) const {
8065 // We could support CastInst too.
8066 return isa<BinaryOperator>(ToBePromoted);
8067 }
8068
8069 /// Check if it is profitable to promote \p ToBePromoted
8070 /// by moving downward the transition through.
8071 bool shouldPromote(const Instruction *ToBePromoted) const {
8072 // Promote only if all the operands can be statically expanded.
8073 // Indeed, we do not want to introduce any new kind of transitions.
8074 for (const Use &U : ToBePromoted->operands()) {
8075 const Value *Val = U.get();
8076 if (Val == getEndOfTransition()) {
8077 // If the use is a division and the transition is on the rhs,
8078 // we cannot promote the operation, otherwise we may create a
8079 // division by zero.
8080 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8081 return false;
8082 continue;
8083 }
8084 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8085 !isa<ConstantFP>(Val))
8086 return false;
8087 }
8088 // Check that the resulting operation is legal.
8089 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8090 if (!ISDOpcode)
8091 return false;
8092 return StressStoreExtract ||
8094 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8095 }
8096
8097 /// Check whether or not \p Use can be combined
8098 /// with the transition.
8099 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8100 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8101
8102 /// Record \p ToBePromoted as part of the chain to be promoted.
8103 void enqueueForPromotion(Instruction *ToBePromoted) {
8104 InstsToBePromoted.push_back(ToBePromoted);
8105 }
8106
8107 /// Set the instruction that will be combined with the transition.
8108 void recordCombineInstruction(Instruction *ToBeCombined) {
8109 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8110 CombineInst = ToBeCombined;
8111 }
8112
8113 /// Promote all the instructions enqueued for promotion if it is
8114 /// profitable.
8115 /// \return True if the promotion happened, false otherwise.
8116 bool promote() {
8117 // Check if there is something to promote.
8118 // Right now, if we do not have anything to combine with,
8119 // we assume the promotion is not profitable.
8120 if (InstsToBePromoted.empty() || !CombineInst)
8121 return false;
8122
8123 // Check cost.
8124 if (!StressStoreExtract && !isProfitableToPromote())
8125 return false;
8126
8127 // Promote.
8128 for (auto &ToBePromoted : InstsToBePromoted)
8129 promoteImpl(ToBePromoted);
8130 InstsToBePromoted.clear();
8131 return true;
8132 }
8133};
8134
8135} // end anonymous namespace
8136
8137void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8138 // At this point, we know that all the operands of ToBePromoted but Def
8139 // can be statically promoted.
8140 // For Def, we need to use its parameter in ToBePromoted:
8141 // b = ToBePromoted ty1 a
8142 // Def = Transition ty1 b to ty2
8143 // Move the transition down.
8144 // 1. Replace all uses of the promoted operation by the transition.
8145 // = ... b => = ... Def.
8146 assert(ToBePromoted->getType() == Transition->getType() &&
8147 "The type of the result of the transition does not match "
8148 "the final type");
8149 ToBePromoted->replaceAllUsesWith(Transition);
8150 // 2. Update the type of the uses.
8151 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8152 Type *TransitionTy = getTransitionType();
8153 ToBePromoted->mutateType(TransitionTy);
8154 // 3. Update all the operands of the promoted operation with promoted
8155 // operands.
8156 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8157 for (Use &U : ToBePromoted->operands()) {
8158 Value *Val = U.get();
8159 Value *NewVal = nullptr;
8160 if (Val == Transition)
8161 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8162 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8163 isa<ConstantFP>(Val)) {
8164 // Use a splat constant if it is not safe to use undef.
8165 NewVal = getConstantVector(
8166 cast<Constant>(Val),
8167 isa<UndefValue>(Val) ||
8168 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8169 } else
8170 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8171 "this?");
8172 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8173 }
8174 Transition->moveAfter(ToBePromoted);
8175 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8176}
8177
8178/// Some targets can do store(extractelement) with one instruction.
8179/// Try to push the extractelement towards the stores when the target
8180/// has this feature and this is profitable.
8181bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8182 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8183 if (DisableStoreExtract ||
8186 Inst->getOperand(1), CombineCost)))
8187 return false;
8188
8189 // At this point we know that Inst is a vector to scalar transition.
8190 // Try to move it down the def-use chain, until:
8191 // - We can combine the transition with its single use
8192 // => we got rid of the transition.
8193 // - We escape the current basic block
8194 // => we would need to check that we are moving it at a cheaper place and
8195 // we do not do that for now.
8196 BasicBlock *Parent = Inst->getParent();
8197 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8198 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8199 // If the transition has more than one use, assume this is not going to be
8200 // beneficial.
8201 while (Inst->hasOneUse()) {
8202 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8203 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8204
8205 if (ToBePromoted->getParent() != Parent) {
8206 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8207 << ToBePromoted->getParent()->getName()
8208 << ") than the transition (" << Parent->getName()
8209 << ").\n");
8210 return false;
8211 }
8212
8213 if (VPH.canCombine(ToBePromoted)) {
8214 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8215 << "will be combined with: " << *ToBePromoted << '\n');
8216 VPH.recordCombineInstruction(ToBePromoted);
8217 bool Changed = VPH.promote();
8218 NumStoreExtractExposed += Changed;
8219 return Changed;
8220 }
8221
8222 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8223 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8224 return false;
8225
8226 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8227
8228 VPH.enqueueForPromotion(ToBePromoted);
8229 Inst = ToBePromoted;
8230 }
8231 return false;
8232}
8233
8234/// For the instruction sequence of store below, F and I values
8235/// are bundled together as an i64 value before being stored into memory.
8236/// Sometimes it is more efficient to generate separate stores for F and I,
8237/// which can remove the bitwise instructions or sink them to colder places.
8238///
8239/// (store (or (zext (bitcast F to i32) to i64),
8240/// (shl (zext I to i64), 32)), addr) -->
8241/// (store F, addr) and (store I, addr+4)
8242///
8243/// Similarly, splitting for other merged store can also be beneficial, like:
8244/// For pair of {i32, i32}, i64 store --> two i32 stores.
8245/// For pair of {i32, i16}, i64 store --> two i32 stores.
8246/// For pair of {i16, i16}, i32 store --> two i16 stores.
8247/// For pair of {i16, i8}, i32 store --> two i16 stores.
8248/// For pair of {i8, i8}, i16 store --> two i8 stores.
8249///
8250/// We allow each target to determine specifically which kind of splitting is
8251/// supported.
8252///
8253/// The store patterns are commonly seen from the simple code snippet below
8254/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
8255/// void goo(const std::pair<int, float> &);
8256/// hoo() {
8257/// ...
8258/// goo(std::make_pair(tmp, ftmp));
8259/// ...
8260/// }
8261///
8262/// Although we already have similar splitting in DAG Combine, we duplicate
8263/// it in CodeGenPrepare to catch the case in which pattern is across
8264/// multiple BBs. The logic in DAG Combine is kept to catch case generated
8265/// during code expansion.
8267 const TargetLowering &TLI) {
8268 // Handle simple but common cases only.
8269 Type *StoreType = SI.getValueOperand()->getType();
8270
8271 // The code below assumes shifting a value by <number of bits>,
8272 // whereas scalable vectors would have to be shifted by
8273 // <2log(vscale) + number of bits> in order to store the
8274 // low/high parts. Bailing out for now.
8275 if (StoreType->isScalableTy())
8276 return false;
8277
8278 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8279 DL.getTypeSizeInBits(StoreType) == 0)
8280 return false;
8281
8282 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8283 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8284 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8285 return false;
8286
8287 // Don't split the store if it is volatile.
8288 if (SI.isVolatile())
8289 return false;
8290
8291 // Match the following patterns:
8292 // (store (or (zext LValue to i64),
8293 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8294 // or
8295 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8296 // (zext LValue to i64),
8297 // Expect both operands of OR and the first operand of SHL have only
8298 // one use.
8299 Value *LValue, *HValue;
8300 if (!match(SI.getValueOperand(),
8303 m_SpecificInt(HalfValBitSize))))))
8304 return false;
8305
8306 // Check that LValue and HValue are integers no wider than HalfValBitSize.
8307 if (!LValue->getType()->isIntegerTy() ||
8308 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8309 !HValue->getType()->isIntegerTy() ||
8310 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8311 return false;
8312
8313 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8314 // as the input of target query.
8315 auto *LBC = dyn_cast<BitCastInst>(LValue);
8316 auto *HBC = dyn_cast<BitCastInst>(HValue);
8317 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8318 : EVT::getEVT(LValue->getType());
8319 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8320 : EVT::getEVT(HValue->getType());
8321 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8322 return false;
8323
8324 // Start to split store.
8325 IRBuilder<> Builder(SI.getContext());
8326 Builder.SetInsertPoint(&SI);
8327
8328 // If LValue/HValue is a bitcast in another BB, create a new one in current
8329 // BB so it may be merged with the split stores by the DAG combiner.
8330 if (LBC && LBC->getParent() != SI.getParent())
8331 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8332 if (HBC && HBC->getParent() != SI.getParent())
8333 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8334
8335 bool IsLE = SI.getDataLayout().isLittleEndian();
8336 auto CreateSplitStore = [&](Value *V, bool Upper) {
8337 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8338 Value *Addr = SI.getPointerOperand();
8339 Align Alignment = SI.getAlign();
8340 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8341 if (IsOffsetStore) {
8342 Addr = Builder.CreateGEP(
8343 SplitStoreType, Addr,
8344 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8345
8346 // When splitting the store in half, naturally one half will retain the
8347 // alignment of the original wider store, regardless of whether it was
8348 // over-aligned or not, while the other will require adjustment.
8349 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8350 }
8351 Builder.CreateAlignedStore(V, Addr, Alignment);
8352 };
8353
8354 CreateSplitStore(LValue, false);
8355 CreateSplitStore(HValue, true);
8356
8357 // Delete the old store.
8358 SI.eraseFromParent();
8359 return true;
8360}
8361
8362// Return true if the GEP has two operands, the first operand is of a sequential
8363// type, and the second operand is a constant.
8366 return GEP->getNumOperands() == 2 && I.isSequential() &&
8367 isa<ConstantInt>(GEP->getOperand(1));
8368}
8369
8370// Try unmerging GEPs to reduce liveness interference (register pressure) across
8371// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8372// reducing liveness interference across those edges benefits global register
8373// allocation. Currently handles only certain cases.
8374//
8375// For example, unmerge %GEPI and %UGEPI as below.
8376//
8377// ---------- BEFORE ----------
8378// SrcBlock:
8379// ...
8380// %GEPIOp = ...
8381// ...
8382// %GEPI = gep %GEPIOp, Idx
8383// ...
8384// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8385// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8386// (* %GEPIOp is alive on the indirectbr edges only because it is used by
8387// %UGEPI)
8388//
8389// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8390// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8391// ...
8392//
8393// DstBi:
8394// ...
8395// %UGEPI = gep %GEPIOp, UIdx
8396// ...
8397// ---------------------------
8398//
8399// ---------- AFTER ----------
8400// SrcBlock:
8401// ... (same as above)
8402// (* %GEPI is still alive on the indirectbr edges)
8403// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8404// unmerging)
8405// ...
8406//
8407// DstBi:
8408// ...
8409// %UGEPI = gep %GEPI, (UIdx-Idx)
8410// ...
8411// ---------------------------
8412//
8413// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8414// no longer alive on them.
8415//
8416// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8417// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8418// not to disable further simplifications and optimizations as a result of GEP
8419// merging.
8420//
8421// Note this unmerging may increase the length of the data flow critical path
8422// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8423// between the register pressure and the length of data-flow critical
8424// path. Restricting this to the uncommon IndirectBr case would minimize the
8425// impact of potentially longer critical path, if any, and the impact on compile
8426// time.
8428 const TargetTransformInfo *TTI) {
8429 BasicBlock *SrcBlock = GEPI->getParent();
8430 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8431 // (non-IndirectBr) cases exit early here.
8432 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8433 return false;
8434 // Check that GEPI is a simple gep with a single constant index.
8435 if (!GEPSequentialConstIndexed(GEPI))
8436 return false;
8437 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8438 // Check that GEPI is a cheap one.
8439 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8442 return false;
8443 Value *GEPIOp = GEPI->getOperand(0);
8444 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8445 if (!isa<Instruction>(GEPIOp))
8446 return false;
8447 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8448 if (GEPIOpI->getParent() != SrcBlock)
8449 return false;
8450 // Check that GEP is used outside the block, meaning it's alive on the
8451 // IndirectBr edge(s).
8452 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8453 if (auto *I = dyn_cast<Instruction>(Usr)) {
8454 if (I->getParent() != SrcBlock) {
8455 return true;
8456 }
8457 }
8458 return false;
8459 }))
8460 return false;
8461 // The second elements of the GEP chains to be unmerged.
8462 std::vector<GetElementPtrInst *> UGEPIs;
8463 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8464 // on IndirectBr edges.
8465 for (User *Usr : GEPIOp->users()) {
8466 if (Usr == GEPI)
8467 continue;
8468 // Check if Usr is an Instruction. If not, give up.
8469 if (!isa<Instruction>(Usr))
8470 return false;
8471 auto *UI = cast<Instruction>(Usr);
8472 // Check if Usr in the same block as GEPIOp, which is fine, skip.
8473 if (UI->getParent() == SrcBlock)
8474 continue;
8475 // Check if Usr is a GEP. If not, give up.
8476 if (!isa<GetElementPtrInst>(Usr))
8477 return false;
8478 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8479 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8480 // the pointer operand to it. If so, record it in the vector. If not, give
8481 // up.
8482 if (!GEPSequentialConstIndexed(UGEPI))
8483 return false;
8484 if (UGEPI->getOperand(0) != GEPIOp)
8485 return false;
8486 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8487 return false;
8488 if (GEPIIdx->getType() !=
8489 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8490 return false;
8491 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8492 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8495 return false;
8496 UGEPIs.push_back(UGEPI);
8497 }
8498 if (UGEPIs.size() == 0)
8499 return false;
8500 // Check the materializing cost of (Uidx-Idx).
8501 for (GetElementPtrInst *UGEPI : UGEPIs) {
8502 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8503 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8505 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8506 if (ImmCost > TargetTransformInfo::TCC_Basic)
8507 return false;
8508 }
8509 // Now unmerge between GEPI and UGEPIs.
8510 for (GetElementPtrInst *UGEPI : UGEPIs) {
8511 UGEPI->setOperand(0, GEPI);
8512 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8513 Constant *NewUGEPIIdx = ConstantInt::get(
8514 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8515 UGEPI->setOperand(1, NewUGEPIIdx);
8516 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8517 // inbounds to avoid UB.
8518 if (!GEPI->isInBounds()) {
8519 UGEPI->setIsInBounds(false);
8520 }
8521 }
8522 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8523 // alive on IndirectBr edges).
8524 assert(llvm::none_of(GEPIOp->users(),
8525 [&](User *Usr) {
8526 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8527 }) &&
8528 "GEPIOp is used outside SrcBlock");
8529 return true;
8530}
8531
8532static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8534 bool IsHugeFunc) {
8535 // Try and convert
8536 // %c = icmp ult %x, 8
8537 // br %c, bla, blb
8538 // %tc = lshr %x, 3
8539 // to
8540 // %tc = lshr %x, 3
8541 // %c = icmp eq %tc, 0
8542 // br %c, bla, blb
8543 // Creating the cmp to zero can be better for the backend, especially if the
8544 // lshr produces flags that can be used automatically.
8545 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8546 return false;
8547
8548 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8549 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8550 return false;
8551
8552 Value *X = Cmp->getOperand(0);
8553 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8554
8555 for (auto *U : X->users()) {
8556 Instruction *UI = dyn_cast<Instruction>(U);
8557 // A quick dominance check
8558 if (!UI ||
8559 (UI->getParent() != Branch->getParent() &&
8560 UI->getParent() != Branch->getSuccessor(0) &&
8561 UI->getParent() != Branch->getSuccessor(1)) ||
8562 (UI->getParent() != Branch->getParent() &&
8563 !UI->getParent()->getSinglePredecessor()))
8564 continue;
8565
8566 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8567 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8568 IRBuilder<> Builder(Branch);
8569 if (UI->getParent() != Branch->getParent())
8570 UI->moveBefore(Branch->getIterator());
8572 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8573 ConstantInt::get(UI->getType(), 0));
8574 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8575 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8576 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8577 return true;
8578 }
8579 if (Cmp->isEquality() &&
8580 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8581 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
8582 IRBuilder<> Builder(Branch);
8583 if (UI->getParent() != Branch->getParent())
8584 UI->moveBefore(Branch->getIterator());
8586 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8587 ConstantInt::get(UI->getType(), 0));
8588 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8589 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8590 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8591 return true;
8592 }
8593 }
8594 return false;
8595}
8596
/// Try the CodeGenPrepare transforms that apply to the single instruction I.
///
/// Any debug variable records attached to I are fixed up first. The function
/// then dispatches on the kind of I (PHI, cast, compare, urem, load, store,
/// atomic RMW / cmpxchg, 'and' and right-shift binary operators, GEP, freeze)
/// and finally on the opcode.
///
/// \param I          The instruction to optimize. It may be erased or replaced
///                   by the transform that fires.
/// \param ModifiedDT Forwarded to optimizeCmp / optimizeCallInst and to the
///                   recursive call made for a folded GEP.
/// \returns true when a transform reports a change. On paths where no
///          transform applies, the result of the debug-record fixup is
///          returned instead.
///
/// NOTE(review): several paths (e.g. the load, store, atomic and opcode-switch
/// cases) return the helper's result directly and therefore drop AnyChange.
/// NOTE(review): in this copy the callee names in front of three argument
/// lists (`I, LI->getLoopFor(...)` twice and `GEPI, TLInfo, nullptr, ...`) and
/// the condition guarding the second `return true;` of the GEP case are not
/// visible - confirm against the upstream file.
8597bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8598 bool AnyChange = false;
8599 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8600
8601 // Bail out if we inserted the instruction to prevent optimizations from
8602 // stepping on each other's toes.
8603 if (InsertedInsts.count(I))
8604 return AnyChange;
8605
8606 // TODO: Move into the switch on opcode below here.
8607 if (PHINode *P = dyn_cast<PHINode>(I)) {
8608 // It is possible for very late stage optimizations (such as SimplifyCFG)
8609 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8610 // trivial PHI, go ahead and zap it here.
8611 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
// Drop P from LargeOffsetGEPMap before it is erased below.
8612 LargeOffsetGEPMap.erase(P);
8613 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8614 P->eraseFromParent();
8615 ++NumPHIsElim;
8616 return true;
8617 }
8618 return AnyChange;
8619 }
8620
8621 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8622 // If the source of the cast is a constant, then this should have
8623 // already been constant folded. The only reason NOT to constant fold
8624 // it is if something (e.g. LSR) was careful to place the constant
8625 // evaluation in a block other than then one that uses it (e.g. to hoist
8626 // the address of globals out of a loop). If this is the case, we don't
8627 // want to forward-subst the cast.
8628 if (isa<Constant>(CI->getOperand(0)))
8629 return AnyChange;
8630
8631 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8632 return true;
8633
8634 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8635 isa<TruncInst>(I)) &&
8637 I, LI->getLoopFor(I->getParent()), *TTI))
8638 return true;
8639
8640 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8641 /// Sink a zext or sext into its user blocks if the target type doesn't
8642 /// fit in one register
8643 if (TLI->getTypeAction(CI->getContext(),
8644 TLI->getValueType(*DL, CI->getType())) ==
8645 TargetLowering::TypeExpandInteger) {
8646 return SinkCast(CI);
8647 } else {
8649 I, LI->getLoopFor(I->getParent()), *TTI))
8650 return true;
8651
// Non-short-circuiting '|' below: optimizeExtUses runs even when
// optimizeExt already reported a change.
8652 bool MadeChange = optimizeExt(I);
8653 return MadeChange | optimizeExtUses(I);
8654 }
8655 }
8656 return AnyChange;
8657 }
8658
8659 if (auto *Cmp = dyn_cast<CmpInst>(I))
8660 if (optimizeCmp(Cmp, ModifiedDT))
8661 return true;
8662
8663 if (match(I, m_URem(m_Value(), m_Value())))
8664 if (optimizeURem(I))
8665 return true;
8666
// Note: this local LI (the load) shadows the LI used with getLoopFor() above.
8667 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Strip !invariant.group metadata from the load before optimizing it.
8668 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8669 bool Modified = optimizeLoadExt(LI);
8670 unsigned AS = LI->getPointerAddressSpace();
8671 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8672 return Modified;
8673 }
8674
8675 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8676 if (splitMergedValStore(*SI, *DL, *TLI))
8677 return true;
// Strip !invariant.group metadata from the store before optimizing it.
8678 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8679 unsigned AS = SI->getPointerAddressSpace();
// Operand 1 of a store is the address, operand 0 the stored value.
8680 return optimizeMemoryInst(I, SI->getOperand(1),
8681 SI->getOperand(0)->getType(), AS);
8682 }
8683
8684 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8685 unsigned AS = RMW->getPointerAddressSpace();
8686 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8687 }
8688
8689 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8690 unsigned AS = CmpX->getPointerAddressSpace();
8691 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8692 CmpX->getCompareOperand()->getType(), AS);
8693 }
8694
// BinOp is null when I is not a binary operator; both uses below check it.
8695 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8696
8697 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8698 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8699 return true;
8700
8701 // TODO: Move this into the switch on opcode - it handles shifts already.
8702 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8703 BinOp->getOpcode() == Instruction::LShr)) {
8704 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8705 if (CI && TLI->hasExtractBitsInsn())
8706 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8707 return true;
8708 }
8709
8710 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8711 if (GEPI->hasAllZeroIndices()) {
8712 /// The GEP operand must be a pointer, so must its result -> BitCast
8713 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8714 GEPI->getName(), GEPI->getIterator());
8715 NC->setDebugLoc(GEPI->getDebugLoc());
8716 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8718 GEPI, TLInfo, nullptr,
8719 [&](Value *V) { removeAllAssertingVHReferences(V); });
8720 ++NumGEPsElim;
// Give the replacement bitcast a chance to be optimized right away.
8721 optimizeInst(NC, ModifiedDT);
8722 return true;
8723 }
8725 return true;
8726 }
8727 }
8728
8729 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8730 // freeze(icmp a, const)) -> icmp (freeze a), const
8731 // This helps generate efficient conditional jumps.
8732 Instruction *CmpI = nullptr;
8733 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8734 CmpI = II;
// An fcmp is only considered when it carries no fast-math flags.
8735 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8736 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8737
8738 if (CmpI && CmpI->hasOneUse()) {
8739 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8740 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8741 isa<ConstantPointerNull>(Op0);
8742 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8743 isa<ConstantPointerNull>(Op1);
8744 if (Const0 || Const1) {
// Exactly one constant operand: freeze the other operand in front of the
// compare. With two constant operands no new freeze is created.
8745 if (!Const0 || !Const1) {
8746 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8747 F->takeName(FI);
8748 CmpI->setOperand(Const0 ? 1 : 0, F);
8749 }
8750 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8751 FI->eraseFromParent();
8752 return true;
8753 }
8754 }
8755 return AnyChange;
8756 }
8757
8758 if (tryToSinkFreeOperands(I))
8759 return true;
8760
// Remaining transforms are selected purely by opcode.
8761 switch (I->getOpcode()) {
8762 case Instruction::Shl:
8763 case Instruction::LShr:
8764 case Instruction::AShr:
8765 return optimizeShiftInst(cast<BinaryOperator>(I));
8766 case Instruction::Call:
8767 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8768 case Instruction::Select:
8769 return optimizeSelectInst(cast<SelectInst>(I));
8770 case Instruction::ShuffleVector:
8771 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8772 case Instruction::Switch:
8773 return optimizeSwitchInst(cast<SwitchInst>(I));
8774 case Instruction::ExtractElement:
8775 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8776 case Instruction::Br:
8777 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8778 }
8779
8780 return AnyChange;
8781}
8782
8783/// Given an OR instruction, check to see if this is a bitreverse
8784/// idiom. If so, insert the new intrinsic and return true.
///
/// Only integer-typed instructions are considered. On success every use of I
/// is redirected to the last instruction produced by
/// recognizeBSwapOrBitReverseIdiom.
///
/// NOTE(review): in this copy the second half of the bail-out condition, the
/// declaration of Insts, and the callee of the final cleanup call are not
/// visible - confirm against the upstream file.
8785bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8786 if (!I.getType()->isIntegerTy() ||
8788 TLI->getValueType(*DL, I.getType(), true)))
8789 return false;
8790
8792 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8793 return false;
// The last inserted instruction stands in for I from here on.
8794 Instruction *LastInst = Insts.back();
8795 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8797 &I, TLInfo, nullptr,
8798 [&](Value *V) { removeAllAssertingVHReferences(V); });
8799 return true;
8800}
8801
8802// In this pass we look for GEP and cast instructions that are used
8803// across basic blocks and rewrite them to improve basic-block-at-a-time
8804// selection.
8805bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8806 SunkAddrs.clear();
8807 bool MadeChange = false;
8808
8809 do {
8810 CurInstIterator = BB.begin();
8811 ModifiedDT = ModifyDT::NotModifyDT;
8812 while (CurInstIterator != BB.end()) {
8813 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8814 if (ModifiedDT != ModifyDT::NotModifyDT) {
8815 // For huge function we tend to quickly go though the inner optmization
8816 // opportunities in the BB. So we go back to the BB head to re-optimize
8817 // each instruction instead of go back to the function head.
8818 if (IsHugeFunc) {
8819 DT.reset();
8820 getDT(*BB.getParent());
8821 break;
8822 } else {
8823 return true;
8824 }
8825 }
8826 }
8827 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8828
8829 bool MadeBitReverse = true;
8830 while (MadeBitReverse) {
8831 MadeBitReverse = false;
8832 for (auto &I : reverse(BB)) {
8833 if (makeBitReverse(I)) {
8834 MadeBitReverse = MadeChange = true;
8835 break;
8836 }
8837 }
8838 }
8839 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8840
8841 return MadeChange;
8842}
8843
8844// Some CGP optimizations may move or alter what's computed in a block. Check
8845// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8846bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8847 assert(isa<DbgValueInst>(I));
8848 DbgValueInst &DVI = *cast<DbgValueInst>(I);
8849
8850 // Does this dbg.value refer to a sunk address calculation?
8851 bool AnyChange = false;
8852 SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8853 DVI.location_ops().end());
8854 for (Value *Location : LocationOps) {
8855 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8856 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8857 if (SunkAddr) {
8858 // Point dbg.value at locally computed address, which should give the best
8859 // opportunity to be accurately lowered. This update may change the type
8860 // of pointer being referred to; however this makes no difference to
8861 // debugging information, and we can't generate bitcasts that may affect
8862 // codegen.
8863 DVI.replaceVariableLocationOp(Location, SunkAddr);
8864 AnyChange = true;
8865 }
8866 }
8867 return AnyChange;
8868}
8869
8870bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8871 bool AnyChange = false;
8872 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8873 AnyChange |= fixupDbgVariableRecord(DVR);
8874 return AnyChange;
8875}
8876
8877// FIXME: should updating debug-info really cause the "changed" flag to fire,
8878// which can cause a function to be reprocessed?
8879bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8880 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8881 DVR.Type != DbgVariableRecord::LocationType::Assign)
8882 return false;
8883
8884 // Does this DbgVariableRecord refer to a sunk address calculation?
8885 bool AnyChange = false;
8886 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8887 DVR.location_ops().end());
8888 for (Value *Location : LocationOps) {
8889 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8890 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8891 if (SunkAddr) {
8892 // Point dbg.value at locally computed address, which should give the best
8893 // opportunity to be accurately lowered. This update may change the type
8894 // of pointer being referred to; however this makes no difference to
8895 // debugging information, and we can't generate bitcasts that may affect
8896 // codegen.
8897 DVR.replaceVariableLocationOp(Location, SunkAddr);
8898 AnyChange = true;
8899 }
8900 }
8901 return AnyChange;
8902}
8903
// Detach DVI from its current position and re-insert it next to VI: at the
// first insertion point of VI's block when VI is a PHI node, otherwise
// directly after VI.
// NOTE(review): the signature line of this helper is not visible in this
// copy - confirm against the upstream file.
8905 DVI->removeFromParent();
8906 if (isa<PHINode>(VI))
8907 DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
8908 else
8909 DVI->insertAfter(VI);
8910}
8911
// Detach DVR from its current position and re-insert it next to VI: before
// the first insertion point of VI's block when VI is a PHI node, otherwise
// after VI.
// NOTE(review): the signature line of this helper is not visible in this
// copy - confirm against the upstream file.
8913 DVR->removeFromParent();
8914 BasicBlock *VIBB = VI->getParent();
8915 if (isa<PHINode>(VI))
8916 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8917 else
8918 VIBB->insertDbgRecordAfter(DVR, &*VI);
8919}
8920
8921// A llvm.dbg.value may be using a value before its definition, due to
8922// optimizations in this pass and others. Scan for such dbg.values, and rescue
8923// them by moving the dbg.value to immediately after the value definition.
8924// FIXME: Ideally this should never be necessary, and this has the potential
8925// to re-order dbg.value intrinsics.
//
// Returns true if at least one debug value was moved. Killing a location
// (setKillLocation) does not by itself set the return value.
//
// NOTE(review): in this copy the declaration of VIs and the headers of the
// per-instruction loop and of the DbgVariableRecord loop are not visible -
// confirm against the upstream file.
8926bool CodeGenPrepare::placeDbgValues(Function &F) {
8927 bool MadeChange = false;
// A fresh dominator tree is built for F for the dominance queries below.
8928 DominatorTree DT(F);
8929
// DbgItem is either a DbgValueInst or a DbgVariableRecord (see the two call
// sites below); Position is the instruction used for the dominance query.
8930 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
// Collect the location operands that are instructions.
8932 for (Value *V : DbgItem->location_ops())
8933 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8934 VIs.push_back(VI);
8935
8936 // This item may depend on multiple instructions, complicating any
8937 // potential sink. This block takes the defensive approach, opting to
8938 // "undef" the item if it has more than one instruction and any of them do
8939 // not dominate item.
8940 for (Instruction *VI : VIs) {
8941 if (VI->isTerminator())
8942 continue;
8943
8944 // If VI is a phi in a block with an EHPad terminator, we can't insert
8945 // after it.
8946 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
8947 continue;
8948
8949 // If the defining instruction dominates the dbg.value, we do not need
8950 // to move the dbg.value.
8951 if (DT.dominates(VI, Position))
8952 continue;
8953
8954 // If we depend on multiple instructions and any of them doesn't
8955 // dominate this DVI, we probably can't salvage it: moving it to
8956 // after any of the instructions could cause us to lose the others.
8957 if (VIs.size() > 1) {
8958 LLVM_DEBUG(
8959 dbgs()
8960 << "Unable to find valid location for Debug Value, undefing:\n"
8961 << *DbgItem);
8962 DbgItem->setKillLocation();
8963 break;
8964 }
8965
// NOTE(review): the message says "before", but the DbgInserterHelper bodies
// visible in this file insert after VI (or at the block's first insertion
// point for a PHI).
8966 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
8967 << *DbgItem << ' ' << *VI);
8968 DbgInserterHelper(DbgItem, VI->getIterator());
8969 MadeChange = true;
8970 ++NumDbgValueMoved;
8971 }
8972 };
8973
8974 for (BasicBlock &BB : F) {
8976 // Process dbg.value intrinsics.
8977 DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8978 if (DVI) {
// For an intrinsic, the dbg.value itself is the position to be dominated.
8979 DbgProcessor(DVI, DVI);
8980 continue;
8981 }
8982
8983 // If this isn't a dbg.value, process any attached DbgVariableRecord
8984 // records attached to this instruction.
8986 filterDbgVars(Insn.getDbgRecordRange()))) {
// Only value-type records are processed here.
8987 if (DVR.Type != DbgVariableRecord::LocationType::Value)
8988 continue;
8989 DbgProcessor(&DVR, &Insn);
8990 }
8991 }
8992 }
8993
8994 return MadeChange;
8995}
8996
8997// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8998// probes can be chained dependencies of other regular DAG nodes and block DAG
8999// combine optimizations.
9000bool CodeGenPrepare::placePseudoProbes(Function &F) {
9001 bool MadeChange = false;
9002 for (auto &Block : F) {
9003 // Move the rest probes to the beginning of the block.
9004 auto FirstInst = Block.getFirstInsertionPt();
9005 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9006 ++FirstInst;
9007 BasicBlock::iterator I(FirstInst);
9008 I++;
9009 while (I != Block.end()) {
9010 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9011 II->moveBefore(FirstInst);
9012 MadeChange = true;
9013 }
9014 }
9015 }
9016 return MadeChange;
9017}
9018
/// Scale down both weights to fit into uint32_t.
///
/// Both values are divided by the same factor, chosen from the larger of the
/// two so that the larger result is strictly below the uint32_t maximum.
/// Values that already fit are left unchanged (the factor is 1).
///
/// \param NewTrue  In/out: weight of the true edge.
/// \param NewFalse In/out: weight of the false edge.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  const uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  // The factor is kept in 64 bits: for very large inputs the quotient plus
  // one does not fit in 32 bits, and a truncated factor could even become
  // zero and trigger a division by zero.
  const uint64_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
}
9026
9027/// Some targets prefer to split a conditional branch like:
9028/// \code
9029/// %0 = icmp ne i32 %a, 0
9030/// %1 = icmp ne i32 %b, 0
9031/// %or.cond = or i1 %0, %1
9032/// br i1 %or.cond, label %TrueBB, label %FalseBB
9033/// \endcode
9034/// into multiple branch instructions like:
9035/// \code
9036/// bb1:
9037/// %0 = icmp ne i32 %a, 0
9038/// br i1 %0, label %TrueBB, label %bb2
9039/// bb2:
9040/// %1 = icmp ne i32 %b, 0
9041/// br i1 %1, label %TrueBB, label %FalseBB
9042/// \endcode
9043/// This usually allows instruction selection to do even further optimizations
9044/// and combine the compare with the branch instruction. Currently this is
9045/// applied for targets which have "cheap" jump instructions.
9046///
9047/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9048///
9049bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9050 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9051 return false;
9052
9053 bool MadeChange = false;
9054 for (auto &BB : F) {
9055 // Does this BB end with the following?
9056 // %cond1 = icmp|fcmp|binary instruction ...
9057 // %cond2 = icmp|fcmp|binary instruction ...
9058 // %cond.or = or|and i1 %cond1, cond2
9059 // br i1 %cond.or label %dest1, label %dest2"
9060 Instruction *LogicOp;
9061 BasicBlock *TBB, *FBB;
9062 if (!match(BB.getTerminator(),
9063 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9064 continue;
9065
9066 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9067 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9068 continue;
9069
9070 // The merging of mostly empty BB can cause a degenerate branch.
9071 if (TBB == FBB)
9072 continue;
9073
9074 unsigned Opc;
9075 Value *Cond1, *Cond2;
9076 if (match(LogicOp,
9077 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9078 Opc = Instruction::And;
9079 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9080 m_OneUse(m_Value(Cond2)))))
9081 Opc = Instruction::Or;
9082 else
9083 continue;
9084
9085 auto IsGoodCond = [](Value *Cond) {
9086 return match(
9087 Cond,
9089 m_LogicalOr(m_Value(), m_Value()))));
9090 };
9091 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9092 continue;
9093
9094 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9095
9096 // Create a new BB.
9097 auto *TmpBB =
9098 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9099 BB.getParent(), BB.getNextNode());
9100 if (IsHugeFunc)
9101 FreshBBs.insert(TmpBB);
9102
9103 // Update original basic block by using the first condition directly by the
9104 // branch instruction and removing the no longer needed and/or instruction.
9105 Br1->setCondition(Cond1);
9106 LogicOp->eraseFromParent();
9107
9108 // Depending on the condition we have to either replace the true or the
9109 // false successor of the original branch instruction.
9110 if (Opc == Instruction::And)
9111 Br1->setSuccessor(0, TmpBB);
9112 else
9113 Br1->setSuccessor(1, TmpBB);
9114
9115 // Fill in the new basic block.
9116 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9117 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9118 I->removeFromParent();
9119 I->insertBefore(Br2->getIterator());
9120 }
9121
9122 // Update PHI nodes in both successors. The original BB needs to be
9123 // replaced in one successor's PHI nodes, because the branch comes now from
9124 // the newly generated BB (NewBB). In the other successor we need to add one
9125 // incoming edge to the PHI nodes, because both branch instructions target
9126 // now the same successor. Depending on the original branch condition
9127 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9128 // we perform the correct update for the PHI nodes.
9129 // This doesn't change the successor order of the just created branch
9130 // instruction (or any other instruction).
9131 if (Opc == Instruction::Or)
9132 std::swap(TBB, FBB);
9133
9134 // Replace the old BB with the new BB.
9135 TBB->replacePhiUsesWith(&BB, TmpBB);
9136
9137 // Add another incoming edge from the new BB.
9138 for (PHINode &PN : FBB->phis()) {
9139 auto *Val = PN.getIncomingValueForBlock(&BB);
9140 PN.addIncoming(Val, TmpBB);
9141 }
9142
9143 // Update the branch weights (from SelectionDAGBuilder::
9144 // FindMergedConditions).
9145 if (Opc == Instruction::Or) {
9146 // Codegen X | Y as:
9147 // BB1:
9148 // jmp_if_X TBB
9149 // jmp TmpBB
9150 // TmpBB:
9151 // jmp_if_Y TBB
9152 // jmp FBB
9153 //
9154
9155 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
9156 // The requirement is that
9157 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9158 // = TrueProb for original BB.
9159 // Assuming the original weights are A and B, one choice is to set BB1's
9160 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9161 // assumes that
9162 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9163 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9164 // TmpBB, but the math is more complicated.
9165 uint64_t TrueWeight, FalseWeight;
9166 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9167 uint64_t NewTrueWeight = TrueWeight;
9168 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9169 scaleWeights(NewTrueWeight, NewFalseWeight);
9170 Br1->setMetadata(LLVMContext::MD_prof,
9171 MDBuilder(Br1->getContext())
9172 .createBranchWeights(TrueWeight, FalseWeight,
9173 hasBranchWeightOrigin(*Br1)));
9174
9175 NewTrueWeight = TrueWeight;
9176 NewFalseWeight = 2 * FalseWeight;
9177 scaleWeights(NewTrueWeight, NewFalseWeight);
9178 Br2->setMetadata(LLVMContext::MD_prof,
9179 MDBuilder(Br2->getContext())
9180 .createBranchWeights(TrueWeight, FalseWeight));
9181 }
9182 } else {
9183 // Codegen X & Y as:
9184 // BB1:
9185 // jmp_if_X TmpBB
9186 // jmp FBB
9187 // TmpBB:
9188 // jmp_if_Y TBB
9189 // jmp FBB
9190 //
9191 // This requires creation of TmpBB after CurBB.
9192
9193 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9194 // The requirement is that
9195 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9196 // = FalseProb for original BB.
9197 // Assuming the original weights are A and B, one choice is to set BB1's
9198 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9199 // assumes that
9200 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9201 uint64_t TrueWeight, FalseWeight;
9202 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9203 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9204 uint64_t NewFalseWeight = FalseWeight;
9205 scaleWeights(NewTrueWeight, NewFalseWeight);
9206 Br1->setMetadata(LLVMContext::MD_prof,
9207 MDBuilder(Br1->getContext())
9208 .createBranchWeights(TrueWeight, FalseWeight));
9209
9210 NewTrueWeight = 2 * TrueWeight;
9211 NewFalseWeight = FalseWeight;
9212 scaleWeights(NewTrueWeight, NewFalseWeight);
9213 Br2->setMetadata(LLVMContext::MD_prof,
9214 MDBuilder(Br2->getContext())
9215 .createBranchWeights(TrueWeight, FalseWeight));
9216 }
9217 }
9218
9219 ModifiedDT = ModifyDT::ModifyBBDT;
9220 MadeChange = true;
9221
9222 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9223 TmpBB->dump());
9224 }
9225 return MadeChange;
9226}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, const TargetTransformInfo &TTI, const DataLayout &DL)
Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1.
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
#define DEBUG_TYPE
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1504
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned logBase2() const
Definition: APInt.h:1739
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
an instruction to allocate memory on the stack
Definition: Instructions.h:63
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:124
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:117
void setAlignment(Align Align)
Definition: Instructions.h:128
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
static unsigned getPointerOperandIndex()
Definition: Instructions.h:631
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
static unsigned getPointerOperandIndex()
Definition: Instructions.h:872
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:530
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:437
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:671
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:381
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:178
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:213
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:398
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:599
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:481
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:489
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:511
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:279
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
BinaryOps getOpcode() const
Definition: InstrTypes.h:370
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1408
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1341
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1451
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1286
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1291
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1277
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:825
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:53
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2321
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2626
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:208
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1472
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1421
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:815
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:144
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:329
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:264
Type * getValueType() const
Definition: GlobalValue.h:297
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2162
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2574
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:239
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1383
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1874
Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition: IRBuilder.cpp:1248
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2404
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2435
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1164
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:521
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:80
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:511
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:404
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:869
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
bool isShift() const
Definition: Instruction.h:318
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
void clear()
Definition: MapVector.h:88
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
PointerIntPair - This class implements a pair of a pointer and small integer.
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1460
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:298
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool erase(const T &V)
Definition: SmallSet.h:193
void clear()
Definition: SmallSet.h:204
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:222
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
typename SuperClass::iterator iterator
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:81
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:252
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1859
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:288
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:145
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:746
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:946
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:234
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:825
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5309
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
This class represents zero extension of integer types.
int getNumOccurrences() const
Definition: CommandLine.h:399
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isNonZero() const
Definition: TypeSize.h:158
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:844
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:524
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:864
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:480
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:543
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
auto pred_end(const MachineBasicBlock *BB)
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1683
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:155
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:242
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:4093
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition: Analysis.cpp:199
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:51
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition: Analysis.cpp:584
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:295
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:71
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.