1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
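//
// For example (illustrative only), one such preparation is sinking an address
// computation into the block that uses it, so that SelectionDAG, which only
// sees one basic block at a time, can fold it into the load's addressing mode:
//
//   entry:
//     %addr = getelementptr i8, ptr %base, i64 40
//     br label %use
//   use:
//     %val = load i32, ptr %addr
//
// conceptually becomes:
//
//   use:
//     %addr.sunk = getelementptr i8, ptr %base, i64 40
//     %val = load i32, ptr %addr.sunk
//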
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
269static cl::opt<unsigned>
270    MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271                          cl::Hidden,
272                          cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290  NotModifyDT, // Do not modify any dominator tree.
291  ModifyBBDT,  // Modify the basic block dominator tree.
292  ModifyInstDT // Modify the instruction "dominator" within a basic block.
293               // This usually means we move/delete/insert instructions
294               // in a basic block, so we should re-iterate the instructions
295               // in such a basic block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using SExts = SmallVector<Instruction *, 16>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334  /// Keeps track of the types of the related instructions before their
335  /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
355  /// Map large-offset GEPs to their serial numbers.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of SExt promoted.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare(){};
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373  /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376  /// FreshBBs is like a worklist; it collects the updated BBs which need
377  /// to be optimized again.
378  /// Note: To keep the build time of this pass in check, whenever a BB is
379  /// updated in a huge function, we need to insert that BB into FreshBBs.
381  SmallSet<BasicBlock *, 32> FreshBBs;
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
392  bool run(Function &F, FunctionAnalysisManager &AM);
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool unfoldPowerOf2Test(CmpInst *Cmp);
478 void verifyBFIUpdates(Function &F);
479 bool _run(Function &F);
480};
481
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487    initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
488  }
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540                    "Optimize for code generation", false, false)
541
542FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
543  return new CodeGenPrepareLegacyPass();
544}
545
546PreservedAnalyses CodeGenPreparePass::run(Function &F,
547                                          FunctionAnalysisManager &AM) {
548  CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
558 return PA;
559}
560
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
569 BPI.reset(new BranchProbabilityInfo(F, *LI));
570 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
575 return _run(F);
576}
577
578bool CodeGenPrepare::_run(Function &F) {
579 bool EverMadeChange = false;
580
581 OptSize = F.hasOptSize();
582 // Use the basic-block-sections profile to promote hot functions to .text.hot
583 // if requested.
584 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
585 BBSectionsProfileReader->isFunctionHot(F.getName())) {
586 F.setSectionPrefix("hot");
587 } else if (ProfileGuidedSectionPrefix) {
588    // The hot attribute overrides profile-count-based hotness, while
589    // profile-count-based hotness overrides the cold attribute.
590    // This is conservative behavior.
591 if (F.hasFnAttribute(Attribute::Hot) ||
592 PSI->isFunctionHotInCallGraph(&F, *BFI))
593 F.setSectionPrefix("hot");
594    // If PSI shows this function is not hot, we place the function into the
595    // unlikely section if (1) PSI shows this is a cold function, or
596    // (2) the function has the cold attribute.
597 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
598 F.hasFnAttribute(Attribute::Cold))
599 F.setSectionPrefix("unlikely");
600 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
601 PSI->isFunctionHotnessUnknown(F))
602 F.setSectionPrefix("unknown");
603 }
604
605 /// This optimization identifies DIV instructions that can be
606 /// profitably bypassed and carried out with a shorter, faster divide.
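  /// For example (an illustrative sketch, not the exact emitted IR): on a
  /// target that reports a 32-bit bypass width for 64-bit division,
  /// bypassSlowDivision rewrites
  ///
  ///   %res = udiv i64 %a, %b
  ///
  /// into a runtime check that uses a cheap 32-bit divide when both operands
  /// happen to fit into 32 bits, falling back to the full 64-bit divide
  /// otherwise.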
607 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
608 const DenseMap<unsigned int, unsigned int> &BypassWidths =
610 BasicBlock *BB = &*F.begin();
611 while (BB != nullptr) {
612 // bypassSlowDivision may create new BBs, but we don't want to reapply the
613 // optimization to those blocks.
614 BasicBlock *Next = BB->getNextNode();
615 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
616 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
617 BB = Next;
618 }
619 }
620
621 // Get rid of @llvm.assume builtins before attempting to eliminate empty
622 // blocks, since there might be blocks that only contain @llvm.assume calls
623 // (plus arguments that we can get rid of).
624 EverMadeChange |= eliminateAssumptions(F);
625
626 // Eliminate blocks that contain only PHI nodes and an
627 // unconditional branch.
628 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
629
630 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631  if (!DisableBranchOpts)
632    EverMadeChange |= splitBranchCondition(F, ModifiedDT);
633
634 // Split some critical edges where one of the sources is an indirect branch,
635 // to help generate sane code for PHIs involving such edges.
636 EverMadeChange |=
637 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
638
639  // If we are optimizing a huge function, we need to consider the build time,
640  // because the basic algorithm's complexity is near O(N!).
641 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
642
643 // Transformations above may invalidate dominator tree and/or loop info.
644 DT.reset();
645 LI->releaseMemory();
646 LI->analyze(getDT(F));
647
648 bool MadeChange = true;
649 bool FuncIterated = false;
650 while (MadeChange) {
651 MadeChange = false;
652
653    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
654      if (FuncIterated && !FreshBBs.contains(&BB))
655 continue;
656
657 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
658 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
659
660 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
661 DT.reset();
662
663 MadeChange |= Changed;
664 if (IsHugeFunc) {
665      // If the BB is updated, it may still have a chance to be optimized.
666      // This usually happens during sink optimization.
667 // For example:
668 //
669 // bb0:
670 // %and = and i32 %a, 4
671 // %cmp = icmp eq i32 %and, 0
672 //
673      // If %cmp sinks to another BB, %and will have a chance to sink as well.
674 if (Changed)
675 FreshBBs.insert(&BB);
676 else if (FuncIterated)
677 FreshBBs.erase(&BB);
678 } else {
679 // For small/normal functions, we restart BB iteration if the dominator
680 // tree of the Function was changed.
681 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
682 break;
683 }
684 }
685    // We have iterated over all the BBs in the function (only meaningful for
686    // huge functions).
686 FuncIterated = IsHugeFunc;
687
688 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
689 MadeChange |= mergeSExts(F);
690 if (!LargeOffsetGEPMap.empty())
691 MadeChange |= splitLargeGEPOffsets();
692 MadeChange |= optimizePhiTypes(F);
693
694 if (MadeChange)
695 eliminateFallThrough(F, DT.get());
696
697#ifndef NDEBUG
698 if (MadeChange && VerifyLoopInfo)
699 LI->verify(getDT(F));
700#endif
701
702 // Really free removed instructions during promotion.
703 for (Instruction *I : RemovedInsts)
704 I->deleteValue();
705
706 EverMadeChange |= MadeChange;
707 SeenChainsForSExt.clear();
708 ValToSExtendedUses.clear();
709 RemovedInsts.clear();
710 LargeOffsetGEPMap.clear();
711 LargeOffsetGEPID.clear();
712 }
713
714 NewGEPBases.clear();
715 SunkAddrs.clear();
716
717 if (!DisableBranchOpts) {
718 MadeChange = false;
719 // Use a set vector to get deterministic iteration order. The order the
720 // blocks are removed may affect whether or not PHI nodes in successors
721 // are removed.
723 for (BasicBlock &BB : F) {
725 MadeChange |= ConstantFoldTerminator(&BB, true);
726 if (!MadeChange)
727 continue;
728
729 for (BasicBlock *Succ : Successors)
730 if (pred_empty(Succ))
731 WorkList.insert(Succ);
732 }
733
734 // Delete the dead blocks and any of their dead successors.
735 MadeChange |= !WorkList.empty();
736 while (!WorkList.empty()) {
737 BasicBlock *BB = WorkList.pop_back_val();
738      SmallVector<BasicBlock *, 2> Successors(successors(BB));
739
740 DeleteDeadBlock(BB);
741
742 for (BasicBlock *Succ : Successors)
743 if (pred_empty(Succ))
744 WorkList.insert(Succ);
745 }
746
747 // Merge pairs of basic blocks with unconditional branches, connected by
748 // a single edge.
749 if (EverMadeChange || MadeChange)
750 MadeChange |= eliminateFallThrough(F);
751
752 EverMadeChange |= MadeChange;
753 }
754
755 if (!DisableGCOpts) {
757 for (BasicBlock &BB : F)
758 for (Instruction &I : BB)
759 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
760 Statepoints.push_back(SP);
761 for (auto &I : Statepoints)
762 EverMadeChange |= simplifyOffsetableRelocate(*I);
763 }
764
765 // Do this last to clean up use-before-def scenarios introduced by other
766 // preparatory transforms.
767 EverMadeChange |= placeDbgValues(F);
768 EverMadeChange |= placePseudoProbes(F);
769
770#ifndef NDEBUG
771  if (VerifyBFIUpdates)
772    verifyBFIUpdates(F);
773#endif
774
775 return EverMadeChange;
776}
777
778bool CodeGenPrepare::eliminateAssumptions(Function &F) {
779 bool MadeChange = false;
780 for (BasicBlock &BB : F) {
781 CurInstIterator = BB.begin();
782 while (CurInstIterator != BB.end()) {
783 Instruction *I = &*(CurInstIterator++);
784 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
785 MadeChange = true;
786 Value *Operand = Assume->getOperand(0);
787 Assume->eraseFromParent();
788
789 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
790 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
791 });
792 }
793 }
794 }
795 return MadeChange;
796}
797
798/// An instruction is about to be deleted, so remove all references to it in our
799/// GEP-tracking data structures.
800void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
801 LargeOffsetGEPMap.erase(V);
802 NewGEPBases.erase(V);
803
804 auto GEP = dyn_cast<GetElementPtrInst>(V);
805 if (!GEP)
806 return;
807
808 LargeOffsetGEPID.erase(GEP);
809
810 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
811 if (VecI == LargeOffsetGEPMap.end())
812 return;
813
814 auto &GEPVector = VecI->second;
815 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
816
817 if (GEPVector.empty())
818 LargeOffsetGEPMap.erase(VecI);
819}
820
821// Verify BFI has been updated correctly by recomputing BFI and comparing them.
822void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
823 DominatorTree NewDT(F);
824 LoopInfo NewLI(NewDT);
825 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
826 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
827 NewBFI.verifyMatch(*BFI);
828}
829
830/// Merge basic blocks which are connected by a single edge, where one of the
831/// basic blocks has a single successor pointing to the other basic block,
832/// which has a single predecessor.
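/// For example (illustrative only):
///
///   bb0:
///     %x = add i32 %a, %b
///     br label %bb1
///   bb1:                                ; bb0 is bb1's single predecessor
///     %y = mul i32 %x, 3
///     ret i32 %y
///
/// is collapsed into a single block containing both the add and the mul.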
833bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
834 bool Changed = false;
835 // Scan all of the blocks in the function, except for the entry block.
836 // Use a temporary array to avoid iterator being invalidated when
837 // deleting blocks.
840
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
881      RemoveRedundantDbgInstrs(BB);
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
895  BasicBlock::iterator BBI = BI->getIterator();
896  if (BBI != BB->begin()) {
897 --BBI;
898 if (!isa<PHINode>(BBI))
899 return nullptr;
900 }
901
902 // Do not break infinite loops.
903 BasicBlock *DestBB = BI->getSuccessor(0);
904 if (DestBB == BB)
905 return nullptr;
906
907 if (!canMergeBlocks(BB, DestBB))
908 DestBB = nullptr;
909
910 return DestBB;
911}
912
913/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
914/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
915/// edges in ways that are non-optimal for isel. Start by eliminating these
916/// blocks so we can split them the way we want them.
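/// For example (illustrative only), a block such as
///
///   bb.empty:
///     %p = phi i32 [ %x, %pred1 ], [ %y, %pred2 ]
///     br label %dest
///
/// can be removed by pointing %pred1 and %pred2 directly at %dest and folding
/// %p into the PHI nodes of %dest.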
917bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
918  SmallPtrSet<BasicBlock *, 16> Preheaders;
919  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
920 while (!LoopList.empty()) {
921 Loop *L = LoopList.pop_back_val();
922 llvm::append_range(LoopList, *L);
923 if (BasicBlock *Preheader = L->getLoopPreheader())
924 Preheaders.insert(Preheader);
925 }
926
927 bool MadeChange = false;
928 // Copy blocks into a temporary array to avoid iterator invalidation issues
929 // as we remove them.
930 // Note that this intentionally skips the entry block.
932 for (auto &Block : llvm::drop_begin(F)) {
933 // Delete phi nodes that could block deleting other empty blocks.
934    if (!DisableDeletePHIs)
935      MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
936 Blocks.push_back(&Block);
937 }
938
939 for (auto &Block : Blocks) {
940 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
941 if (!BB)
942 continue;
943 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
944 if (!DestBB ||
945 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
946 continue;
947
948 eliminateMostlyEmptyBlock(BB);
949 MadeChange = true;
950 }
951 return MadeChange;
952}
953
954bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
955 BasicBlock *DestBB,
956 bool isPreheader) {
957 // Do not delete loop preheaders if doing so would create a critical edge.
958 // Loop preheaders can be good locations to spill registers. If the
959 // preheader is deleted and we create a critical edge, registers may be
960 // spilled in the loop body instead.
961 if (!DisablePreheaderProtect && isPreheader &&
962 !(BB->getSinglePredecessor() &&
963        BB->getSinglePredecessor()->getSinglePredecessor()))
964    return false;
965
966 // Skip merging if the block's successor is also a successor to any callbr
967 // that leads to this block.
968 // FIXME: Is this really needed? Is this a correctness issue?
969 for (BasicBlock *Pred : predecessors(BB)) {
970 if (isa<CallBrInst>(Pred->getTerminator()) &&
971 llvm::is_contained(successors(Pred), DestBB))
972 return false;
973 }
974
975 // Try to skip merging if the unique predecessor of BB is terminated by a
976 // switch or indirect branch instruction, and BB is used as an incoming block
977  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
978 // add COPY instructions in the predecessor of BB instead of BB (if it is not
979  // merged). Note that the critical edge created by merging such blocks won't be
980 // split in MachineSink because the jump table is not analyzable. By keeping
981 // such empty block (BB), ISel will place COPY instructions in BB, not in the
982 // predecessor of BB.
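  // For example (illustrative only):
  //
  //   pred:
  //     switch i32 %v, label %other [ i32 1, label %bb ]
  //   bb:                      ; empty, kept so ISel emits the COPY here
  //     br label %dest
  //   dest:
  //     %p = phi i32 [ %a, %bb ], [ %c, %other ]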
983 BasicBlock *Pred = BB->getUniquePredecessor();
984 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
985 isa<IndirectBrInst>(Pred->getTerminator())))
986 return true;
987
988 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
989 return true;
990
991  // We use a simple cost heuristic which determines that skipping merging is
992 // profitable if the cost of skipping merging is less than the cost of
993 // merging : Cost(skipping merging) < Cost(merging BB), where the
994 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
995 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
996 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
997 // Freq(Pred) / Freq(BB) > 2.
998 // Note that if there are multiple empty blocks sharing the same incoming
999   // value for the PHIs in the DestBB, we consider them together. In such
1000  // a case, Cost(merging BB) will be the sum of their frequencies.
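  // For example (illustrative numbers): with the default ratio of 2, if
  // Freq(Pred) = 300 and Freq(BB) = 100, then 300 > 2 * 100 and we skip the
  // merge; if Freq(Pred) = 150 instead, 150 <= 2 * 100 and BB is merged.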
1001
1002 if (!isa<PHINode>(DestBB->begin()))
1003 return true;
1004
1005 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1006
1007 // Find all other incoming blocks from which incoming values of all PHIs in
1008 // DestBB are the same as the ones from BB.
1009 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1010 if (DestBBPred == BB)
1011 continue;
1012
1013 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1014 return DestPN.getIncomingValueForBlock(BB) ==
1015 DestPN.getIncomingValueForBlock(DestBBPred);
1016 }))
1017 SameIncomingValueBBs.insert(DestBBPred);
1018 }
1019
1020  // See if all of BB's incoming values are the same as the value from Pred. In
1021  // this case, there is no reason to skip merging because COPYs are expected
1022  // to be placed in Pred already.
1023 if (SameIncomingValueBBs.count(Pred))
1024 return true;
1025
1026 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1027 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1028
1029 for (auto *SameValueBB : SameIncomingValueBBs)
1030 if (SameValueBB->getUniquePredecessor() == Pred &&
1031 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1032 BBFreq += BFI->getBlockFreq(SameValueBB);
1033
1034 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1035 return !Limit || PredFreq <= *Limit;
1036}
1037
1038/// Return true if we can merge BB into DestBB if there is a single
1039/// unconditional branch between them, and BB contains no other non-phi
1040/// instructions.
1041bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1042 const BasicBlock *DestBB) const {
1043 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1044  // the successor. If there are more complex conditions (e.g. preheaders),
1045 // don't mess around with them.
1046 for (const PHINode &PN : BB->phis()) {
1047 for (const User *U : PN.users()) {
1048 const Instruction *UI = cast<Instruction>(U);
1049 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1050 return false;
1051 // If User is inside DestBB block and it is a PHINode then check
1052 // incoming value. If incoming value is not from BB then this is
1053 // a complex condition (e.g. preheaders) we want to avoid here.
1054 if (UI->getParent() == DestBB) {
1055 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1056 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1057 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1058 if (Insn && Insn->getParent() == BB &&
1059 Insn->getParent() != UPN->getIncomingBlock(I))
1060 return false;
1061 }
1062 }
1063 }
1064 }
1065
1066 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1067 // and DestBB may have conflicting incoming values for the block. If so, we
1068 // can't merge the block.
1069 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1070 if (!DestBBPN)
1071 return true; // no conflict.
1072
1073 // Collect the preds of BB.
1074  SmallPtrSet<const BasicBlock *, 16> BBPreds;
1075  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1076 // It is faster to get preds from a PHI than with pred_iterator.
1077 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1078 BBPreds.insert(BBPN->getIncomingBlock(i));
1079 } else {
1080 BBPreds.insert_range(predecessors(BB));
1081 }
1082
1083 // Walk the preds of DestBB.
1084 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1085 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1086 if (BBPreds.count(Pred)) { // Common predecessor?
1087 for (const PHINode &PN : DestBB->phis()) {
1088 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1089 const Value *V2 = PN.getIncomingValueForBlock(BB);
1090
1091 // If V2 is a phi node in BB, look up what the mapped value will be.
1092 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1093 if (V2PN->getParent() == BB)
1094 V2 = V2PN->getIncomingValueForBlock(Pred);
1095
1096 // If there is a conflict, bail out.
1097 if (V1 != V2)
1098 return false;
1099 }
1100 }
1101 }
1102
1103 return true;
1104}
1105
1106/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1107static void replaceAllUsesWith(Value *Old, Value *New,
1108                               SmallSet<BasicBlock *, 32> &FreshBBs,
1109                               bool IsHuge) {
1110 auto *OldI = dyn_cast<Instruction>(Old);
1111 if (OldI) {
1112 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1113 UI != E; ++UI) {
1114 Instruction *User = cast<Instruction>(*UI);
1115 if (IsHuge)
1116 FreshBBs.insert(User->getParent());
1117 }
1118 }
1119 Old->replaceAllUsesWith(New);
1120}
1121
1122/// Eliminate a basic block that has only phi's and an unconditional branch in
1123/// it.
1124void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1125 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1126 BasicBlock *DestBB = BI->getSuccessor(0);
1127
1128 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1129 << *BB << *DestBB);
1130
1131 // If the destination block has a single pred, then this is a trivial edge,
1132 // just collapse it.
1133 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1134 if (SinglePred != DestBB) {
1135 assert(SinglePred == BB &&
1136 "Single predecessor not the same as predecessor");
1137 // Merge DestBB into SinglePred/BB and delete it.
1138      MergeBasicBlockIntoOnlyPred(DestBB, /* DTU */ nullptr);
1139      // Note: BB(=SinglePred) will not be deleted on this path.
1140 // DestBB(=its single successor) is the one that was deleted.
1141 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1142
1143 if (IsHugeFunc) {
1144 // Update FreshBBs to optimize the merged BB.
1145 FreshBBs.insert(SinglePred);
1146 FreshBBs.erase(DestBB);
1147 }
1148 return;
1149 }
1150 }
1151
1152 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1153 // to handle the new incoming edges it is about to have.
1154 for (PHINode &PN : DestBB->phis()) {
1155 // Remove the incoming value for BB, and remember it.
1156 Value *InVal = PN.removeIncomingValue(BB, false);
1157
1158 // Two options: either the InVal is a phi node defined in BB or it is some
1159 // value that dominates BB.
1160 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1161 if (InValPhi && InValPhi->getParent() == BB) {
1162 // Add all of the input values of the input PHI as inputs of this phi.
1163 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1164 PN.addIncoming(InValPhi->getIncomingValue(i),
1165 InValPhi->getIncomingBlock(i));
1166 } else {
1167 // Otherwise, add one instance of the dominating value for each edge that
1168 // we will be adding.
1169 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1170 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1171 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1172 } else {
1173 for (BasicBlock *Pred : predecessors(BB))
1174 PN.addIncoming(InVal, Pred);
1175 }
1176 }
1177 }
1178
1179 // Preserve loop Metadata.
1180 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1181 for (auto *Pred : predecessors(BB))
1182 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1183 }
1184
1185 // The PHIs are now updated, change everything that refers to BB to use
1186 // DestBB and remove BB.
1187 BB->replaceAllUsesWith(DestBB);
1188 BB->eraseFromParent();
1189 ++NumBlocksElim;
1190
1191 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1192}
1193
1194// Computes a map of base pointer relocation instructions to corresponding
1195// derived pointer relocation instructions given a vector of all relocate calls
1197 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1199 &RelocateInstMap) {
1200 // Collect information in two maps: one primarily for locating the base object
1201 // while filling the second map; the second map is the final structure holding
1202 // a mapping between Base and corresponding Derived relocate calls
1204 for (auto *ThisRelocate : AllRelocateCalls) {
1205 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1206 ThisRelocate->getDerivedPtrIndex());
1207 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1208 }
1209 for (auto &Item : RelocateIdxMap) {
1210 std::pair<unsigned, unsigned> Key = Item.first;
1211 if (Key.first == Key.second)
1212 // Base relocation: nothing to insert
1213 continue;
1214
1215 GCRelocateInst *I = Item.second;
1216 auto BaseKey = std::make_pair(Key.first, Key.first);
1217
1218 // We're iterating over RelocateIdxMap so we cannot modify it.
1219 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1220 if (MaybeBase == RelocateIdxMap.end())
1221 // TODO: We might want to insert a new base object relocate and gep off
1222 // that, if there are enough derived object relocates.
1223 continue;
1224
1225 RelocateInstMap[MaybeBase->second].push_back(I);
1226 }
1227}
1228
1229// Accepts a GEP and extracts the operands into a vector provided they're all
1230// small integer constants
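//
// For example (illustrative only), for
//
//   %d = getelementptr [32 x i32], ptr %base, i32 0, i32 15
//
// OffsetV becomes {0, 15}; a GEP with a non-constant index, or with a constant
// index larger than 20, is rejected.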
1232 SmallVectorImpl<Value *> &OffsetV) {
1233 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1234 // Only accept small constant integer operands
1235 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1236 if (!Op || Op->getZExtValue() > 20)
1237 return false;
1238 }
1239
1240 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1241 OffsetV.push_back(GEP->getOperand(i));
1242 return true;
1243}
1244
1245// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1246// replace, computes a replacement, and applies it.
1247static bool
1249 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1250 bool MadeChange = false;
1251  // We must ensure that the relocation of a derived pointer is defined after
1252  // the relocation of its base pointer. If we find a relocation corresponding
1253  // to this base that is defined earlier than the base's relocation, we move
1254  // the base's relocation right before the found relocation. We consider only
1255  // relocations in the same basic block as the base's relocation; relocations
1256  // from other basic blocks are skipped by the optimization.
1257 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1258 &*R != RelocatedBase; ++R)
1259 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1260 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1261 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1262 RelocatedBase->moveBefore(RI->getIterator());
1263 MadeChange = true;
1264 break;
1265 }
1266
1267 for (GCRelocateInst *ToReplace : Targets) {
1268 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1269 "Not relocating a derived object of the original base object");
1270 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1271 // A duplicate relocate call. TODO: coalesce duplicates.
1272 continue;
1273 }
1274
1275 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1276 // Base and derived relocates are in different basic blocks.
1277 // In this case transform is only valid when base dominates derived
1278 // relocate. However it would be too expensive to check dominance
1279 // for each such relocate, so we skip the whole transformation.
1280 continue;
1281 }
1282
1283 Value *Base = ToReplace->getBasePtr();
1284 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1285 if (!Derived || Derived->getPointerOperand() != Base)
1286 continue;
1287
1288    SmallVector<Value *, 2> OffsetV;
1289    if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1290 continue;
1291
1292 // Create a Builder and replace the target callsite with a gep
1293 assert(RelocatedBase->getNextNode() &&
1294 "Should always have one since it's not a terminator");
1295
1296 // Insert after RelocatedBase
1297 IRBuilder<> Builder(RelocatedBase->getNextNode());
1298 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1299
1300 // If gc_relocate does not match the actual type, cast it to the right type.
1301 // In theory, there must be a bitcast after gc_relocate if the type does not
1302    // match, and we should reuse it to get the derived pointer. But there could
1303    // be cases like this:
1304 // bb1:
1305 // ...
1306 // %g1 = call coldcc i8 addrspace(1)*
1307 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1308 //
1309 // bb2:
1310 // ...
1311 // %g2 = call coldcc i8 addrspace(1)*
1312 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313 //
1314 // merge:
1315 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1316 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1317 //
1318    // In this case, we can no longer find the bitcast, so we insert a new
1319    // bitcast whether one already exists or not. In this way, we can handle
1320 // all cases, and the extra bitcast should be optimized away in later
1321 // passes.
1322 Value *ActualRelocatedBase = RelocatedBase;
1323 if (RelocatedBase->getType() != Base->getType()) {
1324 ActualRelocatedBase =
1325 Builder.CreateBitCast(RelocatedBase, Base->getType());
1326 }
1327 Value *Replacement =
1328 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1329 ArrayRef(OffsetV));
1330 Replacement->takeName(ToReplace);
1331 // If the newly generated derived pointer's type does not match the original
1332 // derived pointer's type, cast the new derived pointer to match it. Same
1333 // reasoning as above.
1334 Value *ActualReplacement = Replacement;
1335 if (Replacement->getType() != ToReplace->getType()) {
1336 ActualReplacement =
1337 Builder.CreateBitCast(Replacement, ToReplace->getType());
1338 }
1339 ToReplace->replaceAllUsesWith(ActualReplacement);
1340 ToReplace->eraseFromParent();
1341
1342 MadeChange = true;
1343 }
1344 return MadeChange;
1345}
1346
1347// Turns this:
1348//
1349// %base = ...
1350// %ptr = gep %base + 15
1351// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1352// %base' = relocate(%tok, i32 4, i32 4)
1353// %ptr' = relocate(%tok, i32 4, i32 5)
1354// %val = load %ptr'
1355//
1356// into this:
1357//
1358// %base = ...
1359// %ptr = gep %base + 15
1360// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1361// %base' = gc.relocate(%tok, i32 4, i32 4)
1362// %ptr' = gep %base' + 15
1363// %val = load %ptr'
1364bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1365 bool MadeChange = false;
1366 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1367 for (auto *U : I.users())
1368 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1369 // Collect all the relocate calls associated with a statepoint
1370 AllRelocateCalls.push_back(Relocate);
1371
1372 // We need at least one base pointer relocation + one derived pointer
1373 // relocation to mangle
1374 if (AllRelocateCalls.size() < 2)
1375 return false;
1376
1377 // RelocateInstMap is a mapping from the base relocate instruction to the
1378 // corresponding derived relocate instructions
1380 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1381 if (RelocateInstMap.empty())
1382 return false;
1383
1384 for (auto &Item : RelocateInstMap)
1385 // Item.first is the RelocatedBase to offset against
1386 // Item.second is the vector of Targets to replace
1387 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1388 return MadeChange;
1389}
1390
1391/// Sink the specified cast instruction into its user blocks.
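/// For example (illustrative only):
///
///   bb0:
///     %c = addrspacecast ptr %p to ptr addrspace(1)
///     br label %bb1
///   bb1:
///     %v = load i32, ptr addrspace(1) %c
///
/// A clone of %c is inserted in bb1 and the load is rewritten to use it, so
/// the cast and its user end up in the same block for instruction selection.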
1392static bool SinkCast(CastInst *CI) {
1393 BasicBlock *DefBB = CI->getParent();
1394
1395 /// InsertedCasts - Only insert a cast in each block once.
1396  DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1397
1398 bool MadeChange = false;
1399 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1400 UI != E;) {
1401 Use &TheUse = UI.getUse();
1402 Instruction *User = cast<Instruction>(*UI);
1403
1404 // Figure out which BB this cast is used in. For PHI's this is the
1405 // appropriate predecessor block.
1406 BasicBlock *UserBB = User->getParent();
1407 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1408 UserBB = PN->getIncomingBlock(TheUse);
1409 }
1410
1411 // Preincrement use iterator so we don't invalidate it.
1412 ++UI;
1413
1414 // The first insertion point of a block containing an EH pad is after the
1415 // pad. If the pad is the user, we cannot sink the cast past the pad.
1416 if (User->isEHPad())
1417 continue;
1418
1419 // If the block selected to receive the cast is an EH pad that does not
1420 // allow non-PHI instructions before the terminator, we can't sink the
1421 // cast.
1422 if (UserBB->getTerminator()->isEHPad())
1423 continue;
1424
1425 // If this user is in the same block as the cast, don't change the cast.
1426 if (UserBB == DefBB)
1427 continue;
1428
1429 // If we have already inserted a cast into this block, use it.
1430 CastInst *&InsertedCast = InsertedCasts[UserBB];
1431
1432 if (!InsertedCast) {
1433 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1434 assert(InsertPt != UserBB->end());
1435 InsertedCast = cast<CastInst>(CI->clone());
1436 InsertedCast->insertBefore(*UserBB, InsertPt);
1437 }
1438
1439 // Replace a use of the cast with a use of the new cast.
1440 TheUse = InsertedCast;
1441 MadeChange = true;
1442 ++NumCastUses;
1443 }
1444
1445 // If we removed all uses, nuke the cast.
1446 if (CI->use_empty()) {
1447 salvageDebugInfo(*CI);
1448 CI->eraseFromParent();
1449 MadeChange = true;
1450 }
1451
1452 return MadeChange;
1453}
1454
1455/// If the specified cast instruction is a noop copy (e.g. it's casting from
1456/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1457/// reduce the number of virtual registers that must be created and coalesced.
1458///
1459/// Return true if any changes are made.
1461 const DataLayout &DL) {
1462 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1463 // than sinking only nop casts, but is helpful on some platforms.
1464 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1465 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1466 ASC->getDestAddressSpace()))
1467 return false;
1468 }
1469
1470 // If this is a noop copy,
1471 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1472 EVT DstVT = TLI.getValueType(DL, CI->getType());
1473
1474  // Is this an fp<->int conversion?
1475 if (SrcVT.isInteger() != DstVT.isInteger())
1476 return false;
1477
1478 // If this is an extension, it will be a zero or sign extension, which
1479 // isn't a noop.
1480 if (SrcVT.bitsLT(DstVT))
1481 return false;
1482
1483 // If these values will be promoted, find out what they will be promoted
1484 // to. This helps us consider truncates on PPC as noop copies when they
1485 // are.
1486 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1488 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1489 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1491 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1492
1493 // If, after promotion, these are the same types, this is a noop copy.
1494 if (SrcVT != DstVT)
1495 return false;
1496
1497 return SinkCast(CI);
1498}
1499
1500// Match a simple increment by constant operation. Note that if a sub is
1501// matched, the step is negated (as if the step had been canonicalized to
1502// an add, even though we leave the instruction alone.)
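//
// For example (illustrative only):
//
//   %iv.next = add i32 %iv, 1   ; matches with LHS = %iv, Step = 1
//   %iv.next = sub i32 %iv, 4   ; matches with LHS = %iv, Step = -4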
1503static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1504 Constant *&Step) {
1505 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1506 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1507 m_Instruction(LHS), m_Constant(Step)))))
1508 return true;
1509 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1510 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1511 m_Instruction(LHS), m_Constant(Step))))) {
1512 Step = ConstantExpr::getNeg(Step);
1513 return true;
1514 }
1515 return false;
1516}
1517
1518/// If given \p PN is an inductive variable with value IVInc coming from the
1519/// backedge, and on each iteration it gets increased by Step, return pair
1520/// <IVInc, Step>. Otherwise, return std::nullopt.
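/// For example (illustrative only), given the canonical loop
///
///   loop:
///     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
///     ...
///     %iv.next = add i32 %iv, 1
///     br i1 %cond, label %loop, label %exit
///
/// this returns the pair <%iv.next, 1>.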
1521static std::optional<std::pair<Instruction *, Constant *>>
1522getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1523 const Loop *L = LI->getLoopFor(PN->getParent());
1524 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1525 return std::nullopt;
1526 auto *IVInc =
1527 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1528 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1529 return std::nullopt;
1530 Instruction *LHS = nullptr;
1531 Constant *Step = nullptr;
1532 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1533 return std::make_pair(IVInc, Step);
1534 return std::nullopt;
1535}
1536
1537static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1538 auto *I = dyn_cast<Instruction>(V);
1539 if (!I)
1540 return false;
1541 Instruction *LHS = nullptr;
1542 Constant *Step = nullptr;
1543 if (!matchIncrement(I, LHS, Step))
1544 return false;
1545 if (auto *PN = dyn_cast<PHINode>(LHS))
1546 if (auto IVInc = getIVIncrement(PN, LI))
1547 return IVInc->first == I;
1548 return false;
1549}
1550
1551bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1552 Value *Arg0, Value *Arg1,
1553 CmpInst *Cmp,
1554 Intrinsic::ID IID) {
1555 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1556 if (!isIVIncrement(BO, LI))
1557 return false;
1558 const Loop *L = LI->getLoopFor(BO->getParent());
1559 assert(L && "L should not be null after isIVIncrement()");
1560    // Do not risk moving the increment into a child loop.
1561 if (LI->getLoopFor(Cmp->getParent()) != L)
1562 return false;
1563
1564 // Finally, we need to ensure that the insert point will dominate all
1565 // existing uses of the increment.
1566
1567 auto &DT = getDT(*BO->getParent()->getParent());
1568 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1569 // If we're moving up the dom tree, all uses are trivially dominated.
1570 // (This is the common case for code produced by LSR.)
1571 return true;
1572
1573 // Otherwise, special case the single use in the phi recurrence.
1574 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1575 };
1576 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1577 // We used to use a dominator tree here to allow multi-block optimization.
1578 // But that was problematic because:
1579 // 1. It could cause a perf regression by hoisting the math op into the
1580 // critical path.
1581 // 2. It could cause a perf regression by creating a value that was live
1582 // across multiple blocks and increasing register pressure.
1583 // 3. Use of a dominator tree could cause large compile-time regression.
1584 // This is because we recompute the DT on every change in the main CGP
1585 // run-loop. The recomputing is probably unnecessary in many cases, so if
1586 // that was fixed, using a DT here would be ok.
1587 //
1588 // There is one important particular case we still want to handle: if BO is
1589 // the IV increment. Important properties that make it profitable:
1590 // - We can speculate IV increment anywhere in the loop (as long as the
1591 // indvar Phi is its only user);
1592 // - Upon computing Cmp, we effectively compute something equivalent to the
1593 // IV increment (even though it is written differently in the IR). So moving it up
1594 // to the cmp point does not really increase register pressure.
1595 return false;
1596 }
1597
1598 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1599 if (BO->getOpcode() == Instruction::Add &&
1600 IID == Intrinsic::usub_with_overflow) {
1601 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1602 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1603 }
1604
1605 // Insert at the first instruction of the pair.
1606 Instruction *InsertPt = nullptr;
1607 for (Instruction &Iter : *Cmp->getParent()) {
1608 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1609 // the overflow intrinsic are defined.
1610 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1611 InsertPt = &Iter;
1612 break;
1613 }
1614 }
1615 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1616
1617 IRBuilder<> Builder(InsertPt);
1618 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1619 if (BO->getOpcode() != Instruction::Xor) {
1620 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1621 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1622 } else
1623 assert(BO->hasOneUse() &&
1624 "Patterns with XOr should use the BO only in the compare");
1625 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1626 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1627 Cmp->eraseFromParent();
1628 BO->eraseFromParent();
1629 return true;
1630}
1631
1632/// Match special-case patterns that check for unsigned add overflow.
1633 static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1634 BinaryOperator *&Add) {
1635 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1636 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1637 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1638
1639 // We are not expecting non-canonical/degenerate code. Just bail out.
1640 if (isa<Constant>(A))
1641 return false;
1642
1643 ICmpInst::Predicate Pred = Cmp->getPredicate();
1644 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1645 B = ConstantInt::get(B->getType(), 1);
1646 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1647 B = Constant::getAllOnesValue(B->getType());
1648 else
1649 return false;
1650
1651 // Check the users of the variable operand of the compare looking for an add
1652 // with the adjusted constant.
1653 for (User *U : A->users()) {
1654 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1655 Add = cast<BinaryOperator>(U);
1656 return true;
1657 }
1658 }
1659 return false;
1660}
1661
1662/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1663/// intrinsic. Return true if any changes were made.
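/// A minimal sketch of the targeted IR (value names are illustrative):
///
///   %add = add i32 %a, %b
///   %cmp = icmp ult i32 %add, %a      ; unsigned-add overflow check
///     ==>
///   %ov  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %add = extractvalue { i32, i1 } %ov, 0
///   %cmp = extractvalue { i32, i1 } %ov, 1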
1664bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1665 ModifyDT &ModifiedDT) {
1666 bool EdgeCase = false;
1667 Value *A, *B;
1668 BinaryOperator *Add;
1669 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1670 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1671 return false;
1672 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1673 A = Add->getOperand(0);
1674 B = Add->getOperand(1);
1675 EdgeCase = true;
1676 }
1677
1678 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1679 TLI->getValueType(*DL, Add->getType()),
1680 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1681 return false;
1682
1683 // We don't want to move around uses of condition values this late, so we
1684 // check if it is legal to create the call to the intrinsic in the basic
1685 // block containing the icmp.
1686 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1687 return false;
1688
1689 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1690 Intrinsic::uadd_with_overflow))
1691 return false;
1692
1693 // Reset callers - do not crash by iterating over a dead instruction.
1694 ModifiedDT = ModifyDT::ModifyInstDT;
1695 return true;
1696}
1697
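/// Try to combine the compare into a call to the llvm.usub.with.overflow
/// intrinsic. Return true if any changes were made. A rough sketch of the
/// simplest matched form (illustrative only):
///
///   %sub = sub i32 %a, %b
///   %cmp = icmp ult i32 %a, %b        ; borrow check
///     ==>
///   %ov  = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
///   %sub = extractvalue { i32, i1 } %ov, 0
///   %cmp = extractvalue { i32, i1 } %ov, 1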
1698bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1699 ModifyDT &ModifiedDT) {
1700 // We are not expecting non-canonical/degenerate code. Just bail out.
1701 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1702 if (isa<Constant>(A) && isa<Constant>(B))
1703 return false;
1704
1705 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1706 ICmpInst::Predicate Pred = Cmp->getPredicate();
1707 if (Pred == ICmpInst::ICMP_UGT) {
1708 std::swap(A, B);
1709 Pred = ICmpInst::ICMP_ULT;
1710 }
1711 // Convert special-case: (A == 0) is the same as (A u< 1).
1712 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1713 B = ConstantInt::get(B->getType(), 1);
1714 Pred = ICmpInst::ICMP_ULT;
1715 }
1716 // Convert special-case: (A != 0) is the same as (0 u< A).
1717 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1718 std::swap(A, B);
1719 Pred = ICmpInst::ICMP_ULT;
1720 }
1721 if (Pred != ICmpInst::ICMP_ULT)
1722 return false;
1723
1724 // Walk the users of a variable operand of a compare looking for a subtract or
1725 // add with that same operand. Also match the 2nd operand of the compare to
1726 // the add/sub, but that may be a negated constant operand of an add.
1727 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1728 BinaryOperator *Sub = nullptr;
1729 for (User *U : CmpVariableOperand->users()) {
1730 // A - B, A u< B --> usubo(A, B)
1731 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1732 Sub = cast<BinaryOperator>(U);
1733 break;
1734 }
1735
1736 // A + (-C), A u< C (canonicalized form of (sub A, C))
1737 const APInt *CmpC, *AddC;
1738 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1739 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1740 Sub = cast<BinaryOperator>(U);
1741 break;
1742 }
1743 }
1744 if (!Sub)
1745 return false;
1746
1747 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1748 TLI->getValueType(*DL, Sub->getType()),
1749 Sub->hasNUsesOrMore(1)))
1750 return false;
1751
1752 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1753 Cmp, Intrinsic::usub_with_overflow))
1754 return false;
1755
1756 // Reset callers - do not crash by iterating over a dead instruction.
1757 ModifiedDT = ModifyDT::ModifyInstDT;
1758 return true;
1759}
1760
1761// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1762// The same transformation exists in DAG combiner, but we repeat it here because
1763// DAG builder can break the pattern by moving icmp into a successor block.
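// A rough before/after sketch for the slow-ctpop path (illustrative names):
//
//   %pop = call i32 @llvm.ctpop.i32(i32 %x)
//   %cmp = icmp eq i32 %pop, 1          ; "is power of 2" test
//     ==>
//   %dec = add i32 %x, -1
//   %xor = xor i32 %x, %dec
//   %cmp = icmp ugt i32 %xor, %dec
//
// The "power of 2 or zero" variants use (x & (x - 1)) ==/!= 0 instead.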
1764bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1765 CmpPredicate Pred;
1766 Value *X;
1767 const APInt *C;
1768
1769 // (icmp (ctpop x), c)
1770 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1771 m_APInt(C))))
1772 return false;
1773
1774 // We're only interested in "is power of 2 [or zero]" patterns.
1775 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1776 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1777 (Pred == CmpInst::ICMP_UGT && *C == 1);
1778 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1779 return false;
1780
1781 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1782 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1783 // and otherwise expand ctpop into a few simple instructions.
1784 Type *OpTy = X->getType();
1785 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1786 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1787 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1788 return false;
1789
1790 // ctpop(x) == 1 -> ctpop(x) u< 2
1791 // ctpop(x) != 1 -> ctpop(x) u> 1
1792 if (Pred == ICmpInst::ICMP_EQ) {
1793 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1794 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1795 } else {
1796 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1797 }
1798 return true;
1799 }
1800
1801 Value *NewCmp;
1802 if (IsPowerOf2OrZeroTest ||
1803 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1804 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1805 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1806 IRBuilder<> Builder(Cmp);
1807 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1808 Value *And = Builder.CreateAnd(X, Sub);
1809 CmpInst::Predicate NewPred =
1810 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1811 ? CmpInst::ICMP_EQ
1812 : CmpInst::ICMP_NE;
1813 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1814 } else {
1815 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1816 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1817 IRBuilder<> Builder(Cmp);
1818 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1819 Value *Xor = Builder.CreateXor(X, Sub);
1820 CmpInst::Predicate NewPred =
1821 Pred == ICmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1822 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1823 }
1824
1825 Cmp->replaceAllUsesWith(NewCmp);
1826 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1827 return true;
1828}
1829
1830/// Sink the given CmpInst into user blocks to reduce the number of virtual
1831/// registers that must be created and coalesced. This is a clear win except on
1832/// targets with multiple condition code registers (PowerPC), where it might
1833/// lose; some adjustment may be wanted there.
1834///
1835/// Return true if any changes are made.
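/// A small sketch of the effect (names are illustrative): a cmp defined in one
/// block but used only in another is re-created next to its user, so the i1
/// result no longer has to live across the block boundary:
///
///   DefBB:  %c = icmp eq i32 %a, %b      ; becomes dead and is erased
///   UseBB:  %c.sunk = icmp eq i32 %a, %b
///           br i1 %c.sunk, ...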
1836static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1837 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1838 return false;
1839
1840 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1841 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1842 return false;
1843
1844 // Only insert a cmp in each block once.
1845 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1846
1847 bool MadeChange = false;
1848 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1849 UI != E;) {
1850 Use &TheUse = UI.getUse();
1851 Instruction *User = cast<Instruction>(*UI);
1852
1853 // Preincrement use iterator so we don't invalidate it.
1854 ++UI;
1855
1856 // Don't bother for PHI nodes.
1857 if (isa<PHINode>(User))
1858 continue;
1859
1860 // Figure out which BB this cmp is used in.
1861 BasicBlock *UserBB = User->getParent();
1862 BasicBlock *DefBB = Cmp->getParent();
1863
1864 // If this user is in the same block as the cmp, don't change the cmp.
1865 if (UserBB == DefBB)
1866 continue;
1867
1868 // If we have already inserted a cmp into this block, use it.
1869 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1870
1871 if (!InsertedCmp) {
1872 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1873 assert(InsertPt != UserBB->end());
1874 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1875 Cmp->getOperand(0), Cmp->getOperand(1), "");
1876 InsertedCmp->insertBefore(*UserBB, InsertPt);
1877 // Propagate the debug info.
1878 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1879 }
1880
1881 // Replace a use of the cmp with a use of the new cmp.
1882 TheUse = InsertedCmp;
1883 MadeChange = true;
1884 ++NumCmpUses;
1885 }
1886
1887 // If we removed all uses, nuke the cmp.
1888 if (Cmp->use_empty()) {
1889 Cmp->eraseFromParent();
1890 MadeChange = true;
1891 }
1892
1893 return MadeChange;
1894}
1895
1896/// For pattern like:
1897///
1898/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1899/// ...
1900/// DomBB:
1901/// ...
1902/// br DomCond, TrueBB, CmpBB
1903/// CmpBB: (with DomBB being the single predecessor)
1904/// ...
1905/// Cmp = icmp eq CmpOp0, CmpOp1
1906/// ...
1907///
1908 /// This would use two comparisons on targets where the lowering of icmp
1909 /// sgt/slt differs from the lowering of icmp eq (PowerPC). This function tries
1910 /// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1911 /// After that, DomCond and Cmp can share the same comparison, saving one
1912 /// comparison.
1913///
1914/// Return true if any changes are made.
1915 static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1916 const TargetLowering &TLI) {
1917 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1918 return false;
1919
1920 ICmpInst::Predicate Pred = Cmp->getPredicate();
1921 if (Pred != ICmpInst::ICMP_EQ)
1922 return false;
1923
1924 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1925 // icmp slt/sgt would introduce more redundant LLVM IR.
1926 for (User *U : Cmp->users()) {
1927 if (isa<BranchInst>(U))
1928 continue;
1929 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1930 continue;
1931 return false;
1932 }
1933
1934 // This is a cheap/incomplete check for dominance - just match a single
1935 // predecessor with a conditional branch.
1936 BasicBlock *CmpBB = Cmp->getParent();
1937 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1938 if (!DomBB)
1939 return false;
1940
1941 // We want to ensure that the only way control gets to the comparison of
1942 // interest is that a less/greater than comparison on the same operands is
1943 // false.
1944 Value *DomCond;
1945 BasicBlock *TrueBB, *FalseBB;
1946 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1947 return false;
1948 if (CmpBB != FalseBB)
1949 return false;
1950
1951 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1952 CmpPredicate DomPred;
1953 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1954 return false;
1955 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1956 return false;
1957
1958 // Convert the equality comparison to the opposite of the dominating
1959 // comparison and swap the direction for all branch/select users.
1960 // We have conceptually converted:
1961 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1962 // to
1963 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1964 // And similarly for branches.
1965 for (User *U : Cmp->users()) {
1966 if (auto *BI = dyn_cast<BranchInst>(U)) {
1967 assert(BI->isConditional() && "Must be conditional");
1968 BI->swapSuccessors();
1969 continue;
1970 }
1971 if (auto *SI = dyn_cast<SelectInst>(U)) {
1972 // Swap operands
1973 SI->swapValues();
1974 SI->swapProfMetadata();
1975 continue;
1976 }
1977 llvm_unreachable("Must be a branch or a select");
1978 }
1979 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1980 return true;
1981}
1982
1983/// Many architectures use the same instruction for both subtract and cmp. Try
1984/// to swap cmp operands to match subtract operations to allow for CSE.
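/// A hypothetical example of the intent: given
///
///   %s = sub i32 %b, %a
///   %c = icmp ult i32 %a, %b
///
/// swapping the compare to 'icmp ugt i32 %b, %a' makes it operand-compatible
/// with the subtract, so targets whose sub also sets flags can CSE the pair.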
1985 static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1986 Value *Op0 = Cmp->getOperand(0);
1987 Value *Op1 = Cmp->getOperand(1);
1988 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1989 isa<Constant>(Op1) || Op0 == Op1)
1990 return false;
1991
1992 // If a subtract already has the same operands as a compare, swapping would be
1993 // bad. If a subtract has the same operands as a compare but in reverse order,
1994 // then swapping is good.
1995 int GoodToSwap = 0;
1996 unsigned NumInspected = 0;
1997 for (const User *U : Op0->users()) {
1998 // Avoid walking many users.
1999 if (++NumInspected > 128)
2000 return false;
2001 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2002 GoodToSwap++;
2003 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2004 GoodToSwap--;
2005 }
2006
2007 if (GoodToSwap > 0) {
2008 Cmp->swapOperands();
2009 return true;
2010 }
2011 return false;
2012}
2013
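/// Reverse the fabs+fcmp canonicalization into llvm.is.fpclass when the target
/// prefers the class test. A hedged sketch (the exact mask operand is omitted):
///
///   %fabs = call double @llvm.fabs.f64(double %x)
///   %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000   ; |x| == +inf
///     ==>
///   %cmp  = call i1 @llvm.is.fpclass.f64(double %x, i32 <fcInf mask>)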
2014static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2015 const DataLayout &DL) {
2016 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2017 if (!FCmp)
2018 return false;
2019
2020 // Don't fold if the target offers free fabs and the predicate is legal.
2021 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2022 if (TLI.isFAbsFree(VT) &&
2023 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2024 VT.getSimpleVT()))
2025 return false;
2026
2027 // Reverse the canonicalization if it is a FP class test
2028 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2029 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2030 };
2031 auto [ClassVal, ClassTest] =
2032 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2033 FCmp->getOperand(0), FCmp->getOperand(1));
2034 if (!ClassVal)
2035 return false;
2036
2037 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2038 return false;
2039
2040 IRBuilder<> Builder(Cmp);
2041 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2042 Cmp->replaceAllUsesWith(IsFPClass);
2043 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
2044 return true;
2045}
2046
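// Helper for foldURemOfLoopIncrement below. A sketch of the simplest matched
// shape (illustrative names, no extra add on top of the PHI):
//
//   loop:
//     %iv  = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
//     %rem = urem i32 %iv, %amt          ; %amt is loop-invariant
//     ...
//   latch:
//     %iv.next = add nuw i32 %iv, 1
//
// On success it reports %amt, the (optional) nuw add, and the PHI %iv.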
2047 static bool isRemOfLoopIncrementWithLoopInvariant(
2048 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2049 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2050 Value *Incr, *RemAmt;
2051 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2052 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2053 return false;
2054
2055 Value *AddInst, *AddOffset;
2056 // Find out loop increment PHI.
2057 auto *PN = dyn_cast<PHINode>(Incr);
2058 if (PN != nullptr) {
2059 AddInst = nullptr;
2060 AddOffset = nullptr;
2061 } else {
2062 // Search through a NUW add on top of the loop increment.
2063 Value *V0, *V1;
2064 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2065 return false;
2066
2067 AddInst = Incr;
2068 PN = dyn_cast<PHINode>(V0);
2069 if (PN != nullptr) {
2070 AddOffset = V1;
2071 } else {
2072 PN = dyn_cast<PHINode>(V1);
2073 AddOffset = V0;
2074 }
2075 }
2076
2077 if (!PN)
2078 return false;
2079
2080 // This isn't strictly necessary; what we really need is one increment and any
2081 // number of initial values, all being the same.
2082 if (PN->getNumIncomingValues() != 2)
2083 return false;
2084
2085 // Only trivially analyzable loops.
2086 Loop *L = LI->getLoopFor(PN->getParent());
2087 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2088 return false;
2089
2090 // Require that the remainder is in the loop.
2091 if (!L->contains(Rem))
2092 return false;
2093
2094 // Only works if the remainder amount is a loop invariant.
2095 if (!L->isLoopInvariant(RemAmt))
2096 return false;
2097
2098 // Only works if the AddOffset is a loop invariant.
2099 if (AddOffset && !L->isLoopInvariant(AddOffset))
2100 return false;
2101
2102 // Is the PHI a loop increment?
2103 auto LoopIncrInfo = getIVIncrement(PN, LI);
2104 if (!LoopIncrInfo)
2105 return false;
2106
2107 // We need remainder_amount % increment_amount to be zero. Increment of one
2108 // satisfies that without any special logic and is overwhelmingly the common
2109 // case.
2110 if (!match(LoopIncrInfo->second, m_One()))
2111 return false;
2112
2113 // Need the increment to not overflow.
2114 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2115 return false;
2116
2117 // Set output variables.
2118 RemAmtOut = RemAmt;
2119 LoopIncrPNOut = PN;
2120 AddInstOut = AddInst;
2121 AddOffsetOut = AddOffset;
2122
2123 return true;
2124}
2125
2126// Try to transform:
2127//
2128// for(i = Start; i < End; ++i)
2129// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2130//
2131// ->
2132//
2133// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2134// for(i = Start; i < End; ++i, ++rem)
2135// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2136 static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2137 const LoopInfo *LI,
2138 SmallSet<BasicBlock *, 32> &FreshBBs,
2139 bool IsHuge) {
2140 Value *AddOffset, *RemAmt, *AddInst;
2141 PHINode *LoopIncrPN;
2142 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2143 AddOffset, LoopIncrPN))
2144 return false;
2145
2146 // Only handle a non-constant remainder amount, as the extra IV is probably
2147 // not profitable in that case.
2148 //
2149 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2150 // we can rule out register pressure and ensure this `urem` is executed each
2151 // iteration, it's probably profitable to handle the const case as well.
2152 //
2153 // Potential TODO(2): Should we have a check for how "nested" this remainder
2154 // operation is? The new code runs every iteration so if the remainder is
2155 // guarded behind unlikely conditions this might not be worth it.
2156 if (match(RemAmt, m_ImmConstant()))
2157 return false;
2158
2159 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2160 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2161 // If we have an add, create the initial value for the remainder.
2162 // The logic here is:
2163 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant
2164 //
2165 // Only proceed if the expression simplifies (otherwise we can't fully
2166 // optimize out the urem).
2167 if (AddInst) {
2168 assert(AddOffset && "We found an add but missing values");
2169 // Without dom-condition/assumption cache we aren't likely to get much out
2170 // of a context instruction.
2171 Start = simplifyAddInst(Start, AddOffset,
2172 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2173 /*IsNUW=*/true, *DL);
2174 if (!Start)
2175 return false;
2176 }
2177
2178 // If we can't fully optimize out the `rem`, skip this transform.
2179 Start = simplifyURemInst(Start, RemAmt, *DL);
2180 if (!Start)
2181 return false;
2182
2183 // Create new remainder with induction variable.
2184 Type *Ty = Rem->getType();
2185 IRBuilder<> Builder(Rem->getContext());
2186
2187 Builder.SetInsertPoint(LoopIncrPN);
2188 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2189
2190 Builder.SetInsertPoint(cast<Instruction>(
2191 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2192 // `(add (urem x, y), 1)` is always nuw.
2193 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2194 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2195 Value *RemSel =
2196 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2197
2198 NewRem->addIncoming(Start, L->getLoopPreheader());
2199 NewRem->addIncoming(RemSel, L->getLoopLatch());
2200
2201 // Insert all touched BBs.
2202 FreshBBs.insert(LoopIncrPN->getParent());
2203 FreshBBs.insert(L->getLoopLatch());
2204 FreshBBs.insert(Rem->getParent());
2205 if (AddInst)
2206 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2207 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2208 Rem->eraseFromParent();
2209 if (AddInst && AddInst->use_empty())
2210 cast<Instruction>(AddInst)->eraseFromParent();
2211 return true;
2212}
2213
2214bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2215 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2216 return true;
2217 return false;
2218}
2219
2220bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2221 if (sinkCmpExpression(Cmp, *TLI))
2222 return true;
2223
2224 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2225 return true;
2226
2227 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2228 return true;
2229
2230 if (unfoldPowerOf2Test(Cmp))
2231 return true;
2232
2233 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2234 return true;
2235
2236 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2237 return true;
2238
2239 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2240 return true;
2241
2242 return false;
2243}
2244
2245/// Duplicate and sink the given 'and' instruction into user blocks where it is
2246/// used in a compare to allow isel to generate better code for targets where
2247/// this operation can be combined.
2248///
2249/// Return true if any changes are made.
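/// A small sketch (names are illustrative): an 'and' whose only users are
/// 'icmp eq ..., 0' in other blocks is duplicated next to each compare so the
/// backend can fold the pair into a test-under-mask style instruction:
///
///   DefBB:  %m = and i64 %x, 255
///   UseBB:  %c = icmp eq i64 %m, 0
///     ==>
///   UseBB:  %m.sunk = and i64 %x, 255
///           %c = icmp eq i64 %m.sunk, 0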
2250 static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2251 SetOfInstrs &InsertedInsts) {
2252 // Double-check that we're not trying to optimize an instruction that was
2253 // already optimized by some other part of this pass.
2254 assert(!InsertedInsts.count(AndI) &&
2255 "Attempting to optimize already optimized and instruction");
2256 (void)InsertedInsts;
2257
2258 // Nothing to do for single use in same basic block.
2259 if (AndI->hasOneUse() &&
2260 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2261 return false;
2262
2263 // Try to avoid cases where sinking/duplicating is likely to increase register
2264 // pressure.
2265 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2266 !isa<ConstantInt>(AndI->getOperand(1)) &&
2267 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2268 return false;
2269
2270 for (auto *U : AndI->users()) {
2271 Instruction *User = cast<Instruction>(U);
2272
2273 // Only sink 'and' feeding icmp with 0.
2274 if (!isa<ICmpInst>(User))
2275 return false;
2276
2277 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2278 if (!CmpC || !CmpC->isZero())
2279 return false;
2280 }
2281
2282 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2283 return false;
2284
2285 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2286 LLVM_DEBUG(AndI->getParent()->dump());
2287
2288 // Push the 'and' into the same block as the icmp 0. There should only be
2289 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2290 // others, so we don't need to keep track of which BBs we insert into.
2291 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2292 UI != E;) {
2293 Use &TheUse = UI.getUse();
2294 Instruction *User = cast<Instruction>(*UI);
2295
2296 // Preincrement use iterator so we don't invalidate it.
2297 ++UI;
2298
2299 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2300
2301 // Keep the 'and' in the same place if the use is already in the same block.
2302 Instruction *InsertPt =
2303 User->getParent() == AndI->getParent() ? AndI : User;
2304 Instruction *InsertedAnd = BinaryOperator::Create(
2305 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2306 InsertPt->getIterator());
2307 // Propagate the debug info.
2308 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2309
2310 // Replace a use of the 'and' with a use of the new 'and'.
2311 TheUse = InsertedAnd;
2312 ++NumAndUses;
2313 LLVM_DEBUG(User->getParent()->dump());
2314 }
2315
2316 // We removed all uses, nuke the and.
2317 AndI->eraseFromParent();
2318 return true;
2319}
2320
2321/// Check if the candidates could be combined with a shift instruction, which
2322/// includes:
2323/// 1. Truncate instruction
2324/// 2. And instruction and the imm is a mask of the low bits:
2325/// imm & (imm+1) == 0
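/// For instance, 0x0000ffff qualifies: 0xffff & 0x10000 == 0, so an 'and' with
/// it (like a trunc) keeps only the low bits and can combine with a preceding
/// shift right into a bit-extract.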
2326 static bool isExtractBitsCandidateUse(Instruction *User) {
2327 if (!isa<TruncInst>(User)) {
2328 if (User->getOpcode() != Instruction::And ||
2329 !isa<ConstantInt>(User->getOperand(1)))
2330 return false;
2331
2332 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2333
2334 if ((Cimm & (Cimm + 1)).getBoolValue())
2335 return false;
2336 }
2337 return true;
2338}
2339
2340/// Sink both shift and truncate instruction to the use of truncate's BB.
2341static bool
2342 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2343 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2344 const TargetLowering &TLI, const DataLayout &DL) {
2345 BasicBlock *UserBB = User->getParent();
2346 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2347 auto *TruncI = cast<TruncInst>(User);
2348 bool MadeChange = false;
2349
2350 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2351 TruncE = TruncI->user_end();
2352 TruncUI != TruncE;) {
2353
2354 Use &TruncTheUse = TruncUI.getUse();
2355 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2356 // Preincrement use iterator so we don't invalidate it.
2357
2358 ++TruncUI;
2359
2360 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2361 if (!ISDOpcode)
2362 continue;
2363
2364 // If the use is actually a legal node, there will not be an
2365 // implicit truncate.
2366 // FIXME: always querying the result type is just an
2367 // approximation; some nodes' legality is determined by the
2368 // operand or other means. There's no good way to find out though.
2369 if (TLI.isOperationLegalOrCustom(
2370 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2371 continue;
2372
2373 // Don't bother for PHI nodes.
2374 if (isa<PHINode>(TruncUser))
2375 continue;
2376
2377 BasicBlock *TruncUserBB = TruncUser->getParent();
2378
2379 if (UserBB == TruncUserBB)
2380 continue;
2381
2382 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2383 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2384
2385 if (!InsertedShift && !InsertedTrunc) {
2386 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2387 assert(InsertPt != TruncUserBB->end());
2388 // Sink the shift
2389 if (ShiftI->getOpcode() == Instruction::AShr)
2390 InsertedShift =
2391 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2392 else
2393 InsertedShift =
2394 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2395 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2396 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2397
2398 // Sink the trunc
2399 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2400 TruncInsertPt++;
2401 // It will go ahead of any debug-info.
2402 TruncInsertPt.setHeadBit(true);
2403 assert(TruncInsertPt != TruncUserBB->end());
2404
2405 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2406 TruncI->getType(), "");
2407 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2408 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2409
2410 MadeChange = true;
2411
2412 TruncTheUse = InsertedTrunc;
2413 }
2414 }
2415 return MadeChange;
2416}
2417
2418/// Sink the shift *right* instruction into user blocks if the uses could
2419/// potentially be combined with this shift instruction and generate BitExtract
2420/// instruction. It will only be applied if the architecture supports BitExtract
2421/// instruction. Here is an example:
2422/// BB1:
2423/// %x.extract.shift = lshr i64 %arg1, 32
2424/// BB2:
2425/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2426/// ==>
2427///
2428/// BB2:
2429/// %x.extract.shift.1 = lshr i64 %arg1, 32
2430/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2431///
2432/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2433/// instruction.
2434/// Return true if any changes are made.
2435 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2436 const TargetLowering &TLI,
2437 const DataLayout &DL) {
2438 BasicBlock *DefBB = ShiftI->getParent();
2439
2440 /// Only insert instructions in each block once.
2441 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2442
2443 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2444
2445 bool MadeChange = false;
2446 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2447 UI != E;) {
2448 Use &TheUse = UI.getUse();
2449 Instruction *User = cast<Instruction>(*UI);
2450 // Preincrement use iterator so we don't invalidate it.
2451 ++UI;
2452
2453 // Don't bother for PHI nodes.
2454 if (isa<PHINode>(User))
2455 continue;
2456
2457 if (!isExtractBitsCandidateUse(User))
2458 continue;
2459
2460 BasicBlock *UserBB = User->getParent();
2461
2462 if (UserBB == DefBB) {
2463 // If the shift and truncate instructions are in the same BB, the use of
2464 // the truncate (TruncUse) may still introduce another truncate if it is not
2465 // legal. In this case, we would like to sink both the shift and the
2466 // truncate instruction to the BB of TruncUse.
2467 // for example:
2468 // BB1:
2469 // i64 shift.result = lshr i64 opnd, imm
2470 // trunc.result = trunc shift.result to i16
2471 //
2472 // BB2:
2473 // ----> We will have an implicit truncate here if the architecture does
2474 // not have i16 compare.
2475 // cmp i16 trunc.result, opnd2
2476 //
2477 if (isa<TruncInst>(User) &&
2478 shiftIsLegal
2479 // If the type of the truncate is legal, no truncate will be
2480 // introduced in other basic blocks.
2481 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2482 MadeChange =
2483 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2484
2485 continue;
2486 }
2487 // If we have already inserted a shift into this block, use it.
2488 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2489
2490 if (!InsertedShift) {
2491 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2492 assert(InsertPt != UserBB->end());
2493
2494 if (ShiftI->getOpcode() == Instruction::AShr)
2495 InsertedShift =
2496 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2497 else
2498 InsertedShift =
2499 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2500 InsertedShift->insertBefore(*UserBB, InsertPt);
2501 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2502
2503 MadeChange = true;
2504 }
2505
2506 // Replace a use of the shift with a use of the new shift.
2507 TheUse = InsertedShift;
2508 }
2509
2510 // If we removed all uses, or there are none, nuke the shift.
2511 if (ShiftI->use_empty()) {
2512 salvageDebugInfo(*ShiftI);
2513 ShiftI->eraseFromParent();
2514 MadeChange = true;
2515 }
2516
2517 return MadeChange;
2518}
2519
2520/// If counting leading or trailing zeros is an expensive operation and a zero
2521/// input is defined, add a check for zero to avoid calling the intrinsic.
2522///
2523/// We want to transform:
2524/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2525///
2526/// into:
2527/// entry:
2528/// %cmpz = icmp eq i64 %A, 0
2529/// br i1 %cmpz, label %cond.end, label %cond.false
2530/// cond.false:
2531/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2532/// br label %cond.end
2533/// cond.end:
2534/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2535///
2536/// If the transform is performed, return true and set ModifiedDT to true.
2537static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2538 const TargetLowering *TLI,
2539 const DataLayout *DL, ModifyDT &ModifiedDT,
2540 SmallSet<BasicBlock *, 32> &FreshBBs,
2541 bool IsHugeFunc) {
2542 // If a zero input is undefined, it doesn't make sense to despeculate that.
2543 if (match(CountZeros->getOperand(1), m_One()))
2544 return false;
2545
2546 // If it's cheap to speculate, there's nothing to do.
2547 Type *Ty = CountZeros->getType();
2548 auto IntrinsicID = CountZeros->getIntrinsicID();
2549 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2550 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2551 return false;
2552
2553 // Only handle scalar cases. Anything else requires too much work.
2554 unsigned SizeInBits = Ty->getScalarSizeInBits();
2555 if (Ty->isVectorTy())
2556 return false;
2557
2558 // Bail if the value is never zero.
2559 Use &Op = CountZeros->getOperandUse(0);
2560 if (isKnownNonZero(Op, *DL))
2561 return false;
2562
2563 // The intrinsic will be sunk behind a compare against zero and branch.
2564 BasicBlock *StartBlock = CountZeros->getParent();
2565 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2566 if (IsHugeFunc)
2567 FreshBBs.insert(CallBlock);
2568
2569 // Create another block after the count zero intrinsic. A PHI will be added
2570 // in this block to select the result of the intrinsic or the bit-width
2571 // constant if the input to the intrinsic is zero.
2572 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2573 // Any debug-info after CountZeros should not be included.
2574 SplitPt.setHeadBit(true);
2575 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2576 if (IsHugeFunc)
2577 FreshBBs.insert(EndBlock);
2578
2579 // Update the LoopInfo. The new blocks are in the same loop as the start
2580 // block.
2581 if (Loop *L = LI.getLoopFor(StartBlock)) {
2582 L->addBasicBlockToLoop(CallBlock, LI);
2583 L->addBasicBlockToLoop(EndBlock, LI);
2584 }
2585
2586 // Set up a builder to create a compare, conditional branch, and PHI.
2587 IRBuilder<> Builder(CountZeros->getContext());
2588 Builder.SetInsertPoint(StartBlock->getTerminator());
2589 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2590
2591 // Replace the unconditional branch that was created by the first split with
2592 // a compare against zero and a conditional branch.
2593 Value *Zero = Constant::getNullValue(Ty);
2594 // Avoid introducing branch on poison. This also replaces the ctz operand.
2595 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2596 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2597 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2598 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2599 StartBlock->getTerminator()->eraseFromParent();
2600
2601 // Create a PHI in the end block to select either the output of the intrinsic
2602 // or the bit width of the operand.
2603 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2604 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2605 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2606 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2607 PN->addIncoming(BitWidth, StartBlock);
2608 PN->addIncoming(CountZeros, CallBlock);
2609
2610 // We are explicitly handling the zero case, so we can set the intrinsic's
2611 // undefined zero argument to 'true'. This will also prevent reprocessing the
2612 // intrinsic; we only despeculate when a zero input is defined.
2613 CountZeros->setArgOperand(1, Builder.getTrue());
2614 ModifiedDT = ModifyDT::ModifyBBDT;
2615 return true;
2616}
2617
2618bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2619 BasicBlock *BB = CI->getParent();
2620
2621 // Lower inline assembly if we can.
2622 // If we found an inline asm expression, and if the target knows how to
2623 // lower it to normal LLVM code, do so now.
2624 if (CI->isInlineAsm()) {
2625 if (TLI->ExpandInlineAsm(CI)) {
2626 // Avoid invalidating the iterator.
2627 CurInstIterator = BB->begin();
2628 // Avoid processing instructions out of order, which could cause
2629 // reuse before a value is defined.
2630 SunkAddrs.clear();
2631 return true;
2632 }
2633 // Sink address computing for memory operands into the block.
2634 if (optimizeInlineAsmInst(CI))
2635 return true;
2636 }
2637
2638 // Align the pointer arguments to this call if the target thinks it's a good
2639 // idea
2640 unsigned MinSize;
2641 Align PrefAlign;
2642 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2643 for (auto &Arg : CI->args()) {
2644 // We want to align both objects whose address is used directly and
2645 // objects whose address is used in casts and GEPs, though it only makes
2646 // sense for GEPs if the offset is a multiple of the desired alignment and
2647 // if size - offset meets the size threshold.
2648 if (!Arg->getType()->isPointerTy())
2649 continue;
2650 APInt Offset(DL->getIndexSizeInBits(
2651 cast<PointerType>(Arg->getType())->getAddressSpace()),
2652 0);
2653 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2654 uint64_t Offset2 = Offset.getLimitedValue();
2655 if (!isAligned(PrefAlign, Offset2))
2656 continue;
2657 AllocaInst *AI;
2658 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2659 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2660 AI->setAlignment(PrefAlign);
2661 // Global variables can only be aligned if they are defined in this
2662 // object (i.e. they are uniquely initialized in this object), and
2663 // over-aligning global variables that have an explicit section is
2664 // forbidden.
2665 GlobalVariable *GV;
2666 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2667 GV->getPointerAlignment(*DL) < PrefAlign &&
2668 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2669 GV->setAlignment(PrefAlign);
2670 }
2671 }
2672 // If this is a memcpy (or similar) then we may be able to improve the
2673 // alignment.
2674 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2675 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2676 MaybeAlign MIDestAlign = MI->getDestAlign();
2677 if (!MIDestAlign || DestAlign > *MIDestAlign)
2678 MI->setDestAlignment(DestAlign);
2679 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2680 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2681 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2682 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2683 MTI->setSourceAlignment(SrcAlign);
2684 }
2685 }
2686
2687 // If we have a cold call site, try to sink addressing computation into the
2688 // cold block. This interacts with our handling for loads and stores to
2689 // ensure that we can fold all uses of a potential addressing computation
2690 // into their uses. TODO: generalize this to work over profiling data
2691 if (CI->hasFnAttr(Attribute::Cold) &&
2692 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2693 for (auto &Arg : CI->args()) {
2694 if (!Arg->getType()->isPointerTy())
2695 continue;
2696 unsigned AS = Arg->getType()->getPointerAddressSpace();
2697 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2698 return true;
2699 }
2700
2701 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2702 if (II) {
2703 switch (II->getIntrinsicID()) {
2704 default:
2705 break;
2706 case Intrinsic::assume:
2707 llvm_unreachable("llvm.assume should have been removed already");
2708 case Intrinsic::allow_runtime_check:
2709 case Intrinsic::allow_ubsan_check:
2710 case Intrinsic::experimental_widenable_condition: {
2711 // Give up on future widening opportunities so that we can fold away dead
2712 // paths and merge blocks before going into block-local instruction
2713 // selection.
2714 if (II->use_empty()) {
2715 II->eraseFromParent();
2716 return true;
2717 }
2718 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2719 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2720 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2721 });
2722 return true;
2723 }
2724 case Intrinsic::objectsize:
2725 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2726 case Intrinsic::is_constant:
2727 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2728 case Intrinsic::aarch64_stlxr:
2729 case Intrinsic::aarch64_stxr: {
2730 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2731 if (!ExtVal || !ExtVal->hasOneUse() ||
2732 ExtVal->getParent() == CI->getParent())
2733 return false;
2734 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2735 ExtVal->moveBefore(CI->getIterator());
2736 // Mark this instruction as "inserted by CGP", so that other
2737 // optimizations don't touch it.
2738 InsertedInsts.insert(ExtVal);
2739 return true;
2740 }
2741
2742 case Intrinsic::launder_invariant_group:
2743 case Intrinsic::strip_invariant_group: {
2744 Value *ArgVal = II->getArgOperand(0);
2745 auto it = LargeOffsetGEPMap.find(II);
2746 if (it != LargeOffsetGEPMap.end()) {
2747 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2748 // Make sure not to have to deal with iterator invalidation
2749 // after possibly adding ArgVal to LargeOffsetGEPMap.
2750 auto GEPs = std::move(it->second);
2751 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2752 LargeOffsetGEPMap.erase(II);
2753 }
2754
2755 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2756 II->eraseFromParent();
2757 return true;
2758 }
2759 case Intrinsic::cttz:
2760 case Intrinsic::ctlz:
2761 // If counting zeros is expensive, try to avoid it.
2762 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2763 IsHugeFunc);
2764 case Intrinsic::fshl:
2765 case Intrinsic::fshr:
2766 return optimizeFunnelShift(II);
2767 case Intrinsic::masked_gather:
2768 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2769 case Intrinsic::masked_scatter:
2770 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2771 case Intrinsic::masked_load:
2772 // Treat v1X masked load as load X type.
2773 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2774 if (VT->getNumElements() == 1) {
2775 Value *PtrVal = II->getArgOperand(0);
2776 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2777 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2778 return true;
2779 }
2780 }
2781 return false;
2782 case Intrinsic::masked_store:
2783 // Treat v1X masked store as store X type.
2784 if (auto *VT =
2785 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2786 if (VT->getNumElements() == 1) {
2787 Value *PtrVal = II->getArgOperand(1);
2788 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2789 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2790 return true;
2791 }
2792 }
2793 return false;
2794 }
2795
2796 SmallVector<Value *, 2> PtrOps;
2797 Type *AccessTy;
2798 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2799 while (!PtrOps.empty()) {
2800 Value *PtrVal = PtrOps.pop_back_val();
2801 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2802 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2803 return true;
2804 }
2805 }
2806
2807 // From here on out we're working with named functions.
2808 auto *Callee = CI->getCalledFunction();
2809 if (!Callee)
2810 return false;
2811
2812 // Lower all default uses of _chk calls. This is very similar
2813 // to what InstCombineCalls does, but here we are only lowering calls
2814 // to fortified library functions (e.g. __memcpy_chk) that have the default
2815 // "don't know" as the objectsize. Anything else should be left alone.
2816 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2817 IRBuilder<> Builder(CI);
2818 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2819 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2820 CI->eraseFromParent();
2821 return true;
2822 }
2823
2824 // SCCP may have propagated, among other things, C++ static variables across
2825 // calls. If this happens to be the case, we may want to undo it in order to
2826 // avoid redundant pointer computation of the constant, as the function
2827 // returning the constant needs to be executed anyway.
2828 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2829 if (!F->getReturnType()->isPointerTy())
2830 return nullptr;
2831
2832 GlobalVariable *UniformValue = nullptr;
2833 for (auto &BB : *F) {
2834 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2835 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2836 if (!UniformValue)
2837 UniformValue = V;
2838 else if (V != UniformValue)
2839 return nullptr;
2840 } else {
2841 return nullptr;
2842 }
2843 }
2844 }
2845
2846 return UniformValue;
2847 };
2848
2849 if (Callee->hasExactDefinition()) {
2850 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2851 bool MadeChange = false;
2852 for (Use &U : make_early_inc_range(RV->uses())) {
2853 auto *I = dyn_cast<Instruction>(U.getUser());
2854 if (!I || I->getParent() != CI->getParent()) {
2855 // Limit to the same basic block to avoid extending the call-site live
2856 // range, which otherwise could increase register pressure.
2857 continue;
2858 }
2859 if (CI->comesBefore(I)) {
2860 U.set(CI);
2861 MadeChange = true;
2862 }
2863 }
2864
2865 return MadeChange;
2866 }
2867 }
2868
2869 return false;
2870}
2871
2872 static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2873 const CallInst *CI) {
2874 assert(CI && CI->use_empty());
2875
2876 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2877 switch (II->getIntrinsicID()) {
2878 case Intrinsic::memset:
2879 case Intrinsic::memcpy:
2880 case Intrinsic::memmove:
2881 return true;
2882 default:
2883 return false;
2884 }
2885
2886 LibFunc LF;
2887 Function *Callee = CI->getCalledFunction();
2888 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2889 switch (LF) {
2890 case LibFunc_strcpy:
2891 case LibFunc_strncpy:
2892 case LibFunc_strcat:
2893 case LibFunc_strncat:
2894 return true;
2895 default:
2896 return false;
2897 }
2898
2899 return false;
2900}
2901
2902/// Look for opportunities to duplicate return instructions to the predecessor
2903/// to enable tail call optimizations. The case it is currently looking for is
2904 /// the following one. Known intrinsics or library functions that may be tail
2905/// called are taken into account as well.
2906/// @code
2907/// bb0:
2908/// %tmp0 = tail call i32 @f0()
2909/// br label %return
2910/// bb1:
2911/// %tmp1 = tail call i32 @f1()
2912/// br label %return
2913/// bb2:
2914/// %tmp2 = tail call i32 @f2()
2915/// br label %return
2916/// return:
2917/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2918/// ret i32 %retval
2919/// @endcode
2920///
2921/// =>
2922///
2923/// @code
2924/// bb0:
2925/// %tmp0 = tail call i32 @f0()
2926/// ret i32 %tmp0
2927/// bb1:
2928/// %tmp1 = tail call i32 @f1()
2929/// ret i32 %tmp1
2930/// bb2:
2931/// %tmp2 = tail call i32 @f2()
2932/// ret i32 %tmp2
2933/// @endcode
2934bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2935 ModifyDT &ModifiedDT) {
2936 if (!BB->getTerminator())
2937 return false;
2938
2939 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2940 if (!RetI)
2941 return false;
2942
2943 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2944
2945 PHINode *PN = nullptr;
2946 ExtractValueInst *EVI = nullptr;
2947 BitCastInst *BCI = nullptr;
2948 Value *V = RetI->getReturnValue();
2949 if (V) {
2950 BCI = dyn_cast<BitCastInst>(V);
2951 if (BCI)
2952 V = BCI->getOperand(0);
2953
2954 EVI = dyn_cast<ExtractValueInst>(V);
2955 if (EVI) {
2956 V = EVI->getOperand(0);
2957 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2958 return false;
2959 }
2960
2961 PN = dyn_cast<PHINode>(V);
2962 }
2963
2964 if (PN && PN->getParent() != BB)
2965 return false;
2966
2967 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2968 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2969 if (BC && BC->hasOneUse())
2970 Inst = BC->user_back();
2971
2972 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2973 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2974 return false;
2975 };
2976
2977 SmallVector<const IntrinsicInst *, 4> FakeUses;
2978
2979 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2980 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2981 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2982 // Record the instruction so it can be preserved when the exit block is
2983 // removed. Do not preserve the fake use that uses the result of the
2984 // PHI instruction.
2985 // Do not copy fake uses that use the result of a PHI node.
2986 // FIXME: If we do want to copy the fake use into the return blocks, we
2987 // have to figure out which of the PHI node operands to use for each
2988 // copy.
2989 if (!isa<PHINode>(II->getOperand(0))) {
2990 FakeUses.push_back(II);
2991 }
2992 return true;
2993 }
2994
2995 return false;
2996 };
2997
2998 // Make sure there are no instructions between the first instruction
2999 // and return.
3000 BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
3001 // Skip over pseudo-probes and the bitcast.
3002 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3003 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3004 BI = std::next(BI);
3005 if (&*BI != RetI)
3006 return false;
3007
3008 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3009 /// call.
3010 const Function *F = BB->getParent();
3011 SmallVector<BasicBlock *, 4> TailCallBBs;
3012 // Record the call instructions so we can insert any fake uses
3013 // that need to be preserved before them.
3014 SmallVector<CallInst *, 4> CallInsts;
3015 if (PN) {
3016 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3017 // Look through bitcasts.
3018 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3019 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3020 BasicBlock *PredBB = PN->getIncomingBlock(I);
3021 // Make sure the phi value is indeed produced by the tail call.
3022 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3023 TLI->mayBeEmittedAsTailCall(CI) &&
3024 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3025 TailCallBBs.push_back(PredBB);
3026 CallInsts.push_back(CI);
3027 } else {
3028 // Consider the cases in which the phi value is indirectly produced by
3029 // the tail call, for example when encountering memset(), memmove(),
3030 // strcpy(), whose return value may have been optimized out. In such
3031 // cases, the value needs to be the first function argument.
3032 //
3033 // bb0:
3034 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3035 // br label %return
3036 // return:
3037 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3038 if (PredBB && PredBB->getSingleSuccessor() == BB)
3039 CI = dyn_cast_or_null<CallInst>(
3040 PredBB->getTerminator()->getPrevNode());
3041
3042 if (CI && CI->use_empty() &&
3043 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3044 IncomingVal == CI->getArgOperand(0) &&
3045 TLI->mayBeEmittedAsTailCall(CI) &&
3046 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3047 TailCallBBs.push_back(PredBB);
3048 CallInsts.push_back(CI);
3049 }
3050 }
3051 }
3052 } else {
3053 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3054 for (BasicBlock *Pred : predecessors(BB)) {
3055 if (!VisitedBBs.insert(Pred).second)
3056 continue;
3057 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3058 CallInst *CI = dyn_cast<CallInst>(I);
3059 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
3060 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3061 // Either we return void or the return value must be the first
3062 // argument of a known intrinsic or library function.
3063 if (!V || isa<UndefValue>(V) ||
3064 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3065 V == CI->getArgOperand(0))) {
3066 TailCallBBs.push_back(Pred);
3067 CallInsts.push_back(CI);
3068 }
3069 }
3070 }
3071 }
3072 }
3073
3074 bool Changed = false;
3075 for (auto const &TailCallBB : TailCallBBs) {
3076 // Make sure the call instruction is followed by an unconditional branch to
3077 // the return block.
3078 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3079 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3080 continue;
3081
3082 // Duplicate the return into TailCallBB.
3083 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3084 assert(!VerifyBFIUpdates ||
3085 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3086 BFI->setBlockFreq(BB,
3087 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3088 ModifiedDT = ModifyDT::ModifyBBDT;
3089 Changed = true;
3090 ++NumRetsDup;
3091 }
3092
3093 // If we eliminated all predecessors of the block, delete the block now.
3094 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3095 // Copy the fake uses found in the original return block to all blocks
3096 // that contain tail calls.
3097 for (auto *CI : CallInsts) {
3098 for (auto const *FakeUse : FakeUses) {
3099 auto *ClonedInst = FakeUse->clone();
3100 ClonedInst->insertBefore(CI->getIterator());
3101 }
3102 }
3103 BB->eraseFromParent();
3104 }
3105
3106 return Changed;
3107}
3108
3109//===----------------------------------------------------------------------===//
3110// Memory Optimization
3111//===----------------------------------------------------------------------===//
3112
3113namespace {
3114
3115/// This is an extended version of TargetLowering::AddrMode
3116/// which holds actual Value*'s for register values.
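/// A rough example of the decomposition (illustrative): for an access whose
/// address computes  %p + 4 * %i + 16  the fields would be roughly
///   BaseReg = %p, ScaledReg = %i, Scale = 4, BaseOffs = 16, BaseGV = null.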
3117struct ExtAddrMode : public TargetLowering::AddrMode {
3118 Value *BaseReg = nullptr;
3119 Value *ScaledReg = nullptr;
3120 Value *OriginalValue = nullptr;
3121 bool InBounds = true;
3122
3123 enum FieldName {
3124 NoField = 0x00,
3125 BaseRegField = 0x01,
3126 BaseGVField = 0x02,
3127 BaseOffsField = 0x04,
3128 ScaledRegField = 0x08,
3129 ScaleField = 0x10,
3130 MultipleFields = 0xff
3131 };
3132
3133 ExtAddrMode() = default;
3134
3135 void print(raw_ostream &OS) const;
3136 void dump() const;
3137
3138 // Replace From in ExtAddrMode with To.
3139 // E.g., SExt insts may be promoted and deleted. We should replace them with
3140 // the promoted values.
3141 void replaceWith(Value *From, Value *To) {
3142 if (ScaledReg == From)
3143 ScaledReg = To;
3144 }
3145
3146 FieldName compare(const ExtAddrMode &other) {
3147 // First check that the types are the same on each field, as differing types
3148 // are something we can't cope with later on.
3149 if (BaseReg && other.BaseReg &&
3150 BaseReg->getType() != other.BaseReg->getType())
3151 return MultipleFields;
3152 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3153 return MultipleFields;
3154 if (ScaledReg && other.ScaledReg &&
3155 ScaledReg->getType() != other.ScaledReg->getType())
3156 return MultipleFields;
3157
3158 // Conservatively reject 'inbounds' mismatches.
3159 if (InBounds != other.InBounds)
3160 return MultipleFields;
3161
3162 // Check each field to see if it differs.
3163 unsigned Result = NoField;
3164 if (BaseReg != other.BaseReg)
3165 Result |= BaseRegField;
3166 if (BaseGV != other.BaseGV)
3167 Result |= BaseGVField;
3168 if (BaseOffs != other.BaseOffs)
3169 Result |= BaseOffsField;
3170 if (ScaledReg != other.ScaledReg)
3171 Result |= ScaledRegField;
3172 // Don't count 0 as being a different scale, because that actually means
3173 // unscaled (which will already be counted by having no ScaledReg).
3174 if (Scale && other.Scale && Scale != other.Scale)
3175 Result |= ScaleField;
3176
3177 if (llvm::popcount(Result) > 1)
3178 return MultipleFields;
3179 else
3180 return static_cast<FieldName>(Result);
3181 }
3182
3183 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3184 // with no offset.
3185 bool isTrivial() {
3186 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3187 // trivial if at most one of these terms is nonzero, except that BaseGV and
3188 // BaseReg both being zero actually means a null pointer value, which we
3189 // consider to be 'non-zero' here.
3190 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3191 }
3192
3193 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3194 switch (Field) {
3195 default:
3196 return nullptr;
3197 case BaseRegField:
3198 return BaseReg;
3199 case BaseGVField:
3200 return BaseGV;
3201 case ScaledRegField:
3202 return ScaledReg;
3203 case BaseOffsField:
3204 return ConstantInt::get(IntPtrTy, BaseOffs);
3205 }
3206 }
3207
3208 void SetCombinedField(FieldName Field, Value *V,
3209 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3210 switch (Field) {
3211 default:
3212 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3213 break;
3214 case ExtAddrMode::BaseRegField:
3215 BaseReg = V;
3216 break;
3217 case ExtAddrMode::BaseGVField:
3218 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3219 // in the BaseReg field.
3220 assert(BaseReg == nullptr);
3221 BaseReg = V;
3222 BaseGV = nullptr;
3223 break;
3224 case ExtAddrMode::ScaledRegField:
3225 ScaledReg = V;
3226 // If we have a mix of scaled and unscaled addrmodes then we want scale
3227 // to be the scale and not zero.
3228 if (!Scale)
3229 for (const ExtAddrMode &AM : AddrModes)
3230 if (AM.Scale) {
3231 Scale = AM.Scale;
3232 break;
3233 }
3234 break;
3235 case ExtAddrMode::BaseOffsField:
3236 // The offset is no longer a constant, so it goes in ScaledReg with a
3237 // scale of 1.
3238 assert(ScaledReg == nullptr);
3239 ScaledReg = V;
3240 Scale = 1;
3241 BaseOffs = 0;
3242 break;
3243 }
3244 }
3245};
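// A minimal sketch of how a memory address decomposes into the ExtAddrMode
// fields above, assuming a hypothetical target that accepts
// "base + index*scale + displacement" addressing. For an access computed as
// "p = base + idx*8 + 16" the matcher would typically end up with:
//
//   ExtAddrMode AM;
//   AM.BaseReg = base;    // free register operand
//   AM.ScaledReg = idx;   // register that is multiplied by Scale
//   AM.Scale = 8;
//   AM.BaseOffs = 16;     // constant displacement
//   AM.BaseGV = nullptr;  // no global base in this example
//
// compare() then reports the single FieldName bit in which two such modes
// differ, or MultipleFields once they differ in more than one field.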
3246
3247#ifndef NDEBUG
3248static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3249 AM.print(OS);
3250 return OS;
3251}
3252#endif
3253
3254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3255void ExtAddrMode::print(raw_ostream &OS) const {
3256 bool NeedPlus = false;
3257 OS << "[";
3258 if (InBounds)
3259 OS << "inbounds ";
3260 if (BaseGV) {
3261 OS << "GV:";
3262 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3263 NeedPlus = true;
3264 }
3265
3266 if (BaseOffs) {
3267 OS << (NeedPlus ? " + " : "") << BaseOffs;
3268 NeedPlus = true;
3269 }
3270
3271 if (BaseReg) {
3272 OS << (NeedPlus ? " + " : "") << "Base:";
3273 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3274 NeedPlus = true;
3275 }
3276 if (Scale) {
3277 OS << (NeedPlus ? " + " : "") << Scale << "*";
3278 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3279 }
3280
3281 OS << ']';
3282}
3283
3284LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3285 print(dbgs());
3286 dbgs() << '\n';
3287}
3288#endif
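// For reference, print() above renders a fully populated mode roughly as
//   [inbounds GV:@g + 16 + Base:%p + 4*%i]
// i.e. the optional inbounds marker, then the global, the constant offset, the
// base register and the scaled register, with " + " only between set fields.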
3289
3290} // end anonymous namespace
3291
3292namespace {
3293
3294/// This class provides transaction based operation on the IR.
3295/// Every change made through this class is recorded in the internal state and
3296/// can be undone (rollback) until commit is called.
3297/// CGP does not check if instructions could be speculatively executed when
3298/// moved. Preserving the original location would pessimize the debugging
3299/// experience, as well as negatively impact the quality of sample PGO.
3300class TypePromotionTransaction {
3301 /// This represents the common interface of the individual transaction.
3302 /// Each class implements the logic for doing one specific modification on
3303 /// the IR via the TypePromotionTransaction.
3304 class TypePromotionAction {
3305 protected:
3306 /// The Instruction modified.
3307 Instruction *Inst;
3308
3309 public:
3310 /// Constructor of the action.
3311 /// The constructor performs the related action on the IR.
3312 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3313
3314 virtual ~TypePromotionAction() = default;
3315
3316 /// Undo the modification done by this action.
3317 /// When this method returns, the IR must be in the same state as it was
3318 /// before this action was applied.
3319 /// \pre Undoing the action works if and only if the IR is in the exact same
3320 /// state as it was directly after this action was applied.
3321 virtual void undo() = 0;
3322
3323 /// Commit every change made by this action.
3324 /// When the effects of the action on the IR are to be kept, it is important
3325 /// to call this function; otherwise hidden undo information may be kept forever.
3326 virtual void commit() {
3327 // Nothing to be done, this action is not doing anything.
3328 }
3329 };
3330
3331 /// Utility to remember the position of an instruction.
3332 class InsertionHandler {
3333 /// Position of an instruction.
3334 /// Either an instruction:
3335 /// - Is the first in a basic block: BB is used.
3336 /// - Has a previous instruction: PrevInst is used.
3337 struct {
3338 BasicBlock::iterator PrevInst;
3339 BasicBlock *BB;
3340 } Point;
3341 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3342
3343 /// Remember whether or not the instruction had a previous instruction.
3344 bool HasPrevInstruction;
3345
3346 public:
3347 /// Record the position of \p Inst.
3348 InsertionHandler(Instruction *Inst) {
3349 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3350 BasicBlock *BB = Inst->getParent();
3351
3352 // Record where we would have to re-insert the instruction in the sequence
3353 // of DbgRecords, if we ended up reinserting.
3354 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3355
3356 if (HasPrevInstruction) {
3357 Point.PrevInst = std::prev(Inst->getIterator());
3358 } else {
3359 Point.BB = BB;
3360 }
3361 }
3362
3363 /// Insert \p Inst at the recorded position.
3364 void insert(Instruction *Inst) {
3365 if (HasPrevInstruction) {
3366 if (Inst->getParent())
3367 Inst->removeFromParent();
3368 Inst->insertAfter(Point.PrevInst);
3369 } else {
3370 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3371 if (Inst->getParent())
3372 Inst->moveBefore(*Point.BB, Position);
3373 else
3374 Inst->insertBefore(*Point.BB, Position);
3375 }
3376
3377 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3378 }
3379 };
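// The handler above deliberately stores either a previous-instruction iterator
// or the owning block, never both. A purely illustrative sequence, assuming I
// is not the first instruction of its block and SomeOtherInsertPt is elsewhere:
//
//   InsertionHandler Pos(I); // remembers std::prev(I) and the DbgRecord slot
//   I->moveBefore(SomeOtherInsertPt);
//   ...
//   Pos.insert(I);           // puts I back right after the remembered
//                            // predecessor and re-threads its DbgRecords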
3380
3381 /// Move an instruction before another.
3382 class InstructionMoveBefore : public TypePromotionAction {
3383 /// Original position of the instruction.
3384 InsertionHandler Position;
3385
3386 public:
3387 /// Move \p Inst before \p Before.
3388 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3389 : TypePromotionAction(Inst), Position(Inst) {
3390 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3391 << "\n");
3392 Inst->moveBefore(Before);
3393 }
3394
3395 /// Move the instruction back to its original position.
3396 void undo() override {
3397 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3398 Position.insert(Inst);
3399 }
3400 };
3401
3402 /// Set the operand of an instruction with a new value.
3403 class OperandSetter : public TypePromotionAction {
3404 /// Original operand of the instruction.
3405 Value *Origin;
3406
3407 /// Index of the modified operand of the instruction.
3408 unsigned Idx;
3409
3410 public:
3411 /// Set \p Idx operand of \p Inst with \p NewVal.
3412 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3413 : TypePromotionAction(Inst), Idx(Idx) {
3414 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3415 << "for:" << *Inst << "\n"
3416 << "with:" << *NewVal << "\n");
3417 Origin = Inst->getOperand(Idx);
3418 Inst->setOperand(Idx, NewVal);
3419 }
3420
3421 /// Restore the original value of the instruction.
3422 void undo() override {
3423 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3424 << "for: " << *Inst << "\n"
3425 << "with: " << *Origin << "\n");
3426 Inst->setOperand(Idx, Origin);
3427 }
3428 };
3429
3430 /// Hide the operands of an instruction.
3431 /// That is, pretend that this instruction is not using any of its operands.
3432 class OperandsHider : public TypePromotionAction {
3433 /// The list of original operands.
3434 SmallVector<Value *, 4> OriginalValues;
3435
3436 public:
3437 /// Remove \p Inst from the uses of the operands of \p Inst.
3438 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3439 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3440 unsigned NumOpnds = Inst->getNumOperands();
3441 OriginalValues.reserve(NumOpnds);
3442 for (unsigned It = 0; It < NumOpnds; ++It) {
3443 // Save the current operand.
3444 Value *Val = Inst->getOperand(It);
3445 OriginalValues.push_back(Val);
3446 // Set a dummy one.
3447 // We could use OperandSetter here, but that would imply an overhead
3448 // that we are not willing to pay.
3449 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3450 }
3451 }
3452
3453 /// Restore the original list of uses.
3454 void undo() override {
3455 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3456 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3457 Inst->setOperand(It, OriginalValues[It]);
3458 }
3459 };
3460
3461 /// Build a truncate instruction.
3462 class TruncBuilder : public TypePromotionAction {
3463 Value *Val;
3464
3465 public:
3466 /// Build a truncate instruction of \p Opnd producing a \p Ty
3467 /// result.
3468 /// trunc Opnd to Ty.
3469 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3470 IRBuilder<> Builder(Opnd);
3471 Builder.SetCurrentDebugLocation(DebugLoc());
3472 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3473 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3474 }
3475
3476 /// Get the built value.
3477 Value *getBuiltValue() { return Val; }
3478
3479 /// Remove the built instruction.
3480 void undo() override {
3481 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3482 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3483 IVal->eraseFromParent();
3484 }
3485 };
3486
3487 /// Build a sign extension instruction.
3488 class SExtBuilder : public TypePromotionAction {
3489 Value *Val;
3490
3491 public:
3492 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3493 /// result.
3494 /// sext Opnd to Ty.
3495 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3496 : TypePromotionAction(InsertPt) {
3497 IRBuilder<> Builder(InsertPt);
3498 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3499 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3500 }
3501
3502 /// Get the built value.
3503 Value *getBuiltValue() { return Val; }
3504
3505 /// Remove the built instruction.
3506 void undo() override {
3507 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3508 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3509 IVal->eraseFromParent();
3510 }
3511 };
3512
3513 /// Build a zero extension instruction.
3514 class ZExtBuilder : public TypePromotionAction {
3515 Value *Val;
3516
3517 public:
3518 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3519 /// result.
3520 /// zext Opnd to Ty.
3521 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3522 : TypePromotionAction(InsertPt) {
3523 IRBuilder<> Builder(InsertPt);
3524 Builder.SetCurrentDebugLocation(DebugLoc());
3525 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3526 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3527 }
3528
3529 /// Get the built value.
3530 Value *getBuiltValue() { return Val; }
3531
3532 /// Remove the built instruction.
3533 void undo() override {
3534 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3535 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3536 IVal->eraseFromParent();
3537 }
3538 };
3539
3540 /// Mutate an instruction to another type.
3541 class TypeMutator : public TypePromotionAction {
3542 /// Record the original type.
3543 Type *OrigTy;
3544
3545 public:
3546 /// Mutate the type of \p Inst into \p NewTy.
3547 TypeMutator(Instruction *Inst, Type *NewTy)
3548 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3549 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3550 << "\n");
3551 Inst->mutateType(NewTy);
3552 }
3553
3554 /// Mutate the instruction back to its original type.
3555 void undo() override {
3556 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3557 << "\n");
3558 Inst->mutateType(OrigTy);
3559 }
3560 };
3561
3562 /// Replace the uses of an instruction by another instruction.
3563 class UsesReplacer : public TypePromotionAction {
3564 /// Helper structure to keep track of the replaced uses.
3565 struct InstructionAndIdx {
3566 /// The instruction that uses the replaced instruction.
3567 Instruction *Inst;
3568
3569 /// The operand index at which the replaced instruction is used by Inst.
3570 unsigned Idx;
3571
3572 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3573 : Inst(Inst), Idx(Idx) {}
3574 };
3575
3576 /// Keep track of the original uses (pair Instruction, Index).
3577 SmallVector<InstructionAndIdx, 4> OriginalUses;
3578 /// Keep track of the debug users.
3579 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3580
3581 /// Keep track of the new value so that we can undo it by replacing
3582 /// instances of the new value with the original value.
3583 Value *New;
3584
3586
3587 public:
3588 /// Replace all the uses of \p Inst with \p New.
3589 UsesReplacer(Instruction *Inst, Value *New)
3590 : TypePromotionAction(Inst), New(New) {
3591 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3592 << "\n");
3593 // Record the original uses.
3594 for (Use &U : Inst->uses()) {
3595 Instruction *UserI = cast<Instruction>(U.getUser());
3596 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3597 }
3598 // Record the debug uses separately. They are not in the instruction's
3599 // use list, but they are replaced by RAUW.
3600 findDbgValues(Inst, DbgVariableRecords);
3601
3602 // Now, we can replace the uses.
3603 Inst->replaceAllUsesWith(New);
3604 }
3605
3606 /// Reassign the original uses of Inst to Inst.
3607 void undo() override {
3608 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3609 for (InstructionAndIdx &Use : OriginalUses)
3610 Use.Inst->setOperand(Use.Idx, Inst);
3611 // RAUW has replaced all original uses with references to the new value,
3612 // including the debug uses. Since we are undoing the replacements,
3613 // the original debug uses must also be reinstated to maintain the
3614 // correctness and utility of debug value records.
3615 for (DbgVariableRecord *DVR : DbgVariableRecords)
3616 DVR->replaceVariableLocationOp(New, Inst);
3617 }
3618 };
3619
3620 /// Remove an instruction from the IR.
3621 class InstructionRemover : public TypePromotionAction {
3622 /// Original position of the instruction.
3623 InsertionHandler Inserter;
3624
3625 /// Helper structure to hide all the links to the instruction. In other
3626 /// words, this helps pretend that the instruction was removed.
3627 OperandsHider Hider;
3628
3629 /// Keep track of the uses replaced, if any.
3630 UsesReplacer *Replacer = nullptr;
3631
3632 /// Keep track of instructions removed.
3633 SetOfInstrs &RemovedInsts;
3634
3635 public:
3636 /// Remove all references to \p Inst and optionally replace all its
3637 /// uses with New.
3638 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3639 /// \pre If !Inst->use_empty(), then New != nullptr
3640 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3641 Value *New = nullptr)
3642 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3643 RemovedInsts(RemovedInsts) {
3644 if (New)
3645 Replacer = new UsesReplacer(Inst, New);
3646 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3647 RemovedInsts.insert(Inst);
3648 /// The instructions removed here will be freed after completing
3649 /// optimizeBlock() for all blocks as we need to keep track of the
3650 /// removed instructions during promotion.
3651 Inst->removeFromParent();
3652 }
3653
3654 ~InstructionRemover() override { delete Replacer; }
3655
3656 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3657 InstructionRemover(const InstructionRemover &other) = delete;
3658
3659 /// Resurrect the instruction and reassign it to the proper uses if
3660 /// a new value was provided when building this action.
3661 void undo() override {
3662 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3663 Inserter.insert(Inst);
3664 if (Replacer)
3665 Replacer->undo();
3666 Hider.undo();
3667 RemovedInsts.erase(Inst);
3668 }
3669 };
3670
3671public:
3672 /// Restoration point.
3673 /// The restoration point is a pointer to an action instead of an iterator
3674 /// because the iterator may be invalidated but not the pointer.
3675 using ConstRestorationPt = const TypePromotionAction *;
3676
3677 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3678 : RemovedInsts(RemovedInsts) {}
3679
3680 /// Commit all the changes made in this transaction. Return true if any change
3681 /// happened.
3682 bool commit();
3683
3684 /// Undo all the changes made after the given point.
3685 void rollback(ConstRestorationPt Point);
3686
3687 /// Get the current restoration point.
3688 ConstRestorationPt getRestorationPoint() const;
3689
3690 /// \name API for IR modification with state keeping to support rollback.
3691 /// @{
3692 /// Same as Instruction::setOperand.
3693 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3694
3695 /// Same as Instruction::eraseFromParent.
3696 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3697
3698 /// Same as Value::replaceAllUsesWith.
3699 void replaceAllUsesWith(Instruction *Inst, Value *New);
3700
3701 /// Same as Value::mutateType.
3702 void mutateType(Instruction *Inst, Type *NewTy);
3703
3704 /// Same as IRBuilder::createTrunc.
3705 Value *createTrunc(Instruction *Opnd, Type *Ty);
3706
3707 /// Same as IRBuilder::createSExt.
3708 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3709
3710 /// Same as IRBuilder::createZExt.
3711 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3712
3713private:
3714 /// The ordered list of actions made so far.
3715 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3716
3717 using CommitPt =
3718 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3719
3720 SetOfInstrs &RemovedInsts;
3721};
3722
3723} // end anonymous namespace
3724
3725void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3726 Value *NewVal) {
3727 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3728 Inst, Idx, NewVal));
3729}
3730
3731void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3732 Value *NewVal) {
3733 Actions.push_back(
3734 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3735 Inst, RemovedInsts, NewVal));
3736}
3737
3738void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3739 Value *New) {
3740 Actions.push_back(
3741 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3742}
3743
3744void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3745 Actions.push_back(
3746 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3747}
3748
3749Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3750 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3751 Value *Val = Ptr->getBuiltValue();
3752 Actions.push_back(std::move(Ptr));
3753 return Val;
3754}
3755
3756Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3757 Type *Ty) {
3758 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3759 Value *Val = Ptr->getBuiltValue();
3760 Actions.push_back(std::move(Ptr));
3761 return Val;
3762}
3763
3764Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3765 Type *Ty) {
3766 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3767 Value *Val = Ptr->getBuiltValue();
3768 Actions.push_back(std::move(Ptr));
3769 return Val;
3770}
3771
3772TypePromotionTransaction::ConstRestorationPt
3773TypePromotionTransaction::getRestorationPoint() const {
3774 return !Actions.empty() ? Actions.back().get() : nullptr;
3775}
3776
3777bool TypePromotionTransaction::commit() {
3778 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3779 Action->commit();
3780 bool Modified = !Actions.empty();
3781 Actions.clear();
3782 return Modified;
3783}
3784
3785void TypePromotionTransaction::rollback(
3786 TypePromotionTransaction::ConstRestorationPt Point) {
3787 while (!Actions.empty() && Point != Actions.back().get()) {
3788 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3789 Curr->undo();
3790 }
3791}
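// A minimal usage sketch of the transaction API above. The values involved
// (Ext, Opnd, PromotedTy, Profitable) are hypothetical; this only illustrates
// the record/rollback/commit pattern the promotion code relies on:
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt Point =
//       TPT.getRestorationPoint();
//   Value *PromotedOpnd = TPT.createZExt(Ext, Opnd, PromotedTy);
//   TPT.replaceAllUsesWith(Ext, PromotedOpnd);
//   if (!Profitable)
//     TPT.rollback(Point); // undoes the RAUW and the zext, newest first
//   else
//     TPT.commit();        // keeps the changes and drops the undo actions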
3792
3793namespace {
3794
3795/// A helper class for matching addressing modes.
3796///
3797/// This encapsulates the logic for matching the target-legal addressing modes.
3798class AddressingModeMatcher {
3799 SmallVectorImpl<Instruction *> &AddrModeInsts;
3800 const TargetLowering &TLI;
3801 const TargetRegisterInfo &TRI;
3802 const DataLayout &DL;
3803 const LoopInfo &LI;
3804 const std::function<const DominatorTree &()> getDTFn;
3805
3806 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3807 /// the memory instruction that we're computing this address for.
3808 Type *AccessTy;
3809 unsigned AddrSpace;
3810 Instruction *MemoryInst;
3811
3812 /// This is the addressing mode that we're building up. This is
3813 /// part of the return value of this addressing mode matching stuff.
3814 ExtAddrMode &AddrMode;
3815
3816 /// The instructions inserted by other CodeGenPrepare optimizations.
3817 const SetOfInstrs &InsertedInsts;
3818
3819 /// A map from the instructions to their type before promotion.
3820 InstrToOrigTy &PromotedInsts;
3821
3822 /// The ongoing transaction where every action should be registered.
3823 TypePromotionTransaction &TPT;
3824
3825 // A GEP whose offset is too large to be folded into the addressing mode.
3826 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3827
3828 /// This is set to true when we should not do profitability checks.
3829 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3830 bool IgnoreProfitability;
3831
3832 /// True if we are optimizing for size.
3833 bool OptSize = false;
3834
3835 ProfileSummaryInfo *PSI;
3836 BlockFrequencyInfo *BFI;
3837
3838 AddressingModeMatcher(
3839 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3840 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3841 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3842 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3843 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3844 TypePromotionTransaction &TPT,
3845 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3846 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3847 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3848 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3849 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3850 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3851 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3852 IgnoreProfitability = false;
3853 }
3854
3855public:
3856 /// Find the maximal addressing mode that a load/store of V can fold,
3857 /// given an access type of AccessTy. This returns a list of involved
3858 /// instructions in AddrModeInsts.
3859 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3860 /// optimizations.
3861 /// \p PromotedInsts maps the instructions to their type before promotion.
3862 /// \p TPT The ongoing transaction where every action should be registered.
3863 static ExtAddrMode
3864 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3865 SmallVectorImpl<Instruction *> &AddrModeInsts,
3866 const TargetLowering &TLI, const LoopInfo &LI,
3867 const std::function<const DominatorTree &()> getDTFn,
3868 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3869 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3870 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3871 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3872 ExtAddrMode Result;
3873
3874 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3875 AccessTy, AS, MemoryInst, Result,
3876 InsertedInsts, PromotedInsts, TPT,
3877 LargeOffsetGEP, OptSize, PSI, BFI)
3878 .matchAddr(V, 0);
3879 (void)Success;
3880 assert(Success && "Couldn't select *anything*?");
3881 return Result;
3882 }
3883
3884private:
3885 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3886 bool matchAddr(Value *Addr, unsigned Depth);
3887 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3888 bool *MovedAway = nullptr);
3889 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3890 ExtAddrMode &AMBefore,
3891 ExtAddrMode &AMAfter);
3892 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3893 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3894 Value *PromotedOperand) const;
3895};
3896
3897class PhiNodeSet;
3898
3899/// An iterator for PhiNodeSet.
3900class PhiNodeSetIterator {
3901 PhiNodeSet *const Set;
3902 size_t CurrentIndex = 0;
3903
3904public:
3905 /// The constructor. Start should point to either a valid element, or be equal
3906 /// to the size of the underlying SmallVector of the PhiNodeSet.
3907 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3908 PHINode *operator*() const;
3909 PhiNodeSetIterator &operator++();
3910 bool operator==(const PhiNodeSetIterator &RHS) const;
3911 bool operator!=(const PhiNodeSetIterator &RHS) const;
3912};
3913
3914/// Keeps a set of PHINodes.
3915///
3916/// This is a minimal set implementation for a specific use case:
3917/// It is very fast when there are very few elements, but also provides good
3918/// performance when there are many. It is similar to SmallPtrSet, but also
3919/// provides iteration by insertion order, which is deterministic and stable
3920/// across runs. It is also similar to SmallSetVector, but it supports removing
3921/// elements in O(1) time. This is achieved by not actually removing the element
3922/// from the underlying vector, which comes at the cost of using more memory, but
3923/// that is fine, since PhiNodeSets are used as short-lived objects.
3924class PhiNodeSet {
3925 friend class PhiNodeSetIterator;
3926
3927 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3928 using iterator = PhiNodeSetIterator;
3929
3930 /// Keeps the elements in the order of their insertion in the underlying
3931 /// vector. To achieve constant time removal, it never deletes any element.
3932 SmallVector<PHINode *, 32> NodeList;
3933
3934 /// Keeps the elements in the underlying set implementation. This (and not the
3935 /// NodeList defined above) is the source of truth on whether an element
3936 /// is actually in the collection.
3937 MapType NodeMap;
3938
3939 /// Points to the first valid (not deleted) element when the set is not empty
3940 /// and the value is not zero. Equals the size of the underlying vector
3941 /// when the set is empty. When the value is 0, as in the beginning, the
3942 /// first element may or may not be valid.
3943 size_t FirstValidElement = 0;
3944
3945public:
3946 /// Inserts a new element to the collection.
3947 /// \returns true if the element is actually added, i.e. was not in the
3948 /// collection before the operation.
3949 bool insert(PHINode *Ptr) {
3950 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3951 NodeList.push_back(Ptr);
3952 return true;
3953 }
3954 return false;
3955 }
3956
3957 /// Removes the element from the collection.
3958 /// \returns whether the element is actually removed, i.e. was in the
3959 /// collection before the operation.
3960 bool erase(PHINode *Ptr) {
3961 if (NodeMap.erase(Ptr)) {
3962 SkipRemovedElements(FirstValidElement);
3963 return true;
3964 }
3965 return false;
3966 }
3967
3968 /// Removes all elements and clears the collection.
3969 void clear() {
3970 NodeMap.clear();
3971 NodeList.clear();
3972 FirstValidElement = 0;
3973 }
3974
3975 /// \returns an iterator that will iterate the elements in the order of
3976 /// insertion.
3977 iterator begin() {
3978 if (FirstValidElement == 0)
3979 SkipRemovedElements(FirstValidElement);
3980 return PhiNodeSetIterator(this, FirstValidElement);
3981 }
3982
3983 /// \returns an iterator that points to the end of the collection.
3984 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3985
3986 /// Returns the number of elements in the collection.
3987 size_t size() const { return NodeMap.size(); }
3988
3989 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3990 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3991
3992private:
3993 /// Updates the CurrentIndex so that it will point to a valid element.
3994 ///
3995 /// If the element of NodeList at CurrentIndex is valid, it does not
3996 /// change it. If there are no more valid elements, it updates CurrentIndex
3997 /// to point to the end of the NodeList.
3998 void SkipRemovedElements(size_t &CurrentIndex) {
3999 while (CurrentIndex < NodeList.size()) {
4000 auto it = NodeMap.find(NodeList[CurrentIndex]);
4001 // If the element has been deleted and added again later, NodeMap will
4002 // point to a different index, so CurrentIndex will still be invalid.
4003 if (it != NodeMap.end() && it->second == CurrentIndex)
4004 break;
4005 ++CurrentIndex;
4006 }
4007 }
4008};
4009
4010PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4011 : Set(Set), CurrentIndex(Start) {}
4012
4013PHINode *PhiNodeSetIterator::operator*() const {
4014 assert(CurrentIndex < Set->NodeList.size() &&
4015 "PhiNodeSet access out of range");
4016 return Set->NodeList[CurrentIndex];
4017}
4018
4019PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4020 assert(CurrentIndex < Set->NodeList.size() &&
4021 "PhiNodeSet access out of range");
4022 ++CurrentIndex;
4023 Set->SkipRemovedElements(CurrentIndex);
4024 return *this;
4025}
4026
4027bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4028 return CurrentIndex == RHS.CurrentIndex;
4029}
4030
4031bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4032 return !((*this) == RHS);
4033}
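// A small usage sketch of PhiNodeSet; P1 and P2 stand for arbitrary PHINode
// pointers and are purely illustrative:
//
//   PhiNodeSet Set;
//   Set.insert(P1);           // true: newly added
//   Set.insert(P2);
//   Set.insert(P1);           // false: already present
//   Set.erase(P1);            // O(1): only NodeMap forgets P1
//   for (PHINode *P : Set)    // iterates remaining nodes in insertion order
//     (void)P;
//
// NodeList still physically holds P1 after the erase; the iterator skips it
// because NodeMap no longer maps P1 to that slot.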
4034
4035/// Keeps track of the simplification of Phi nodes.
4036/// Accepts the set of all phi nodes and erases a phi node from this set
4037/// if it is simplified.
4038class SimplificationTracker {
4039 DenseMap<Value *, Value *> Storage;
4040 const SimplifyQuery &SQ;
4041 // Tracks newly created Phi nodes. The elements are iterated by insertion
4042 // order.
4043 PhiNodeSet AllPhiNodes;
4044 // Tracks newly created Select nodes.
4045 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4046
4047public:
4048 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
4049
4050 Value *Get(Value *V) {
4051 do {
4052 auto SV = Storage.find(V);
4053 if (SV == Storage.end())
4054 return V;
4055 V = SV->second;
4056 } while (true);
4057 }
4058
4059 Value *Simplify(Value *Val) {
4060 SmallVector<Value *, 32> WorkList;
4061 SmallPtrSet<Value *, 32> Visited;
4062 WorkList.push_back(Val);
4063 while (!WorkList.empty()) {
4064 auto *P = WorkList.pop_back_val();
4065 if (!Visited.insert(P).second)
4066 continue;
4067 if (auto *PI = dyn_cast<Instruction>(P))
4068 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
4069 for (auto *U : PI->users())
4070 WorkList.push_back(cast<Value>(U));
4071 Put(PI, V);
4072 PI->replaceAllUsesWith(V);
4073 if (auto *PHI = dyn_cast<PHINode>(PI))
4074 AllPhiNodes.erase(PHI);
4075 if (auto *Select = dyn_cast<SelectInst>(PI))
4076 AllSelectNodes.erase(Select);
4077 PI->eraseFromParent();
4078 }
4079 }
4080 return Get(Val);
4081 }
4082
4083 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4084
4085 void ReplacePhi(PHINode *From, PHINode *To) {
4086 Value *OldReplacement = Get(From);
4087 while (OldReplacement != From) {
4088 From = To;
4089 To = dyn_cast<PHINode>(OldReplacement);
4090 OldReplacement = Get(From);
4091 }
4092 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4093 Put(From, To);
4094 From->replaceAllUsesWith(To);
4095 AllPhiNodes.erase(From);
4096 From->eraseFromParent();
4097 }
4098
4099 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4100
4101 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4102
4103 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4104
4105 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4106
4107 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4108
4109 void destroyNewNodes(Type *CommonType) {
4110 // For safe erasing, replace the uses with dummy value first.
4111 auto *Dummy = PoisonValue::get(CommonType);
4112 for (auto *I : AllPhiNodes) {
4113 I->replaceAllUsesWith(Dummy);
4114 I->eraseFromParent();
4115 }
4116 AllPhiNodes.clear();
4117 for (auto *I : AllSelectNodes) {
4118 I->replaceAllUsesWith(Dummy);
4119 I->eraseFromParent();
4120 }
4121 AllSelectNodes.clear();
4122 }
4123};
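// Note on Get/Put above: Put records a single replacement step, while Get
// chases the chain of recorded replacements to the final value. For purely
// illustrative values A, B and C:
//
//   ST.Put(A, B);
//   ST.Put(B, C);
//   Value *V = ST.Get(A); // yields C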
4124
4125/// A helper class for combining addressing modes.
4126class AddressingModeCombiner {
4127 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4128 typedef std::pair<PHINode *, PHINode *> PHIPair;
4129
4130private:
4131 /// The addressing modes we've collected.
4132 SmallVector<ExtAddrMode, 16> AddrModes;
4133
4134 /// The field in which the AddrModes differ, when we have more than one.
4135 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4136
4137 /// Are the AddrModes that we have all just equal to their original values?
4138 bool AllAddrModesTrivial = true;
4139
4140 /// Common Type for all different fields in addressing modes.
4141 Type *CommonType = nullptr;
4142
4143 /// SimplifyQuery for simplifyInstruction utility.
4144 const SimplifyQuery &SQ;
4145
4146 /// Original Address.
4147 Value *Original;
4148
4149 /// Common value among addresses
4150 Value *CommonValue = nullptr;
4151
4152public:
4153 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
4154 : SQ(_SQ), Original(OriginalValue) {}
4155
4156 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4157
4158 /// Get the combined AddrMode
4159 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4160
4161 /// Add a new AddrMode if it's compatible with the AddrModes we already
4162 /// have.
4163 /// \return True iff we succeeded in doing so.
4164 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4165 // Take note of whether we have any non-trivial AddrModes, as we need to detect
4166 // when all AddrModes are trivial as then we would introduce a phi or select
4167 // which just duplicates what's already there.
4168 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4169
4170 // If this is the first addrmode then everything is fine.
4171 if (AddrModes.empty()) {
4172 AddrModes.emplace_back(NewAddrMode);
4173 return true;
4174 }
4175
4176 // Figure out how different this is from the other address modes, which we
4177 // can do just by comparing against the first one given that we only care
4178 // about the cumulative difference.
4179 ExtAddrMode::FieldName ThisDifferentField =
4180 AddrModes[0].compare(NewAddrMode);
4181 if (DifferentField == ExtAddrMode::NoField)
4182 DifferentField = ThisDifferentField;
4183 else if (DifferentField != ThisDifferentField)
4184 DifferentField = ExtAddrMode::MultipleFields;
4185
4186 // If NewAddrMode differs in more than one dimension we cannot handle it.
4187 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4188
4189 // If Scale Field is different then we reject.
4190 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4191
4192 // We also must reject the case when the base offset is different and the
4193 // scaled register is not null: we cannot handle this case, because the
4194 // merge of the different offsets would have to be used as the ScaledReg.
4195 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4196 !NewAddrMode.ScaledReg);
4197
4198 // We also must reject the case when the GV is different and a BaseReg is
4199 // installed, because we want to use the base register as a merge of the GVs.
4200 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4201 !NewAddrMode.HasBaseReg);
4202
4203 // Even if NewAddrMode is the same, we still need to collect it, because the
4204 // original value is different. And later we will need all original values
4205 // as anchors when finding the common Phi node.
4206 if (CanHandle)
4207 AddrModes.emplace_back(NewAddrMode);
4208 else
4209 AddrModes.clear();
4210
4211 return CanHandle;
4212 }
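// An illustration of the bookkeeping above, with hypothetical addressing
// modes: if AddrModes[0] is [Base:%a + 16] and a new mode [Base:%b + 16]
// arrives, compare() yields BaseRegField, so DifferentField becomes
// BaseRegField and the mode is collected. If a later mode such as
// [Base:%a + 32] then yields BaseOffsField, DifferentField collapses to
// MultipleFields and all collected modes are dropped.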
4213
4214 /// Combine the addressing modes we've collected into a single
4215 /// addressing mode.
4216 /// \return True iff we successfully combined them or we only had one so
4217 /// didn't need to combine them anyway.
4218 bool combineAddrModes() {
4219 // If we have no AddrModes then they can't be combined.
4220 if (AddrModes.size() == 0)
4221 return false;
4222
4223 // A single AddrMode can trivially be combined.
4224 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4225 return true;
4226
4227 // If the AddrModes we collected are all just equal to the value they are
4228 // derived from then combining them wouldn't do anything useful.
4229 if (AllAddrModesTrivial)
4230 return false;
4231
4232 if (!addrModeCombiningAllowed())
4233 return false;
4234
4235 // Build a map between <original value, basic block where we saw it> to
4236 // value of base register.
4237 // Bail out if there is no common type.
4238 FoldAddrToValueMapping Map;
4239 if (!initializeMap(Map))
4240 return false;
4241
4242 CommonValue = findCommon(Map);
4243 if (CommonValue)
4244 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4245 return CommonValue != nullptr;
4246 }
4247
4248private:
4249 /// `CommonValue` may be a placeholder inserted by us.
4250 /// If the placeholder is not used, we should remove this dead instruction.
4251 void eraseCommonValueIfDead() {
4252 if (CommonValue && CommonValue->use_empty())
4253 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4254 CommonInst->eraseFromParent();
4255 }
4256
4257 /// Initialize Map with anchor values. For each address seen,
4258 /// we set the value of the differing field seen in this address.
4259 /// At the same time we find a common type for the differing fields, which we
4260 /// will use to create new Phi/Select nodes. It is kept in the CommonType field.
4261 /// Return false if there is no common type found.
4262 bool initializeMap(FoldAddrToValueMapping &Map) {
4263 // Keep track of keys where the value is null. We will need to replace it
4264 // with constant null when we know the common type.
4265 SmallVector<Value *, 2> NullValue;
4266 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4267 for (auto &AM : AddrModes) {
4268 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4269 if (DV) {
4270 auto *Type = DV->getType();
4271 if (CommonType && CommonType != Type)
4272 return false;
4273 CommonType = Type;
4274 Map[AM.OriginalValue] = DV;
4275 } else {
4276 NullValue.push_back(AM.OriginalValue);
4277 }
4278 }
4279 assert(CommonType && "At least one non-null value must be!");
4280 for (auto *V : NullValue)
4281 Map[V] = Constant::getNullValue(CommonType);
4282 return true;
4283 }
4284
4285 /// We have a mapping from a value A to another value B, where B was a field in
4286 /// the addressing mode represented by A. We also have an original value C
4287 /// representing the address we start with. Traversing from C through phis and
4288 /// selects, we ended up with the A's in the map. This utility function tries to
4289 /// find a value V which is a field in addressing mode C such that, traversing
4290 /// through phi nodes and selects, we end up at the corresponding values B in the
4291 /// map. The utility will create new Phis/Selects if needed.
4292 // The simple example looks as follows:
4293 // BB1:
4294 // p1 = b1 + 40
4295 // br cond BB2, BB3
4296 // BB2:
4297 // p2 = b2 + 40
4298 // br BB3
4299 // BB3:
4300 // p = phi [p1, BB1], [p2, BB2]
4301 // v = load p
4302 // Map is
4303 // p1 -> b1
4304 // p2 -> b2
4305 // Request is
4306 // p -> ?
4307 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4308 Value *findCommon(FoldAddrToValueMapping &Map) {
4309 // Tracks the simplification of newly created phi nodes. The reason we use
4310 // this tracker is that newly created Phi nodes are added to the map.
4311 // Simplification of Phi nodes is recursive, so some Phi node may
4312 // be simplified after we added it to the map. In reality this
4313 // simplification is possible only if the original phis/selects were not
4314 // simplified yet.
4315 // Using this tracker we can find the current value for any entry in the map.
4316 SimplificationTracker ST(SQ);
4317
4318 // First step, DFS to create PHI nodes for all intermediate blocks.
4319 // Also fill traverse order for the second step.
4320 SmallVector<Value *, 32> TraverseOrder;
4321 InsertPlaceholders(Map, TraverseOrder, ST);
4322
4323 // Second Step, fill new nodes by merged values and simplify if possible.
4324 FillPlaceholders(Map, TraverseOrder, ST);
4325
4326 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4327 ST.destroyNewNodes(CommonType);
4328 return nullptr;
4329 }
4330
4331 // Now we'd like to match the new Phi nodes to existing ones.
4332 unsigned PhiNotMatchedCount = 0;
4333 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4334 ST.destroyNewNodes(CommonType);
4335 return nullptr;
4336 }
4337
4338 auto *Result = ST.Get(Map.find(Original)->second);
4339 if (Result) {
4340 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4341 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4342 }
4343 return Result;
4344 }
4345
4346 /// Try to match PHI node to Candidate.
4347 /// Matcher tracks the matched Phi nodes.
4348 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4349 SmallSetVector<PHIPair, 8> &Matcher,
4350 PhiNodeSet &PhiNodesToMatch) {
4351 SmallVector<PHIPair, 8> WorkList;
4352 Matcher.insert({PHI, Candidate});
4353 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4354 MatchedPHIs.insert(PHI);
4355 WorkList.push_back({PHI, Candidate});
4356 SmallSet<PHIPair, 8> Visited;
4357 while (!WorkList.empty()) {
4358 auto Item = WorkList.pop_back_val();
4359 if (!Visited.insert(Item).second)
4360 continue;
4361 // We iterate over all incoming values of the Phi to compare them.
4362 // If the values are different, both of them are Phis, the first one is a
4363 // Phi we added (subject to match), and both of them are in the same basic
4364 // block, then we can match our pair if the values match. So we state that
4365 // these values match and add them to the work list to verify that.
4366 for (auto *B : Item.first->blocks()) {
4367 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4368 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4369 if (FirstValue == SecondValue)
4370 continue;
4371
4372 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4373 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4374
4375 // If one of them is not a Phi, or
4376 // the first one is not a Phi node from the set we'd like to match, or
4377 // the Phi nodes are from different basic blocks, then
4378 // we will not be able to match.
4379 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4380 FirstPhi->getParent() != SecondPhi->getParent())
4381 return false;
4382
4383 // If we already matched them then continue.
4384 if (Matcher.count({FirstPhi, SecondPhi}))
4385 continue;
4386 // So the values are different and do not match. So we need them to
4387 // match. (But we register no more than one match per PHI node, so that
4388 // we won't later try to replace them twice.)
4389 if (MatchedPHIs.insert(FirstPhi).second)
4390 Matcher.insert({FirstPhi, SecondPhi});
4391 // But we must check it.
4392 WorkList.push_back({FirstPhi, SecondPhi});
4393 }
4394 }
4395 return true;
4396 }
4397
4398 /// For the given set of PHI nodes (in the SimplificationTracker) try
4399 /// to find their equivalents.
4400 /// Returns false if this matching fails and creation of new Phi is disabled.
4401 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4402 unsigned &PhiNotMatchedCount) {
4403 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4404 // order, so the replacements (ReplacePhi) are also done in a deterministic
4405 // order.
4406 SmallSetVector<PHIPair, 8> Matched;
4407 SmallPtrSet<PHINode *, 8> WillNotMatch;
4408 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4409 while (PhiNodesToMatch.size()) {
4410 PHINode *PHI = *PhiNodesToMatch.begin();
4411
4412 // Add ourselves: if no Phi node in the basic block matches, we do not match.
4413 WillNotMatch.clear();
4414 WillNotMatch.insert(PHI);
4415
4416 // Traverse all Phis until we find an equivalent one or fail to do so.
4417 bool IsMatched = false;
4418 for (auto &P : PHI->getParent()->phis()) {
4419 // Skip new Phi nodes.
4420 if (PhiNodesToMatch.count(&P))
4421 continue;
4422 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4423 break;
4424 // If it does not match, collect all Phi nodes from the matcher.
4425 // If we end up with no match, then all these Phi nodes will not match
4426 // later.
4427 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4428 Matched.clear();
4429 }
4430 if (IsMatched) {
4431 // Replace all matched values and erase them.
4432 for (auto MV : Matched)
4433 ST.ReplacePhi(MV.first, MV.second);
4434 Matched.clear();
4435 continue;
4436 }
4437 // If we are not allowed to create new nodes then bail out.
4438 if (!AllowNewPhiNodes)
4439 return false;
4440 // Just remove all seen values in matcher. They will not match anything.
4441 PhiNotMatchedCount += WillNotMatch.size();
4442 for (auto *P : WillNotMatch)
4443 PhiNodesToMatch.erase(P);
4444 }
4445 return true;
4446 }
4447 /// Fill the placeholders with values from predecessors and simplify them.
4448 void FillPlaceholders(FoldAddrToValueMapping &Map,
4449 SmallVectorImpl<Value *> &TraverseOrder,
4450 SimplificationTracker &ST) {
4451 while (!TraverseOrder.empty()) {
4452 Value *Current = TraverseOrder.pop_back_val();
4453 assert(Map.contains(Current) && "No node to fill!!!");
4454 Value *V = Map[Current];
4455
4456 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4457 // CurrentValue also must be Select.
4458 auto *CurrentSelect = cast<SelectInst>(Current);
4459 auto *TrueValue = CurrentSelect->getTrueValue();
4460 assert(Map.contains(TrueValue) && "No True Value!");
4461 Select->setTrueValue(ST.Get(Map[TrueValue]));
4462 auto *FalseValue = CurrentSelect->getFalseValue();
4463 assert(Map.contains(FalseValue) && "No False Value!");
4464 Select->setFalseValue(ST.Get(Map[FalseValue]));
4465 } else {
4466 // Must be a Phi node then.
4467 auto *PHI = cast<PHINode>(V);
4468 // Fill the Phi node with values from predecessors.
4469 for (auto *B : predecessors(PHI->getParent())) {
4470 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4471 assert(Map.contains(PV) && "No predecessor Value!");
4472 PHI->addIncoming(ST.Get(Map[PV]), B);
4473 }
4474 }
4475 Map[Current] = ST.Simplify(V);
4476 }
4477 }
4478
4479 /// Starting from the original value, recursively iterates over the def-use
4480 /// chain up to known ending values represented in a map. For each traversed
4481 /// phi/select, inserts a placeholder Phi or Select.
4482 /// Reports all newly created Phi/Select nodes by adding them to the set.
4483 /// Also reports the order in which the values have been traversed.
4484 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4485 SmallVectorImpl<Value *> &TraverseOrder,
4486 SimplificationTracker &ST) {
4487 SmallVector<Value *, 32> Worklist;
4488 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4489 "Address must be a Phi or Select node");
4490 auto *Dummy = PoisonValue::get(CommonType);
4491 Worklist.push_back(Original);
4492 while (!Worklist.empty()) {
4493 Value *Current = Worklist.pop_back_val();
4494 // if it is already visited or it is an ending value then skip it.
4495 if (Map.contains(Current))
4496 continue;
4497 TraverseOrder.push_back(Current);
4498
4499 // CurrentValue must be a Phi node or select. All others must be covered
4500 // by anchors.
4501 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4502 // Is it OK to get metadata from OrigSelect?!
4503 // Create a Select placeholder with dummy value.
4504 SelectInst *Select =
4505 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4506 CurrentSelect->getName(),
4507 CurrentSelect->getIterator(), CurrentSelect);
4508 Map[Current] = Select;
4509 ST.insertNewSelect(Select);
4510 // We are interested in True and False values.
4511 Worklist.push_back(CurrentSelect->getTrueValue());
4512 Worklist.push_back(CurrentSelect->getFalseValue());
4513 } else {
4514 // It must be a Phi node then.
4515 PHINode *CurrentPhi = cast<PHINode>(Current);
4516 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4517 PHINode *PHI =
4518 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4519 Map[Current] = PHI;
4520 ST.insertNewPhi(PHI);
4521 append_range(Worklist, CurrentPhi->incoming_values());
4522 }
4523 }
4524 }
4525
4526 bool addrModeCombiningAllowed() {
4527 if (DisableComplexAddrModes)
4528 return false;
4529 switch (DifferentField) {
4530 default:
4531 return false;
4532 case ExtAddrMode::BaseRegField:
4533 return AddrSinkCombineBaseReg;
4534 case ExtAddrMode::BaseGVField:
4535 return AddrSinkCombineBaseGV;
4536 case ExtAddrMode::BaseOffsField:
4537 return AddrSinkCombineBaseOffs;
4538 case ExtAddrMode::ScaledRegField:
4539 return AddrSinkCombineScaledReg;
4540 }
4541 }
4542};
4543} // end anonymous namespace
4544
4545/// Try adding ScaleReg*Scale to the current addressing mode.
4546/// Return true and update AddrMode if this addr mode is legal for the target,
4547/// false if not.
4548bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4549 unsigned Depth) {
4550 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4551 // mode. Just process that directly.
4552 if (Scale == 1)
4553 return matchAddr(ScaleReg, Depth);
4554
4555 // If the scale is 0, it takes nothing to add this.
4556 if (Scale == 0)
4557 return true;
4558
4559 // If we already have a scale of this value, we can add to it, otherwise, we
4560 // need an available scale field.
4561 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4562 return false;
4563
4564 ExtAddrMode TestAddrMode = AddrMode;
4565
4566 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4567 // [A+B + A*7] -> [B+A*8].
4568 TestAddrMode.Scale += Scale;
4569 TestAddrMode.ScaledReg = ScaleReg;
4570
4571 // If the new address isn't legal, bail out.
4572 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4573 return false;
4574
4575 // It was legal, so commit it.
4576 AddrMode = TestAddrMode;
4577
4578 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4579 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4580 // X*Scale + C*Scale to the addr mode. If we find an available IV increment, do not
4581 // go any further: we can reuse it and cannot eliminate it.
4582 ConstantInt *CI = nullptr;
4583 Value *AddLHS = nullptr;
4584 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4585 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4586 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4587 TestAddrMode.InBounds = false;
4588 TestAddrMode.ScaledReg = AddLHS;
4589 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4590
4591 // If this addressing mode is legal, commit it and remember that we folded
4592 // this instruction.
4593 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4594 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4595 AddrMode = TestAddrMode;
4596 return true;
4597 }
4598 // Restore status quo.
4599 TestAddrMode = AddrMode;
4600 }
4601
4602 // If this is an add recurrence with a constant step, return the increment
4603 // instruction and the canonicalized step.
4604 auto GetConstantStep =
4605 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4606 auto *PN = dyn_cast<PHINode>(V);
4607 if (!PN)
4608 return std::nullopt;
4609 auto IVInc = getIVIncrement(PN, &LI);
4610 if (!IVInc)
4611 return std::nullopt;
4612 // TODO: The result of the intrinsics above is two's complement. However, when
4613 // the IV inc is expressed as add or sub, iv.next is potentially a poison value.
4614 // If it has nuw or nsw flags, we need to make sure that these flags are
4615 // inferrable at the point of the memory instruction. Otherwise we are replacing
4616 // a well-defined two's-complement computation with poison. Currently, to avoid
4617 // the potentially complex analysis needed to prove this, we reject such cases.
4618 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4619 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4620 return std::nullopt;
4621 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4622 return std::make_pair(IVInc->first, ConstantStep->getValue());
4623 return std::nullopt;
4624 };
4625
4626 // Try to account for the following special case:
4627 // 1. ScaleReg is an induction variable;
4628 // 2. We use it with a non-zero offset;
4629 // 3. The IV's increment is available at the point of the memory instruction.
4630 //
4631 // In this case, we may reuse the IV increment instead of the IV Phi to
4632 // achieve the following advantages:
4633 // 1. If the IV step matches the offset, we will have no need for the offset;
4634 // 2. Even if they don't match, we will reduce the overlap of the live IV
4635 // and the IV increment, which will potentially lead to better register
4636 // assignment.
4637 if (AddrMode.BaseOffs) {
4638 if (auto IVStep = GetConstantStep(ScaleReg)) {
4639 Instruction *IVInc = IVStep->first;
4640 // The following assert is important to ensure a lack of infinite loops.
4641 // This transform is (intentionally) the inverse of the one just above.
4642 // If they don't agree on the definition of an increment, we'd alternate
4643 // back and forth indefinitely.
4644 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4645 APInt Step = IVStep->second;
4646 APInt Offset = Step * AddrMode.Scale;
4647 if (Offset.isSignedIntN(64)) {
4648 TestAddrMode.InBounds = false;
4649 TestAddrMode.ScaledReg = IVInc;
4650 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4651 // If this addressing mode is legal, commit it.
4652 // (Note that we defer the (expensive) domtree base legality check
4653 // to the very last possible point.)
4654 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4655 getDTFn().dominates(IVInc, MemoryInst)) {
4656 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4657 AddrMode = TestAddrMode;
4658 return true;
4659 }
4660 // Restore status quo.
4661 TestAddrMode = AddrMode;
4662 }
4663 }
4664 }
4665
4666 // Otherwise, just return what we have.
4667 return true;
4668}
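// A worked example of the X+C fold above, with hypothetical values: starting
// from AddrMode = [Base:%p], ScaleReg = (add i64 %x, 3) and Scale = 8, the
// match first commits [Base:%p + 8*(add %x, 3)]; if the target also accepts
// the larger displacement, the mode is re-formed as [Base:%p + 8*%x + 24],
// i.e. BaseOffs += 3 * 8 and ScaledReg becomes %x, and the add is recorded in
// AddrModeInsts as part of the matched address computation.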
4669
4670/// This is a little filter, which returns true if an addressing computation
4671/// involving I might be folded into a load/store accessing it.
4672/// This doesn't need to be perfect, but needs to accept at least
4673/// the set of instructions that MatchOperationAddr can.
4674static bool MightBeFoldableInst(Instruction *I) {
4675 switch (I->getOpcode()) {
4676 case Instruction::BitCast:
4677 case Instruction::AddrSpaceCast:
4678 // Don't touch identity bitcasts.
4679 if (I->getType() == I->getOperand(0)->getType())
4680 return false;
4681 return I->getType()->isIntOrPtrTy();
4682 case Instruction::PtrToInt:
4683 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4684 return true;
4685 case Instruction::IntToPtr:
4686 // We know the input is intptr_t, so this is foldable.
4687 return true;
4688 case Instruction::Add:
4689 return true;
4690 case Instruction::Mul:
4691 case Instruction::Shl:
4692 // Can only handle X*C and X << C.
4693 return isa<ConstantInt>(I->getOperand(1));
4694 case Instruction::GetElementPtr:
4695 return true;
4696 default:
4697 return false;
4698 }
4699}
4700
4701/// Check whether or not \p Val is a legal instruction for \p TLI.
4702/// \note \p Val is assumed to be the product of some type promotion.
4703/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4704/// to be legal, as the non-promoted value would have had the same state.
4705static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4706 const DataLayout &DL, Value *Val) {
4707 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4708 if (!PromotedInst)
4709 return false;
4710 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4711 // If the ISDOpcode is undefined, it was undefined before the promotion.
4712 if (!ISDOpcode)
4713 return true;
4714 // Otherwise, check if the promoted instruction is legal or not.
4715 return TLI.isOperationLegalOrCustom(
4716 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4717}
4718
4719namespace {
4720
4721/// Helper class to perform type promotion.
4722class TypePromotionHelper {
4723 /// Utility function to add a promoted instruction \p ExtOpnd to
4724 /// \p PromotedInsts and record the type of extension we have seen.
4725 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4726 Instruction *ExtOpnd, bool IsSExt) {
4727 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4728 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4729 if (!Inserted) {
4730 // If the new extension is the same as the original, the information in
4731 // PromotedInsts[ExtOpnd] is still correct.
4732 if (It->second.getInt() == ExtTy)
4733 return;
4734
4735 // Now that the new extension is different from the old extension, we make
4736 // the type information invalid by setting extension type to
4737 // BothExtension.
4738 ExtTy = BothExtension;
4739 }
4740 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4741 }
4742
4743 /// Utility function to query the original type of instruction \p Opnd
4744 /// with a matched extension type. If the extension doesn't match, we
4745 /// cannot use the information we had on the original type.
4746 /// BothExtension doesn't match any extension type.
4747 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4748 Instruction *Opnd, bool IsSExt) {
4749 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4750 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4751 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4752 return It->second.getPointer();
4753 return nullptr;
4754 }
4755
4756 /// Utility function to check whether or not a sign or zero extension
4757 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4758 /// either using the operands of \p Inst or promoting \p Inst.
4759 /// The type of the extension is defined by \p IsSExt.
4760 /// In other words, check if:
4761 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4762 /// #1 Promotion applies:
4763 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4764 /// #2 Operand reuses:
4765 /// ext opnd1 to ConsideredExtType.
4766 /// \p PromotedInsts maps the instructions to their type before promotion.
4767 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4768 const InstrToOrigTy &PromotedInsts, bool IsSExt);
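  // Illustrative example (added for this annotated listing, not part of the
  // original source): for
  //   %a = add nsw i16 %x, 1
  //   %e = sext i16 %a to i32
  // promotion (#1) rewrites the pattern as
  //   %e = add nsw i32 (sext i16 %x to i32), 1
  // while operand reuse (#2) applies to patterns such as
  //   sext i16 (trunc i32 %y to i16) to i32
  // when the truncate only drops previously sign-extended bits.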
4769
4770 /// Utility function to determine if \p OpIdx should be promoted when
4771 /// promoting \p Inst.
4772 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4773 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4774 }
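  // (Operand 0 of a select is its i1 condition, which must keep its type, so
  // it is never extended.)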
4775
4776 /// Utility function to promote the operand of \p Ext when this
4777 /// operand is a promotable trunc or sext or zext.
4778 /// \p PromotedInsts maps the instructions to their type before promotion.
4779 /// \p CreatedInstsCost[out] contains the cost of all instructions
4780 /// created to promote the operand of Ext.
4781 /// Newly added extensions are inserted in \p Exts.
4782 /// Newly added truncates are inserted in \p Truncs.
4783 /// Should never be called directly.
4784 /// \return The promoted value which is used instead of Ext.
4785 static Value *promoteOperandForTruncAndAnyExt(
4786 Instruction *Ext, TypePromotionTransaction &TPT,
4787 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4788 SmallVectorImpl<Instruction *> *Exts,
4789 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4790
4791 /// Utility function to promote the operand of \p Ext when this
4792 /// operand is promotable and is not a supported trunc or sext.
4793 /// \p PromotedInsts maps the instructions to their type before promotion.
4794 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4795 /// created to promote the operand of Ext.
4796 /// Newly added extensions are inserted in \p Exts.
4797 /// Newly added truncates are inserted in \p Truncs.
4798 /// Should never be called directly.
4799 /// \return The promoted value which is used instead of Ext.
4800 static Value *promoteOperandForOther(Instruction *Ext,
4801 TypePromotionTransaction &TPT,
4802 InstrToOrigTy &PromotedInsts,
4803 unsigned &CreatedInstsCost,
4804 SmallVectorImpl<Instruction *> *Exts,
4805 SmallVectorImpl<Instruction *> *Truncs,
4806 const TargetLowering &TLI, bool IsSExt);
4807
4808 /// \see promoteOperandForOther.
4809 static Value *signExtendOperandForOther(
4810 Instruction *Ext, TypePromotionTransaction &TPT,
4811 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4812 SmallVectorImpl<Instruction *> *Exts,
4813 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4814 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4815 Exts, Truncs, TLI, true);
4816 }
4817
4818 /// \see promoteOperandForOther.
4819 static Value *zeroExtendOperandForOther(
4820 Instruction *Ext, TypePromotionTransaction &TPT,
4821 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4822 SmallVectorImpl<Instruction *> *Exts,
4823 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4824 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4825 Exts, Truncs, TLI, false);
4826 }
4827
4828public:
4829 /// Type for the utility function that promotes the operand of Ext.
4830 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4831 InstrToOrigTy &PromotedInsts,
4832 unsigned &CreatedInstsCost,
4833 SmallVectorImpl<Instruction *> *Exts,
4834 SmallVectorImpl<Instruction *> *Truncs,
4835 const TargetLowering &TLI);
4836
4837 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4838 /// action to promote the operand of \p Ext instead of using Ext.
4839 /// \return NULL if no promotable action is possible with the current
4840 /// sign extension.
4841 /// \p InsertedInsts keeps track of all the instructions inserted by the
4842 /// other CodeGenPrepare optimizations. This information is important
4843 /// because we do not want to promote these instructions as CodeGenPrepare
4844 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4845 /// \p PromotedInsts maps the instructions to their type before promotion.
4846 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4847 const TargetLowering &TLI,
4848 const InstrToOrigTy &PromotedInsts);
4849};
4850
4851} // end anonymous namespace
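// Typical use of TypePromotionHelper, mirroring the SExt/ZExt case of
// matchOperationAddr further below (sketch added to this annotated listing):
//   if (TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
//           Ext, InsertedInsts, TLI, PromotedInsts)) {
//     unsigned Cost = 0;
//     Value *Promoted =
//         TPH(Ext, TPT, PromotedInsts, Cost, nullptr, nullptr, TLI);
//     // ... match Promoted and roll back the transaction if it does not pay.
//   }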
4852
4853bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4854 Type *ConsideredExtType,
4855 const InstrToOrigTy &PromotedInsts,
4856 bool IsSExt) {
4857 // The promotion helper does not know how to deal with vector types yet.
4858 // To be able to fix that, we would need to fix the places where we
4859 // statically extend, e.g., constants and such.
4860 if (Inst->getType()->isVectorTy())
4861 return false;
4862
4863 // We can always get through zext.
4864 if (isa<ZExtInst>(Inst))
4865 return true;
4866
4867 // sext(sext) is ok too.
4868 if (IsSExt && isa<SExtInst>(Inst))
4869 return true;
4870
4871 // We can get through binary operator, if it is legal. In other words, the
4872 // binary operator must have a nuw or nsw flag.
4873 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4874 if (isa<OverflowingBinaryOperator>(BinOp) &&
4875 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4876 (IsSExt && BinOp->hasNoSignedWrap())))
4877 return true;
4878
4879 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4880 if ((Inst->getOpcode() == Instruction::And ||
4881 Inst->getOpcode() == Instruction::Or))
4882 return true;
4883
4884 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4885 if (Inst->getOpcode() == Instruction::Xor) {
4886 // Make sure it is not a NOT.
4887 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4888 if (!Cst->getValue().isAllOnes())
4889 return true;
4890 }
4891
4892 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4893 // It may change a poisoned value into a regular value, like
4894 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4895 // poisoned value regular value
4896 // It should be OK since undef covers valid value.
4897 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4898 return true;
4899
4900 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4901 // It may change a poisoned value into a regular value, like
4902 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4903 // poisoned value regular value
4904 // It should be OK since undef covers valid value.
4905 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4906 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4907 if (ExtInst->hasOneUse()) {
4908 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4909 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4910 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4911 if (Cst &&
4912 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4913 return true;
4914 }
4915 }
4916 }
4917
4918 // Check if we can do the following simplification.
4919 // ext(trunc(opnd)) --> ext(opnd)
4920 if (!isa<TruncInst>(Inst))
4921 return false;
4922
4923 Value *OpndVal = Inst->getOperand(0);
4924 // Check if we can use this operand in the extension.
4925 // If the type is larger than the result type of the extension, we cannot.
4926 if (!OpndVal->getType()->isIntegerTy() ||
4927 OpndVal->getType()->getIntegerBitWidth() >
4928 ConsideredExtType->getIntegerBitWidth())
4929 return false;
4930
4931 // If the operand of the truncate is not an instruction, we will not have
4932 // any information on the dropped bits.
4933 // (Actually we could for constant but it is not worth the extra logic).
4934 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4935 if (!Opnd)
4936 return false;
4937
4938 // Check if the source of the truncate is narrow enough.
4939 // I.e., check that the trunc just drops extended bits of the same kind as
4940 // the extension.
4941 // #1 get the type of the operand and check the kind of the extended bits.
4942 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4943 if (OpndType)
4944 ;
4945 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4946 OpndType = Opnd->getOperand(0)->getType();
4947 else
4948 return false;
4949
4950 // #2 check that the truncate just drops extended bits.
4951 return Inst->getType()->getIntegerBitWidth() >=
4952 OpndType->getIntegerBitWidth();
4953}
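// Illustration of the trunc case above (example added to this listing): given
//   %w = sext i16 %a to i64
//   %t = trunc i64 %w to i32
//   %e = sext i32 %t to i64
// OpndType is i16, and the trunc result type i32 is at least as wide, so the
// truncate only drops sign-extension bits and canGetThrough returns true
// for %t with IsSExt set.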
4954
4955TypePromotionHelper::Action TypePromotionHelper::getAction(
4956 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4957 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4958 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4959 "Unexpected instruction type");
4960 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4961 Type *ExtTy = Ext->getType();
4962 bool IsSExt = isa<SExtInst>(Ext);
4963 // If the operand of the extension is not an instruction, we cannot
4964 // get through.
4965 // If it is, check whether we can get through it.
4966 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4967 return nullptr;
4968
4969 // Do not promote if the operand has been added by codegenprepare.
4970 // Otherwise, it means we are undoing an optimization that is likely to be
4971 // redone, thus causing a potential infinite loop.
4972 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4973 return nullptr;
4974
4975 // SExt or Trunc instructions.
4976 // Return the related handler.
4977 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4978 isa<ZExtInst>(ExtOpnd))
4979 return promoteOperandForTruncAndAnyExt;
4980
4981 // Regular instruction.
4982 // Abort early if we will have to insert non-free instructions.
4983 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4984 return nullptr;
4985 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4986}
4987
4988Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4989 Instruction *SExt, TypePromotionTransaction &TPT,
4990 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4991 SmallVectorImpl<Instruction *> *Exts,
4992 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4993 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4994 // get through it and this method should not be called.
4995 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4996 Value *ExtVal = SExt;
4997 bool HasMergedNonFreeExt = false;
4998 if (isa<ZExtInst>(SExtOpnd)) {
4999 // Replace s|zext(zext(opnd))
5000 // => zext(opnd).
5001 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
5002 Value *ZExt =
5003 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
5004 TPT.replaceAllUsesWith(SExt, ZExt);
5005 TPT.eraseInstruction(SExt);
5006 ExtVal = ZExt;
5007 } else {
5008 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
5009 // => z|sext(opnd).
5010 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
5011 }
5012 CreatedInstsCost = 0;
5013
5014 // Remove dead code.
5015 if (SExtOpnd->use_empty())
5016 TPT.eraseInstruction(SExtOpnd);
5017
5018 // Check if the extension is still needed.
5019 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5020 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5021 if (ExtInst) {
5022 if (Exts)
5023 Exts->push_back(ExtInst);
5024 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5025 }
5026 return ExtVal;
5027 }
5028
5029 // At this point we have: ext ty opnd to ty.
5030 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5031 Value *NextVal = ExtInst->getOperand(0);
5032 TPT.eraseInstruction(ExtInst, NextVal);
5033 return NextVal;
5034}
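// For example (note added to this listing): given
//   %z = zext i16 %a to i32
//   %s = sext i32 %z to i64
// the merge above replaces %s with "zext i16 %a to i64" and erases the sext;
// the new zext is kept because its source and result types still differ.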
5035
5036Value *TypePromotionHelper::promoteOperandForOther(
5037 Instruction *Ext, TypePromotionTransaction &TPT,
5038 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5039 SmallVectorImpl<Instruction *> *Exts,
5040 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5041 bool IsSExt) {
5042 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5043 // get through it and this method should not be called.
5044 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5045 CreatedInstsCost = 0;
5046 if (!ExtOpnd->hasOneUse()) {
5047 // ExtOpnd will be promoted.
5048 // All its uses, but Ext, will need to use a truncated value of the
5049 // promoted version.
5050 // Create the truncate now.
5051 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5052 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5053 // Insert it just after the definition.
5054 ITrunc->moveAfter(ExtOpnd);
5055 if (Truncs)
5056 Truncs->push_back(ITrunc);
5057 }
5058
5059 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5060 // Restore the operand of Ext (which has been replaced by the previous call
5061 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5062 TPT.setOperand(Ext, 0, ExtOpnd);
5063 }
5064
5065 // Get through the Instruction:
5066 // 1. Update its type.
5067 // 2. Replace the uses of Ext by Inst.
5068 // 3. Extend each operand that needs to be extended.
5069
5070 // Remember the original type of the instruction before promotion.
5071 // This is useful to know that the high bits are sign extended bits.
5072 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5073 // Step #1.
5074 TPT.mutateType(ExtOpnd, Ext->getType());
5075 // Step #2.
5076 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5077 // Step #3.
5078 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5079 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5080 ++OpIdx) {
5081 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5082 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5083 !shouldExtOperand(ExtOpnd, OpIdx)) {
5084 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5085 continue;
5086 }
5087 // Check if we can statically extend the operand.
5088 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5089 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5090 LLVM_DEBUG(dbgs() << "Statically extend\n");
5091 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5092 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5093 : Cst->getValue().zext(BitWidth);
5094 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5095 continue;
5096 }
5097 // UndefValues are typed, so we have to statically extend them.
5098 if (isa<UndefValue>(Opnd)) {
5099 LLVM_DEBUG(dbgs() << "Statically extend\n");
5100 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5101 continue;
5102 }
5103
5104 // Otherwise we have to explicitly sign extend the operand.
5105 Value *ValForExtOpnd = IsSExt
5106 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5107 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5108 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5109 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5110 if (!InstForExtOpnd)
5111 continue;
5112
5113 if (Exts)
5114 Exts->push_back(InstForExtOpnd);
5115
5116 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5117 }
5118 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5119 TPT.eraseInstruction(Ext);
5120 return ExtOpnd;
5121}
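// Net effect on a small example (sketch added to this annotated listing):
// promoting the operand of
//   %op = add nsw i32 %a, 1
//   %e  = sext i32 %op to i64
// mutates %op to i64, replaces the uses of %e with %op, and extends the
// operands, yielding
//   %pa = sext i32 %a to i64
//   %op = add nsw i64 %pa, 1   ; the constant 1 is extended statically
// Any other users of the original i32 value are rewired through an inserted
// trunc (see the !hasOneUse() path above).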
5122
5123/// Check whether or not promoting an instruction to a wider type is profitable.
5124/// \p NewCost gives the cost of extension instructions created by the
5125/// promotion.
5126 /// \p OldCost gives the cost of extension instructions before the promotion
5127 /// plus the number of instructions that have been
5128 /// matched in the addressing mode thanks to the promotion.
5129/// \p PromotedOperand is the value that has been promoted.
5130/// \return True if the promotion is profitable, false otherwise.
5131bool AddressingModeMatcher::isPromotionProfitable(
5132 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5133 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5134 << '\n');
5135 // The cost of the new extensions is greater than the cost of the
5136 // old extension plus what we folded.
5137 // This is not profitable.
5138 if (NewCost > OldCost)
5139 return false;
5140 if (NewCost < OldCost)
5141 return true;
5142 // The promotion is neutral but it may help folding the sign extension in
5143 // loads for instance.
5144 // Check that we did not create an illegal instruction.
5145 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5146}
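// For example (note added to this listing): if the promotion created one
// non-free extension (NewCost == 1) but removed the original extension and
// let one extra instruction fold into the addressing mode (OldCost == 2), it
// pays off; at equal cost, the legality of the promoted instruction breaks
// the tie.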
5147
5148/// Given an instruction or constant expr, see if we can fold the operation
5149/// into the addressing mode. If so, update the addressing mode and return
5150/// true, otherwise return false without modifying AddrMode.
5151 /// If \p MovedAway is not NULL, it contains the information of whether or
5152 /// not \p AddrInst has to be folded into the addressing mode on success.
5153 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5154 /// mode because it has been moved away.
5155 /// Thus \p AddrInst must not be added to the matched instructions.
5156/// This state can happen when AddrInst is a sext, since it may be moved away.
5157/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5158/// not be referenced anymore.
5159bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5160 unsigned Depth,
5161 bool *MovedAway) {
5162 // Avoid exponential behavior on extremely deep expression trees.
5163 if (Depth >= 5)
5164 return false;
5165
5166 // By default, all matched instructions stay in place.
5167 if (MovedAway)
5168 *MovedAway = false;
5169
5170 switch (Opcode) {
5171 case Instruction::PtrToInt:
5172 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5173 return matchAddr(AddrInst->getOperand(0), Depth);
5174 case Instruction::IntToPtr: {
5175 auto AS = AddrInst->getType()->getPointerAddressSpace();
5176 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5177 // This inttoptr is a no-op if the integer type is pointer sized.
5178 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5179 return matchAddr(AddrInst->getOperand(0), Depth);
5180 return false;
5181 }
5182 case Instruction::BitCast:
5183 // BitCast is always a noop, and we can handle it as long as it is
5184 // int->int or pointer->pointer (we don't want int<->fp or something).
5185 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5186 // Don't touch identity bitcasts. These were probably put here by LSR,
5187 // and we don't want to mess around with them. Assume it knows what it
5188 // is doing.
5189 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5190 return matchAddr(AddrInst->getOperand(0), Depth);
5191 return false;
5192 case Instruction::AddrSpaceCast: {
5193 unsigned SrcAS =
5194 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5195 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5196 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5197 return matchAddr(AddrInst->getOperand(0), Depth);
5198 return false;
5199 }
5200 case Instruction::Add: {
5201 // Check to see if we can merge in one operand, then the other. If so, we
5202 // win.
5203 ExtAddrMode BackupAddrMode = AddrMode;
5204 unsigned OldSize = AddrModeInsts.size();
5205 // Start a transaction at this point.
5206 // The LHS may match but not the RHS.
5207 // Therefore, we need a higher level restoration point to undo partially
5208 // matched operation.
5209 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5210 TPT.getRestorationPoint();
5211
5212 // Try to match an integer constant second to increase its chance of ending
5213 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5214 int First = 0, Second = 1;
5215 if (isa<ConstantInt>(AddrInst->getOperand(First))
5216 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5217 std::swap(First, Second);
5218 AddrMode.InBounds = false;
5219 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5220 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5221 return true;
5222
5223 // Restore the old addr mode info.
5224 AddrMode = BackupAddrMode;
5225 AddrModeInsts.resize(OldSize);
5226 TPT.rollback(LastKnownGood);
5227
5228 // Otherwise this was over-aggressive. Try merging operands in the opposite
5229 // order.
5230 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5231 matchAddr(AddrInst->getOperand(First), Depth + 1))
5232 return true;
5233
5234 // Otherwise we definitely can't merge the ADD in.
5235 AddrMode = BackupAddrMode;
5236 AddrModeInsts.resize(OldSize);
5237 TPT.rollback(LastKnownGood);
5238 break;
5239 }
5240 // case Instruction::Or:
5241 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5242 // break;
5243 case Instruction::Mul:
5244 case Instruction::Shl: {
5245 // Can only handle X*C and X << C.
5246 AddrMode.InBounds = false;
5247 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5248 if (!RHS || RHS->getBitWidth() > 64)
5249 return false;
5250 int64_t Scale = Opcode == Instruction::Shl
5251 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5252 : RHS->getSExtValue();
5253
5254 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5255 }
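  // For example (note added to this listing), "shl i64 %x, 3" is matched above
  // as %x with a scale of 8, and "mul i64 %x, 4" as %x with a scale of 4.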
5256 case Instruction::GetElementPtr: {
5257 // Scan the GEP. We handle it if it contains constant offsets and at most
5258 // one variable offset.
5259 int VariableOperand = -1;
5260 unsigned VariableScale = 0;
5261
5262 int64_t ConstantOffset = 0;
5263 gep_type_iterator GTI = gep_type_begin(AddrInst);
5264 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5265 if (StructType *STy = GTI.getStructTypeOrNull()) {
5266 const StructLayout *SL = DL.getStructLayout(STy);
5267 unsigned Idx =
5268 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5269 ConstantOffset += SL->getElementOffset(Idx);
5270 } else {
5271 TypeSize TS = GTI.getSequentialElementStride(DL);
5272 if (TS.isNonZero()) {
5273 // The optimisations below currently only work for fixed offsets.
5274 if (TS.isScalable())
5275 return false;
5276 int64_t TypeSize = TS.getFixedValue();
5277 if (ConstantInt *CI =
5278 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5279 const APInt &CVal = CI->getValue();
5280 if (CVal.getSignificantBits() <= 64) {
5281 ConstantOffset += CVal.getSExtValue() * TypeSize;
5282 continue;
5283 }
5284 }
5285 // We only allow one variable index at the moment.
5286 if (VariableOperand != -1)
5287 return false;
5288
5289 // Remember the variable index.
5290 VariableOperand = i;
5291 VariableScale = TypeSize;
5292 }
5293 }
5294 }
5295
5296 // A common case is for the GEP to only do a constant offset. In this case,
5297 // just add it to the disp field and check validity.
5298 if (VariableOperand == -1) {
5299 AddrMode.BaseOffs += ConstantOffset;
5300 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5301 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5302 AddrMode.InBounds = false;
5303 return true;
5304 }
5305 AddrMode.BaseOffs -= ConstantOffset;
5306
5307 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5308 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5309 ConstantOffset > 0) {
5310 // Record GEPs with non-zero offsets as candidates for splitting in
5311 // the event that the offset cannot fit into the r+i addressing mode.
5312 // Simple and common case that only one GEP is used in calculating the
5313 // address for the memory access.
5314 Value *Base = AddrInst->getOperand(0);
5315 auto *BaseI = dyn_cast<Instruction>(Base);
5316 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5317 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5318 (BaseI && !isa<CastInst>(BaseI) &&
5319 !isa<GetElementPtrInst>(BaseI))) {
5320 // Make sure the parent block allows inserting non-PHI instructions
5321 // before the terminator.
5322 BasicBlock *Parent = BaseI ? BaseI->getParent()
5323 : &GEP->getFunction()->getEntryBlock();
5324 if (!Parent->getTerminator()->isEHPad())
5325 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5326 }
5327 }
5328
5329 return false;
5330 }
5331
5332 // Save the valid addressing mode in case we can't match.
5333 ExtAddrMode BackupAddrMode = AddrMode;
5334 unsigned OldSize = AddrModeInsts.size();
5335
5336 // See if the scale and offset amount is valid for this target.
5337 AddrMode.BaseOffs += ConstantOffset;
5338 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5339 AddrMode.InBounds = false;
5340
5341 // Match the base operand of the GEP.
5342 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5343 // If it couldn't be matched, just stuff the value in a register.
5344 if (AddrMode.HasBaseReg) {
5345 AddrMode = BackupAddrMode;
5346 AddrModeInsts.resize(OldSize);
5347 return false;
5348 }
5349 AddrMode.HasBaseReg = true;
5350 AddrMode.BaseReg = AddrInst->getOperand(0);
5351 }
5352
5353 // Match the remaining variable portion of the GEP.
5354 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5355 Depth)) {
5356 // If it couldn't be matched, try stuffing the base into a register
5357 // instead of matching it, and retrying the match of the scale.
5358 AddrMode = BackupAddrMode;
5359 AddrModeInsts.resize(OldSize);
5360 if (AddrMode.HasBaseReg)
5361 return false;
5362 AddrMode.HasBaseReg = true;
5363 AddrMode.BaseReg = AddrInst->getOperand(0);
5364 AddrMode.BaseOffs += ConstantOffset;
5365 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5366 VariableScale, Depth)) {
5367 // If even that didn't work, bail.
5368 AddrMode = BackupAddrMode;
5369 AddrModeInsts.resize(OldSize);
5370 return false;
5371 }
5372 }
5373
5374 return true;
5375 }
5376 case Instruction::SExt:
5377 case Instruction::ZExt: {
5378 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5379 if (!Ext)
5380 return false;
5381
5382 // Try to move this ext out of the way of the addressing mode.
5383 // Ask for a method for doing so.
5384 TypePromotionHelper::Action TPH =
5385 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5386 if (!TPH)
5387 return false;
5388
5389 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5390 TPT.getRestorationPoint();
5391 unsigned CreatedInstsCost = 0;
5392 unsigned ExtCost = !TLI.isExtFree(Ext);
5393 Value *PromotedOperand =
5394 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5395 // SExt has been moved away.
5396 // Thus either it will be rematched later in the recursive calls or it is
5397 // gone. Anyway, we must not fold it into the addressing mode at this point.
5398 // E.g.,
5399 // op = add opnd, 1
5400 // idx = ext op
5401 // addr = gep base, idx
5402 // is now:
5403 // promotedOpnd = ext opnd <- no match here
5404 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5405 // addr = gep base, op <- match
5406 if (MovedAway)
5407 *MovedAway = true;
5408
5409 assert(PromotedOperand &&
5410 "TypePromotionHelper should have filtered out those cases");
5411
5412 ExtAddrMode BackupAddrMode = AddrMode;
5413 unsigned OldSize = AddrModeInsts.size();
5414
5415 if (!matchAddr(PromotedOperand, Depth) ||
5416 // The total of the new cost is equal to the cost of the created
5417 // instructions.
5418 // The total of the old cost is equal to the cost of the extension plus
5419 // what we have saved in the addressing mode.
5420 !isPromotionProfitable(CreatedInstsCost,
5421 ExtCost + (AddrModeInsts.size() - OldSize),
5422 PromotedOperand)) {
5423 AddrMode = BackupAddrMode;
5424 AddrModeInsts.resize(OldSize);
5425 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5426 TPT.rollback(LastKnownGood);
5427 return false;
5428 }
5429
5430 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5431 AddrMode.replaceWith(Ext, PromotedOperand);
5432 return true;
5433 }
5434 case Instruction::Call:
5435 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5436 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5437 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5438 if (TLI.addressingModeSupportsTLS(GV))
5439 return matchAddr(AddrInst->getOperand(0), Depth);
5440 }
5441 }
5442 break;
5443 }
5444 return false;
5445}
5446
5447/// If we can, try to add the value of 'Addr' into the current addressing mode.
5448/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5449/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5450/// for the target.
5451///
5452bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5453 // Start a transaction at this point that we will rollback if the matching
5454 // fails.
5455 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5456 TPT.getRestorationPoint();
5457 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5458 if (CI->getValue().isSignedIntN(64)) {
5459 // Check if the addition would result in a signed overflow.
5460 int64_t Result;
5461 bool Overflow =
5462 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5463 if (!Overflow) {
5464 // Fold in immediates if legal for the target.
5465 AddrMode.BaseOffs = Result;
5466 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5467 return true;
5468 AddrMode.BaseOffs -= CI->getSExtValue();
5469 }
5470 }
5471 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5472 // If this is a global variable, try to fold it into the addressing mode.
5473 if (!AddrMode.BaseGV) {
5474 AddrMode.BaseGV = GV;
5475 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5476 return true;
5477 AddrMode.BaseGV = nullptr;
5478 }
5479 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5480 ExtAddrMode BackupAddrMode = AddrMode;
5481 unsigned OldSize = AddrModeInsts.size();
5482
5483 // Check to see if it is possible to fold this operation.
5484 bool MovedAway = false;
5485 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5486 // This instruction may have been moved away. If so, there is nothing
5487 // to check here.
5488 if (MovedAway)
5489 return true;
5490 // Okay, it's possible to fold this. Check to see if it is actually
5491 // *profitable* to do so. We use a simple cost model to avoid increasing
5492 // register pressure too much.
5493 if (I->hasOneUse() ||
5494 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5495 AddrModeInsts.push_back(I);
5496 return true;
5497 }
5498
5499 // It isn't profitable to do this, roll back.
5500 AddrMode = BackupAddrMode;
5501 AddrModeInsts.resize(OldSize);
5502 TPT.rollback(LastKnownGood);
5503 }
5504 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5505 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5506 return true;
5507 TPT.rollback(LastKnownGood);
5508 } else if (isa<ConstantPointerNull>(Addr)) {
5509 // Null pointer gets folded without affecting the addressing mode.
5510 return true;
5511 }
5512
5513 // Worst case, the target should support [reg] addressing modes. :)
5514 if (!AddrMode.HasBaseReg) {
5515 AddrMode.HasBaseReg = true;
5516 AddrMode.BaseReg = Addr;
5517 // Still check for legality in case the target supports [imm] but not [i+r].
5518 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5519 return true;
5520 AddrMode.HasBaseReg = false;
5521 AddrMode.BaseReg = nullptr;
5522 }
5523
5524 // If the base register is already taken, see if we can do [r+r].
5525 if (AddrMode.Scale == 0) {
5526 AddrMode.Scale = 1;
5527 AddrMode.ScaledReg = Addr;
5528 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5529 return true;
5530 AddrMode.Scale = 0;
5531 AddrMode.ScaledReg = nullptr;
5532 }
5533 // Couldn't match.
5534 TPT.rollback(LastKnownGood);
5535 return false;
5536}
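// In summary (note added to this listing), matchAddr tries, in order: folding
// a constant offset, folding a GlobalValue base, folding the defining
// instruction or constant expression through matchOperationAddr, using Addr as
// the base register, and finally using it as the scaled register; failed
// attempts are undone before the next option is tried.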
5537
5538/// Check to see if all uses of OpVal by the specified inline asm call are due
5539/// to memory operands. If so, return true, otherwise return false.
5540static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5541 const TargetLowering &TLI,
5542 const TargetRegisterInfo &TRI) {
5543 const Function *F = CI->getFunction();
5544 TargetLowering::AsmOperandInfoVector TargetConstraints =
5545 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5546
5547 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5548 // Compute the constraint code and ConstraintType to use.
5549 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5550
5551 // If this asm operand is our Value*, and if it isn't an indirect memory
5552 // operand, we can't fold it! TODO: Also handle C_Address?
5553 if (OpInfo.CallOperandVal == OpVal &&
5554 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5555 !OpInfo.isIndirect))
5556 return false;
5557 }
5558
5559 return true;
5560}
5561
5562/// Recursively walk all the uses of I until we find a memory use.
5563/// If we find an obviously non-foldable instruction, return true.
5564/// Add accessed addresses and types to MemoryUses.
5565static bool FindAllMemoryUses(
5566 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5567 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5568 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5569 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5570 // If we already considered this instruction, we're done.
5571 if (!ConsideredInsts.insert(I).second)
5572 return false;
5573
5574 // If this is an obviously unfoldable instruction, bail out.
5575 if (!MightBeFoldableInst(I))
5576 return true;
5577
5578 // Loop over all the uses, recursively processing them.
5579 for (Use &U : I->uses()) {
5580 // Conservatively return true if we're seeing a large number or a deep chain
5581 // of users. This avoids excessive compilation times in pathological cases.
5582 if (SeenInsts++ >= MaxAddressUsersToScan)
5583 return true;
5584
5585 Instruction *UserI = cast<Instruction>(U.getUser());
5586 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5587 MemoryUses.push_back({&U, LI->getType()});
5588 continue;
5589 }
5590
5591 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5592 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5593 return true; // Storing addr, not into addr.
5594 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5595 continue;
5596 }
5597
5598 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5599 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5600 return true; // Storing addr, not into addr.
5601 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5602 continue;
5603 }
5604
5605 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5606 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5607 return true; // Storing addr, not into addr.
5608 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5609 continue;
5610 }
5611
5612 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5613 if (CI->hasFnAttr(Attribute::Cold)) {
5614 // If this is a cold call, we can sink the addressing calculation into
5615 // the cold path. See optimizeCallInst
5616 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5617 continue;
5618 }
5619
5620 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5621 if (!IA)
5622 return true;
5623
5624 // If this is a memory operand, we're cool, otherwise bail out.
5625 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5626 return true;
5627 continue;
5628 }
5629
5630 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5631 PSI, BFI, SeenInsts))
5632 return true;
5633 }
5634
5635 return false;
5636}
5637
5638static bool FindAllMemoryUses(
5639 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5640 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5641 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5642 unsigned SeenInsts = 0;
5643 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5644 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5645 PSI, BFI, SeenInsts);
5646}
5647
5648
5649/// Return true if Val is already known to be live at the use site that we're
5650/// folding it into. If so, there is no cost to include it in the addressing
5651/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5652/// instruction already.
5653bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5654 Value *KnownLive1,
5655 Value *KnownLive2) {
5656 // If Val is either of the known-live values, we know it is live!
5657 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5658 return true;
5659
5660 // All values other than instructions and arguments (e.g. constants) are live.
5661 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5662 return true;
5663
5664 // If Val is a constant sized alloca in the entry block, it is live; this is
5665 // true because it is just a reference to the stack/frame pointer, which is
5666 // live for the whole function.
5667 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5668 if (AI->isStaticAlloca())
5669 return true;
5670
5671 // Check to see if this value is already used in the memory instruction's
5672 // block. If so, it's already live into the block at the very least, so we
5673 // can reasonably fold it.
5674 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5675}
5676
5677/// It is possible for the addressing mode of the machine to fold the specified
5678/// instruction into a load or store that ultimately uses it.
5679/// However, the specified instruction has multiple uses.
5680/// Given this, it may actually increase register pressure to fold it
5681/// into the load. For example, consider this code:
5682///
5683/// X = ...
5684/// Y = X+1
5685/// use(Y) -> nonload/store
5686/// Z = Y+1
5687/// load Z
5688///
5689/// In this case, Y has multiple uses, and can be folded into the load of Z
5690/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5691/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5692/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5693/// number of computations either.
5694///
5695/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5696/// X was live across 'load Z' for other reasons, we actually *would* want to
5697/// fold the addressing mode in the Z case. This would make Y die earlier.
5698bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5699 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5700 if (IgnoreProfitability)
5701 return true;
5702
5703 // AMBefore is the addressing mode before this instruction was folded into it,
5704 // and AMAfter is the addressing mode after the instruction was folded. Get
5705 // the set of registers referenced by AMAfter and subtract out those
5706 // referenced by AMBefore: this is the set of values which folding in this
5707 // address extends the lifetime of.
5708 //
5709 // Note that there are only two potential values being referenced here,
5710 // BaseReg and ScaleReg (global addresses are always available, as are any
5711 // folded immediates).
5712 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5713
5714 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5715 // lifetime wasn't extended by adding this instruction.
5716 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5717 BaseReg = nullptr;
5718 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5719 ScaledReg = nullptr;
5720
5721 // If folding this instruction (and its subexprs) didn't extend any live
5722 // ranges, we're ok with it.
5723 if (!BaseReg && !ScaledReg)
5724 return true;
5725
5726 // If all uses of this instruction can have the address mode sunk into them,
5727 // we can remove the addressing mode and effectively trade one live register
5728 // for another (at worst.) In this context, folding an addressing mode into
5729 // the use is just a particularly nice way of sinking it.
5730 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5731 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5732 return false; // Has a non-memory, non-foldable use!
5733
5734 // Now that we know that all uses of this instruction are part of a chain of
5735 // computation involving only operations that could theoretically be folded
5736 // into a memory use, loop over each of these memory operation uses and see
5737 // if they could *actually* fold the instruction. The assumption is that
5738 // addressing modes are cheap and that duplicating the computation involved
5739 // many times is worthwhile, even on a fastpath. For sinking candidates
5740 // (i.e. cold call sites), this serves as a way to prevent excessive code
5741 // growth since most architectures have some reasonable small and fast way to
5742 // compute an effective address. (i.e LEA on x86)
5743 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5744 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5745 Value *Address = Pair.first->get();
5746 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5747 Type *AddressAccessTy = Pair.second;
5748 unsigned AS = Address->getType()->getPointerAddressSpace();
5749
5750 // Do a match against the root of this address, ignoring profitability. This
5751 // will tell us if the addressing mode for the memory operation will
5752 // *actually* cover the shared instruction.
5753 ExtAddrMode Result;
5754 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5755 0);
5756 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5757 TPT.getRestorationPoint();
5758 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5759 AddressAccessTy, AS, UserI, Result,
5760 InsertedInsts, PromotedInsts, TPT,
5761 LargeOffsetGEP, OptSize, PSI, BFI);
5762 Matcher.IgnoreProfitability = true;
5763 bool Success = Matcher.matchAddr(Address, 0);
5764 (void)Success;
5765 assert(Success && "Couldn't select *anything*?");
5766
5767 // The match was to check the profitability, the changes made are not
5768 // part of the original matcher. Therefore, they should be dropped
5769 // otherwise the original matcher will not present the right state.
5770 TPT.rollback(LastKnownGood);
5771
5772 // If the match didn't cover I, then it won't be shared by it.
5773 if (!is_contained(MatchedAddrModeInsts, I))
5774 return false;
5775
5776 MatchedAddrModeInsts.clear();
5777 }
5778
5779 return true;
5780}
5781
5782/// Return true if the specified values are defined in a
5783/// different basic block than BB.
5784static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5785 if (Instruction *I = dyn_cast<Instruction>(V))
5786 return I->getParent() != BB;
5787 return false;
5788}
5789
5790// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5791// is the first instruction that will use Addr. So we need to find the first
5792 // user of Addr in the current BB.
5793static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5794 Value *SunkAddr) {
5795 if (Addr->hasOneUse())
5796 return MemoryInst->getIterator();
5797
5798 // We already have a SunkAddr in the current BB, but we may need to insert a
5799 // cast instruction after it.
5800 if (SunkAddr) {
5801 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5802 return std::next(AddrInst->getIterator());
5803 }
5804
5805 // Find the first user of Addr in current BB.
5806 Instruction *Earliest = MemoryInst;
5807 for (User *U : Addr->users()) {
5808 Instruction *UserInst = dyn_cast<Instruction>(U);
5809 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5810 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5811 continue;
5812 if (UserInst->comesBefore(Earliest))
5813 Earliest = UserInst;
5814 }
5815 }
5816 return Earliest->getIterator();
5817}
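// For example (note added to this listing): if Addr already feeds an earlier
// load in the same block, the sunk address computation is inserted before that
// earlier user (or right after an existing SunkAddr) rather than immediately
// before MemoryInst.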
5818
5819/// Sink addressing mode computation immediately before MemoryInst if doing so
5820/// can be done without increasing register pressure. The need for the
5821/// register pressure constraint means this can end up being an all or nothing
5822/// decision for all uses of the same addressing computation.
5823///
5824/// Load and Store Instructions often have addressing modes that can do
5825/// significant amounts of computation. As such, instruction selection will try
5826/// to get the load or store to do as much computation as possible for the
5827/// program. The problem is that isel can only see within a single block. As
5828/// such, we sink as much legal addressing mode work into the block as possible.
5829///
5830/// This method is used to optimize both load/store and inline asms with memory
5831/// operands. It's also used to sink addressing computations feeding into cold
5832/// call sites into their (cold) basic block.
5833///
5834/// The motivation for handling sinking into cold blocks is that doing so can
5835/// both enable other address mode sinking (by satisfying the register pressure
5836/// constraint above), and reduce register pressure globally (by removing the
5837/// addressing mode computation from the fast path entirely.).
5838bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5839 Type *AccessTy, unsigned AddrSpace) {
5840 Value *Repl = Addr;
5841
5842 // Try to collapse single-value PHI nodes. This is necessary to undo
5843 // unprofitable PRE transformations.
5844 SmallVector<Value *, 8> worklist;
5845 SmallPtrSet<Value *, 16> Visited;
5846 worklist.push_back(Addr);
5847
5848 // Use a worklist to iteratively look through PHI and select nodes, and
5849 // ensure that the addressing mode obtained from the non-PHI/select roots of
5850 // the graph are compatible.
5851 bool PhiOrSelectSeen = false;
5852 SmallVector<Instruction *, 16> AddrModeInsts;
5853 const SimplifyQuery SQ(*DL, TLInfo);
5854 AddressingModeCombiner AddrModes(SQ, Addr);
5855 TypePromotionTransaction TPT(RemovedInsts);
5856 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5857 TPT.getRestorationPoint();
5858 while (!worklist.empty()) {
5859 Value *V = worklist.pop_back_val();
5860
5861 // We allow traversing cyclic Phi nodes.
5862 // In case of success, after this loop we ensure that traversing through
5863 // Phi nodes ends up with all cases computing an address of the form
5864 // BaseGV + Base + Scale * Index + Offset
5865 // where Scale and Offset are constants and BaseGV, Base and Index
5866 // are exactly the same Values in all cases.
5867 // It means that BaseGV, Scale and Offset dominate our memory instruction
5868 // and have the same value as they had in address computation represented
5869 // as Phi. So we can safely sink address computation to memory instruction.
5870 if (!Visited.insert(V).second)
5871 continue;
5872
5873 // For a PHI node, push all of its incoming values.
5874 if (PHINode *P = dyn_cast<PHINode>(V)) {
5875 append_range(worklist, P->incoming_values());
5876 PhiOrSelectSeen = true;
5877 continue;
5878 }
5879 // Similar for select.
5880 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5881 worklist.push_back(SI->getFalseValue());
5882 worklist.push_back(SI->getTrueValue());
5883 PhiOrSelectSeen = true;
5884 continue;
5885 }
5886
5887 // For non-PHIs, determine the addressing mode being computed. Note that
5888 // the result may differ depending on what other uses our candidate
5889 // addressing instructions might have.
5890 AddrModeInsts.clear();
5891 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5892 0);
5893 // Defer the query (and possible computation of) the dom tree to point of
5894 // actual use. It's expected that most address matches don't actually need
5895 // the domtree.
5896 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5897 Function *F = MemoryInst->getParent()->getParent();
5898 return this->getDT(*F);
5899 };
5900 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5901 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5902 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5903 BFI.get());
5904
5905 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5906 if (GEP && !NewGEPBases.count(GEP)) {
5907 // If splitting the underlying data structure can reduce the offset of a
5908 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5909 // previously split data structures.
5910 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5911 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5912 }
5913
5914 NewAddrMode.OriginalValue = V;
5915 if (!AddrModes.addNewAddrMode(NewAddrMode))
5916 break;
5917 }
5918
5919 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5920 // or we have multiple but either couldn't combine them or combining them
5921 // wouldn't do anything useful, bail out now.
5922 if (!AddrModes.combineAddrModes()) {
5923 TPT.rollback(LastKnownGood);
5924 return false;
5925 }
5926 bool Modified = TPT.commit();
5927
5928 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5929 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5930
5931 // If all the instructions matched are already in this BB, don't do anything.
5932 // If we saw a Phi node then it is definitely not local, and if we saw a
5933 // select then we want to push the address calculation past it even if it's
5934 // already in this BB.
5935 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5936 return IsNonLocalValue(V, MemoryInst->getParent());
5937 })) {
5938 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5939 << "\n");
5940 return Modified;
5941 }
5942
5943 // Now that we have determined the addressing expression we want to use and
5944 // know that we have to sink it into this block, check to see if we have
5945 // already done this for some other load/store instr in this block. If so,
5946 // reuse the computation. Before attempting reuse, check if the address is
5947 // still valid, as it may have been erased.
5948
5949 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5950
5951 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5952 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5953
5954 // The current BB may be optimized multiple times and we can't guarantee
5955 // that the reuse of Addr happens later, so call findInsertPos to find an
5956 // appropriate insert position.
5957 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5958
5959 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5960 if (!SunkAddr) {
5961 auto &DT = getDT(*MemoryInst->getFunction());
5962 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5963 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5964 return Modified;
5965 }
5966
5967 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5968
5969 if (SunkAddr) {
5970 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5971 << " for " << *MemoryInst << "\n");
5972 if (SunkAddr->getType() != Addr->getType()) {
5973 if (SunkAddr->getType()->getPointerAddressSpace() !=
5974 Addr->getType()->getPointerAddressSpace() &&
5975 !DL->isNonIntegralPointerType(Addr->getType())) {
5976 // There are two reasons the address spaces might not match: a no-op
5977 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5978 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5979 // TODO: allow bitcast between different address space pointers with the
5980 // same size.
5981 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5982 SunkAddr =
5983 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5984 } else
5985 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5986 }
5987 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5988 SubtargetInfo->addrSinkUsingGEPs())) {
5989 // By default, we use the GEP-based method when AA is used later. This
5990 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5991 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5992 << " for " << *MemoryInst << "\n");
5993 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5994
5995 // First, find the pointer.
5996 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5997 ResultPtr = AddrMode.BaseReg;
5998 AddrMode.BaseReg = nullptr;
5999 }
6000
6001 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
6002 // We can't add more than one pointer together, nor can we scale a
6003 // pointer (both of which seem meaningless).
6004 if (ResultPtr || AddrMode.Scale != 1)
6005 return Modified;
6006
6007 ResultPtr = AddrMode.ScaledReg;
6008 AddrMode.Scale = 0;
6009 }
6010
6011 // It is only safe to sign extend the BaseReg if we know that the math
6012 // required to create it did not overflow before we extend it. Since
6013 // the original IR value was tossed in favor of a constant back when
6014 // the AddrMode was created we need to bail out gracefully if widths
6015 // do not match instead of extending it.
6016 //
6017 // (See below for code to add the scale.)
6018 if (AddrMode.Scale) {
6019 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6020 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6021 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6022 return Modified;
6023 }
6024
6025 GlobalValue *BaseGV = AddrMode.BaseGV;
6026 if (BaseGV != nullptr) {
6027 if (ResultPtr)
6028 return Modified;
6029
6030 if (BaseGV->isThreadLocal()) {
6031 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6032 } else {
6033 ResultPtr = BaseGV;
6034 }
6035 }
6036
6037 // If the real base value actually came from an inttoptr, then the matcher
6038 // will look through it and provide only the integer value. In that case,
6039 // use it here.
6040 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6041 if (!ResultPtr && AddrMode.BaseReg) {
6042 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6043 "sunkaddr");
6044 AddrMode.BaseReg = nullptr;
6045 } else if (!ResultPtr && AddrMode.Scale == 1) {
6046 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6047 "sunkaddr");
6048 AddrMode.Scale = 0;
6049 }
6050 }
6051
6052 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6053 !AddrMode.BaseOffs) {
6054 SunkAddr = Constant::getNullValue(Addr->getType());
6055 } else if (!ResultPtr) {
6056 return Modified;
6057 } else {
6058 Type *I8PtrTy =
6059 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6060
6061 // Start with the base register. Do this first so that subsequent address
6062 // matching finds it last, which will prevent it from trying to match it
6063 // as the scaled value in case it happens to be a mul. That would be
6064 // problematic if we've sunk a different mul for the scale, because then
6065 // we'd end up sinking both muls.
6066 if (AddrMode.BaseReg) {
6067 Value *V = AddrMode.BaseReg;
6068 if (V->getType() != IntPtrTy)
6069 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6070
6071 ResultIndex = V;
6072 }
6073
6074 // Add the scale value.
6075 if (AddrMode.Scale) {
6076 Value *V = AddrMode.ScaledReg;
6077 if (V->getType() == IntPtrTy) {
6078 // done.
6079 } else {
6080 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6081 cast<IntegerType>(V->getType())->getBitWidth() &&
6082 "We can't transform if ScaledReg is too narrow");
6083 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6084 }
6085
6086 if (AddrMode.Scale != 1)
6087 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6088 "sunkaddr");
6089 if (ResultIndex)
6090 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6091 else
6092 ResultIndex = V;
6093 }
6094
6095 // Add in the Base Offset if present.
6096 if (AddrMode.BaseOffs) {
6097 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6098 if (ResultIndex) {
6099 // We need to add this separately from the scale above to help with
6100 // SDAG consecutive load/store merging.
6101 if (ResultPtr->getType() != I8PtrTy)
6102 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6103 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6104 AddrMode.InBounds);
6105 }
6106
6107 ResultIndex = V;
6108 }
6109
6110 if (!ResultIndex) {
6111 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6112 // We know that we have a pointer without any offsets. If this pointer
6113 // originates from a different basic block than the current one, we
6114 // must be able to recreate it in the current basic block.
6115 // We do not support the recreation of any instructions yet.
6116 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6117 return Modified;
6118 SunkAddr = ResultPtr;
6119 } else {
6120 if (ResultPtr->getType() != I8PtrTy)
6121 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6122 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6123 AddrMode.InBounds);
6124 }
6125
6126 if (SunkAddr->getType() != Addr->getType()) {
6127 if (SunkAddr->getType()->getPointerAddressSpace() !=
6128 Addr->getType()->getPointerAddressSpace() &&
6129 !DL->isNonIntegralPointerType(Addr->getType())) {
6130 // There are two reasons the address spaces might not match: a no-op
6131 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6132 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6133 // TODO: allow bitcast between different address space pointers with
6134 // the same size.
6135 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6136 SunkAddr =
6137 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6138 } else
6139 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6140 }
6141 }
6142 } else {
6143 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6144 // non-integral pointers, so in that case bail out now.
6145 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6146 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6147 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6148 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6149 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6150 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6151 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6152 (AddrMode.BaseGV &&
6153 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6154 return Modified;
6155
6156 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6157 << " for " << *MemoryInst << "\n");
6158 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6159 Value *Result = nullptr;
6160
6161 // Start with the base register. Do this first so that subsequent address
6162 // matching finds it last, which will prevent it from trying to match it
6163 // as the scaled value in case it happens to be a mul. That would be
6164 // problematic if we've sunk a different mul for the scale, because then
6165 // we'd end up sinking both muls.
6166 if (AddrMode.BaseReg) {
6167 Value *V = AddrMode.BaseReg;
6168 if (V->getType()->isPointerTy())
6169 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6170 if (V->getType() != IntPtrTy)
6171 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6172 Result = V;
6173 }
6174
6175 // Add the scale value.
6176 if (AddrMode.Scale) {
6177 Value *V = AddrMode.ScaledReg;
6178 if (V->getType() == IntPtrTy) {
6179 // done.
6180 } else if (V->getType()->isPointerTy()) {
6181 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6182 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6183 cast<IntegerType>(V->getType())->getBitWidth()) {
6184 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6185 } else {
6186 // It is only safe to sign extend the BaseReg if we know that the math
6187 // required to create it did not overflow before we extend it. Since
6188 // the original IR value was tossed in favor of a constant back when
6189 // the AddrMode was created we need to bail out gracefully if widths
6190 // do not match instead of extending it.
6191 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6192 if (I && (Result != AddrMode.BaseReg))
6193 I->eraseFromParent();
6194 return Modified;
6195 }
6196 if (AddrMode.Scale != 1)
6197 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6198 "sunkaddr");
6199 if (Result)
6200 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6201 else
6202 Result = V;
6203 }
6204
6205 // Add in the BaseGV if present.
6206 GlobalValue *BaseGV = AddrMode.BaseGV;
6207 if (BaseGV != nullptr) {
6208 Value *BaseGVPtr;
6209 if (BaseGV->isThreadLocal()) {
6210 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6211 } else {
6212 BaseGVPtr = BaseGV;
6213 }
6214 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6215 if (Result)
6216 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6217 else
6218 Result = V;
6219 }
6220
6221 // Add in the Base Offset if present.
6222 if (AddrMode.BaseOffs) {
6223 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6224 if (Result)
6225 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6226 else
6227 Result = V;
6228 }
6229
6230 if (!Result)
6231 SunkAddr = Constant::getNullValue(Addr->getType());
6232 else
6233 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6234 }
6235
6236 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6237 // Store the newly computed address into the cache. In the case we reused a
6238 // value, this should be idempotent.
6239 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6240
6241 // If we have no uses, recursively delete the value and all dead instructions
6242 // using it.
6243 if (Repl->use_empty()) {
6244 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6245 RecursivelyDeleteTriviallyDeadInstructions(
6246 Repl, TLInfo, nullptr,
6247 [&](Value *V) { removeAllAssertingVHReferences(V); });
6248 });
6249 }
6250 ++NumMemoryInsts;
6251 return true;
6252}
6253
6254/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6255/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6256/// only handle a 2 operand GEP in the same basic block or a splat constant
6257/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6258/// index.
6259///
6260/// If the existing GEP has a vector base pointer that is splat, we can look
6261/// through the splat to find the scalar pointer. If we can't find a scalar
6262/// pointer there's nothing we can do.
6263///
6264/// If we have a GEP with more than 2 indices where the middle indices are all
6265/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6266///
6267/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6268/// followed by a GEP with an all zeroes vector index. This will enable
6269/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6270/// zero index.
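///
/// For illustration only (hypothetical IR, not taken from this file or its
/// tests), a gather address such as
/// \code
///   %gep = getelementptr [4 x i32], ptr %p, i64 0, <8 x i64> %idx
/// \endcode
/// would be rewritten into a scalar GEP feeding a two-operand vector GEP:
/// \code
///   %scalar = getelementptr [4 x i32], ptr %p, i64 0, i64 0
///   %vecgep = getelementptr i32, ptr %scalar, <8 x i64> %idx
/// \endcode
/// so that SelectionDAGBuilder can treat %scalar as the uniform base.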
6271bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6272 Value *Ptr) {
6273 Value *NewAddr;
6274
6275 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6276 // Don't optimize GEPs that don't have indices.
6277 if (!GEP->hasIndices())
6278 return false;
6279
6280 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6281 // FIXME: We should support this by sinking the GEP.
6282 if (MemoryInst->getParent() != GEP->getParent())
6283 return false;
6284
6285 SmallVector<Value *, 2> Ops(GEP->operands());
6286
6287 bool RewriteGEP = false;
6288
6289 if (Ops[0]->getType()->isVectorTy()) {
6290 Ops[0] = getSplatValue(Ops[0]);
6291 if (!Ops[0])
6292 return false;
6293 RewriteGEP = true;
6294 }
6295
6296 unsigned FinalIndex = Ops.size() - 1;
6297
6298 // Ensure all but the last index is 0.
6299 // FIXME: This isn't strictly required. All that's required is that they are
6300 // all scalars or splats.
6301 for (unsigned i = 1; i < FinalIndex; ++i) {
6302 auto *C = dyn_cast<Constant>(Ops[i]);
6303 if (!C)
6304 return false;
6305 if (isa<VectorType>(C->getType()))
6306 C = C->getSplatValue();
6307 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6308 if (!CI || !CI->isZero())
6309 return false;
6310 // Scalarize the index if needed.
6311 Ops[i] = CI;
6312 }
6313
6314 // Try to scalarize the final index.
6315 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6316 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6317 auto *C = dyn_cast<ConstantInt>(V);
6318 // Don't scalarize all zeros vector.
6319 if (!C || !C->isZero()) {
6320 Ops[FinalIndex] = V;
6321 RewriteGEP = true;
6322 }
6323 }
6324 }
6325
 6326 // If we made any changes or we have extra operands, we need to generate
 6327 // new instructions.
6328 if (!RewriteGEP && Ops.size() == 2)
6329 return false;
6330
6331 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6332
6333 IRBuilder<> Builder(MemoryInst);
6334
6335 Type *SourceTy = GEP->getSourceElementType();
6336 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6337
6338 // If the final index isn't a vector, emit a scalar GEP containing all ops
6339 // and a vector GEP with all zeroes final index.
6340 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6341 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6342 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6343 auto *SecondTy = GetElementPtrInst::getIndexedType(
6344 SourceTy, ArrayRef(Ops).drop_front());
6345 NewAddr =
6346 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6347 } else {
6348 Value *Base = Ops[0];
6349 Value *Index = Ops[FinalIndex];
6350
6351 // Create a scalar GEP if there are more than 2 operands.
6352 if (Ops.size() != 2) {
6353 // Replace the last index with 0.
6354 Ops[FinalIndex] =
6355 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6356 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
 6357 SourceTy = GetElementPtrInst::getIndexedType(
 6358 SourceTy, ArrayRef(Ops).drop_front());
6359 }
6360
6361 // Now create the GEP with scalar pointer and vector index.
6362 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6363 }
6364 } else if (!isa<Constant>(Ptr)) {
 6365 // Not a GEP, maybe it's a splat and we can create a GEP to enable
 6366 // SelectionDAGBuilder to use it as a uniform base.
 6367 Value *V = getSplatValue(Ptr);
 6368 if (!V)
6369 return false;
6370
6371 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6372
6373 IRBuilder<> Builder(MemoryInst);
6374
6375 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6376 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6377 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6378 Type *ScalarTy;
6379 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6380 Intrinsic::masked_gather) {
6381 ScalarTy = MemoryInst->getType()->getScalarType();
6382 } else {
6383 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6384 Intrinsic::masked_scatter);
6385 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6386 }
6387 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6388 } else {
 6389 // Constant, SelectionDAGBuilder knows to check if it's a splat.
6390 return false;
6391 }
6392
6393 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6394
6395 // If we have no uses, recursively delete the value and all dead instructions
6396 // using it.
6397 if (Ptr->use_empty())
 6398 RecursivelyDeleteTriviallyDeadInstructions(
 6399 Ptr, TLInfo, nullptr,
6400 [&](Value *V) { removeAllAssertingVHReferences(V); });
6401
6402 return true;
6403}
6404
6405/// If there are any memory operands, use OptimizeMemoryInst to sink their
6406/// address computing into the block when possible / profitable.
6407bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6408 bool MadeChange = false;
6409
6410 const TargetRegisterInfo *TRI =
6411 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6412 TargetLowering::AsmOperandInfoVector TargetConstraints =
6413 TLI->ParseConstraints(*DL, TRI, *CS);
6414 unsigned ArgNo = 0;
6415 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6416 // Compute the constraint code and ConstraintType to use.
6417 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6418
6419 // TODO: Also handle C_Address?
6420 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6421 OpInfo.isIndirect) {
6422 Value *OpVal = CS->getArgOperand(ArgNo++);
6423 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6424 } else if (OpInfo.Type == InlineAsm::isInput)
6425 ArgNo++;
6426 }
6427
6428 return MadeChange;
6429}
6430
6431/// Check if all the uses of \p Val are equivalent (or free) zero or
6432/// sign extensions.
6433static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6434 assert(!Val->use_empty() && "Input must have at least one use");
6435 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6436 bool IsSExt = isa<SExtInst>(FirstUser);
6437 Type *ExtTy = FirstUser->getType();
6438 for (const User *U : Val->users()) {
6439 const Instruction *UI = cast<Instruction>(U);
6440 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6441 return false;
6442 Type *CurTy = UI->getType();
6443 // Same input and output types: Same instruction after CSE.
6444 if (CurTy == ExtTy)
6445 continue;
6446
6447 // If IsSExt is true, we are in this situation:
6448 // a = Val
6449 // b = sext ty1 a to ty2
6450 // c = sext ty1 a to ty3
6451 // Assuming ty2 is shorter than ty3, this could be turned into:
6452 // a = Val
6453 // b = sext ty1 a to ty2
6454 // c = sext ty2 b to ty3
6455 // However, the last sext is not free.
6456 if (IsSExt)
6457 return false;
6458
6459 // This is a ZExt, maybe this is free to extend from one type to another.
6460 // In that case, we would not account for a different use.
6461 Type *NarrowTy;
6462 Type *LargeTy;
6463 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6464 CurTy->getScalarType()->getIntegerBitWidth()) {
6465 NarrowTy = CurTy;
6466 LargeTy = ExtTy;
6467 } else {
6468 NarrowTy = ExtTy;
6469 LargeTy = CurTy;
6470 }
6471
6472 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6473 return false;
6474 }
6475 // All uses are the same or can be derived from one another for free.
6476 return true;
6477}
6478
6479/// Try to speculatively promote extensions in \p Exts and continue
6480/// promoting through newly promoted operands recursively as far as doing so is
6481/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6482/// When some promotion happened, \p TPT contains the proper state to revert
6483/// them.
6484///
6485/// \return true if some promotion happened, false otherwise.
6486bool CodeGenPrepare::tryToPromoteExts(
6487 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6488 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6489 unsigned CreatedInstsCost) {
6490 bool Promoted = false;
6491
6492 // Iterate over all the extensions to try to promote them.
6493 for (auto *I : Exts) {
6494 // Early check if we directly have ext(load).
6495 if (isa<LoadInst>(I->getOperand(0))) {
6496 ProfitablyMovedExts.push_back(I);
6497 continue;
6498 }
6499
6500 // Check whether or not we want to do any promotion. The reason we have
6501 // this check inside the for loop is to catch the case where an extension
6502 // is directly fed by a load because in such case the extension can be moved
6503 // up without any promotion on its operands.
 6504 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
 6505 return false;
6506
6507 // Get the action to perform the promotion.
6508 TypePromotionHelper::Action TPH =
6509 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6510 // Check if we can promote.
6511 if (!TPH) {
6512 // Save the current extension as we cannot move up through its operand.
6513 ProfitablyMovedExts.push_back(I);
6514 continue;
6515 }
6516
6517 // Save the current state.
6518 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6519 TPT.getRestorationPoint();
 6520 SmallVector<Instruction *, 4> NewExts;
 6521 unsigned NewCreatedInstsCost = 0;
6522 unsigned ExtCost = !TLI->isExtFree(I);
6523 // Promote.
6524 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6525 &NewExts, nullptr, *TLI);
6526 assert(PromotedVal &&
6527 "TypePromotionHelper should have filtered out those cases");
6528
 6529 // We would be able to merge only one extension into a load.
6530 // Therefore, if we have more than 1 new extension we heuristically
6531 // cut this search path, because it means we degrade the code quality.
6532 // With exactly 2, the transformation is neutral, because we will merge
6533 // one extension but leave one. However, we optimistically keep going,
6534 // because the new extension may be removed too. Also avoid replacing a
6535 // single free extension with multiple extensions, as this increases the
6536 // number of IR instructions while not providing any savings.
6537 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6538 // FIXME: It would be possible to propagate a negative value instead of
6539 // conservatively ceiling it to 0.
6540 TotalCreatedInstsCost =
6541 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6542 if (!StressExtLdPromotion &&
6543 (TotalCreatedInstsCost > 1 ||
6544 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6545 (ExtCost == 0 && NewExts.size() > 1))) {
6546 // This promotion is not profitable, rollback to the previous state, and
6547 // save the current extension in ProfitablyMovedExts as the latest
6548 // speculative promotion turned out to be unprofitable.
6549 TPT.rollback(LastKnownGood);
6550 ProfitablyMovedExts.push_back(I);
6551 continue;
6552 }
6553 // Continue promoting NewExts as far as doing so is profitable.
6554 SmallVector<Instruction *, 2> NewlyMovedExts;
6555 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6556 bool NewPromoted = false;
6557 for (auto *ExtInst : NewlyMovedExts) {
6558 Instruction *MovedExt = cast<Instruction>(ExtInst);
6559 Value *ExtOperand = MovedExt->getOperand(0);
 6560 // If we have reached a load, we need this extra profitability check
6561 // as it could potentially be merged into an ext(load).
6562 if (isa<LoadInst>(ExtOperand) &&
6563 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6564 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6565 continue;
6566
6567 ProfitablyMovedExts.push_back(MovedExt);
6568 NewPromoted = true;
6569 }
6570
6571 // If none of speculative promotions for NewExts is profitable, rollback
6572 // and save the current extension (I) as the last profitable extension.
6573 if (!NewPromoted) {
6574 TPT.rollback(LastKnownGood);
6575 ProfitablyMovedExts.push_back(I);
6576 continue;
6577 }
6578 // The promotion is profitable.
6579 Promoted = true;
6580 }
6581 return Promoted;
6582}
6583
 6584 /// Merge redundant sexts when one dominates the other.
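///
/// A minimal sketch (made-up values, not from a test): given
/// \code
///   %s1 = sext i32 %v to i64   ; dominates %s2
///   %s2 = sext i32 %v to i64
/// \endcode
/// the uses of %s2 are rewritten to use %s1 and %s2 is removed (or the other
/// way around, depending on which instruction dominates).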
6585bool CodeGenPrepare::mergeSExts(Function &F) {
6586 bool Changed = false;
6587 for (auto &Entry : ValToSExtendedUses) {
6588 SExts &Insts = Entry.second;
6589 SExts CurPts;
6590 for (Instruction *Inst : Insts) {
6591 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6592 Inst->getOperand(0) != Entry.first)
6593 continue;
6594 bool inserted = false;
6595 for (auto &Pt : CurPts) {
6596 if (getDT(F).dominates(Inst, Pt)) {
6597 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6598 RemovedInsts.insert(Pt);
6599 Pt->removeFromParent();
6600 Pt = Inst;
6601 inserted = true;
6602 Changed = true;
6603 break;
6604 }
6605 if (!getDT(F).dominates(Pt, Inst))
6606 // Give up if we need to merge in a common dominator as the
6607 // experiments show it is not profitable.
6608 continue;
6609 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6610 RemovedInsts.insert(Inst);
6611 Inst->removeFromParent();
6612 inserted = true;
6613 Changed = true;
6614 break;
6615 }
6616 if (!inserted)
6617 CurPts.push_back(Inst);
6618 }
6619 }
6620 return Changed;
6621}
6622
6623// Splitting large data structures so that the GEPs accessing them can have
6624// smaller offsets so that they can be sunk to the same blocks as their users.
6625// For example, a large struct starting from %base is split into two parts
6626// where the second part starts from %new_base.
6627//
6628// Before:
6629// BB0:
6630// %base =
6631//
6632// BB1:
6633// %gep0 = gep %base, off0
6634// %gep1 = gep %base, off1
6635// %gep2 = gep %base, off2
6636//
6637// BB2:
6638// %load1 = load %gep0
6639// %load2 = load %gep1
6640// %load3 = load %gep2
6641//
6642// After:
6643// BB0:
6644// %base =
6645// %new_base = gep %base, off0
6646//
6647// BB1:
6648// %new_gep0 = %new_base
6649// %new_gep1 = gep %new_base, off1 - off0
6650// %new_gep2 = gep %new_base, off2 - off0
6651//
6652// BB2:
6653// %load1 = load i32, i32* %new_gep0
6654// %load2 = load i32, i32* %new_gep1
6655// %load3 = load i32, i32* %new_gep2
6656//
6657// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
 6658 // their offsets are small enough to fit into the addressing mode.
6659bool CodeGenPrepare::splitLargeGEPOffsets() {
6660 bool Changed = false;
6661 for (auto &Entry : LargeOffsetGEPMap) {
6662 Value *OldBase = Entry.first;
 6663 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
 6664 &LargeOffsetGEPs = Entry.second;
6665 auto compareGEPOffset =
6666 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6667 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6668 if (LHS.first == RHS.first)
6669 return false;
6670 if (LHS.second != RHS.second)
6671 return LHS.second < RHS.second;
6672 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6673 };
6674 // Sorting all the GEPs of the same data structures based on the offsets.
6675 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6676 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6677 // Skip if all the GEPs have the same offsets.
6678 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6679 continue;
6680 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6681 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6682 Value *NewBaseGEP = nullptr;
6683
6684 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
 6685 GetElementPtrInst *GEP) {
 6686 LLVMContext &Ctx = GEP->getContext();
6687 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6688 Type *I8PtrTy =
6689 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6690
6691 BasicBlock::iterator NewBaseInsertPt;
6692 BasicBlock *NewBaseInsertBB;
6693 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6694 // If the base of the struct is an instruction, the new base will be
6695 // inserted close to it.
6696 NewBaseInsertBB = BaseI->getParent();
6697 if (isa<PHINode>(BaseI))
6698 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6699 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6700 NewBaseInsertBB =
6701 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6702 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6703 } else
6704 NewBaseInsertPt = std::next(BaseI->getIterator());
6705 } else {
6706 // If the current base is an argument or global value, the new base
6707 // will be inserted to the entry block.
6708 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6709 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6710 }
6711 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6712 // Create a new base.
6713 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6714 NewBaseGEP = OldBase;
6715 if (NewBaseGEP->getType() != I8PtrTy)
6716 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6717 NewBaseGEP =
6718 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6719 NewGEPBases.insert(NewBaseGEP);
6720 return;
6721 };
6722
 6723 // Check whether all the offsets can be encoded with the preferred common base.
6724 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6725 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6726 BaseOffset = PreferBase;
6727 // Create a new base if the offset of the BaseGEP can be decoded with one
6728 // instruction.
6729 createNewBase(BaseOffset, OldBase, BaseGEP);
6730 }
6731
6732 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6733 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6734 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6735 int64_t Offset = LargeOffsetGEP->second;
6736 if (Offset != BaseOffset) {
 6737 TargetLowering::AddrMode AddrMode;
 6738 AddrMode.HasBaseReg = true;
6739 AddrMode.BaseOffs = Offset - BaseOffset;
6740 // The result type of the GEP might not be the type of the memory
6741 // access.
6742 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6743 GEP->getResultElementType(),
6744 GEP->getAddressSpace())) {
6745 // We need to create a new base if the offset to the current base is
6746 // too large to fit into the addressing mode. So, a very large struct
6747 // may be split into several parts.
6748 BaseGEP = GEP;
6749 BaseOffset = Offset;
6750 NewBaseGEP = nullptr;
6751 }
6752 }
6753
6754 // Generate a new GEP to replace the current one.
6755 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6756
6757 if (!NewBaseGEP) {
6758 // Create a new base if we don't have one yet. Find the insertion
6759 // pointer for the new base first.
6760 createNewBase(BaseOffset, OldBase, GEP);
6761 }
6762
6763 IRBuilder<> Builder(GEP);
6764 Value *NewGEP = NewBaseGEP;
6765 if (Offset != BaseOffset) {
6766 // Calculate the new offset for the new GEP.
6767 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6768 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6769 }
6770 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6771 LargeOffsetGEPID.erase(GEP);
6772 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6773 GEP->eraseFromParent();
6774 Changed = true;
6775 }
6776 }
6777 return Changed;
6778}
6779
6780bool CodeGenPrepare::optimizePhiType(
 6781 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
 6782 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
 6783 // We are looking for a collection of interconnected phi nodes that together
6784 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6785 // are of the same type. Convert the whole set of nodes to the type of the
6786 // bitcast.
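// As a rough illustration (hypothetical IR, not from a test), a phi such as
//   %l   = load i32, ptr %p
//   %phi = phi i32 [ %l, %entry ], [ %phi, %loop ]
//   %f   = bitcast i32 %phi to float
// may be rewritten so the phi carries the bitcast type instead:
//   %l    = load i32, ptr %p
//   %l.bc = bitcast i32 %l to float
//   %phi  = phi float [ %l.bc, %entry ], [ %phi, %loop ]
// assuming the use-side bitcast is anchored (feeds something other than a
// plain store) and the target reports the conversion as profitable via
// shouldConvertPhiType().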
6787 Type *PhiTy = I->getType();
6788 Type *ConvertTy = nullptr;
6789 if (Visited.count(I) ||
6790 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6791 return false;
6792
 6793 SmallVector<Instruction *, 4> Worklist;
 6794 Worklist.push_back(cast<Instruction>(I));
 6795 SmallPtrSet<PHINode *, 4> PhiNodes;
 6796 SmallPtrSet<ConstantData *, 4> Constants;
 6797 PhiNodes.insert(I);
 6798 Visited.insert(I);
 6799 SmallPtrSet<Instruction *, 4> Defs;
 6800 SmallPtrSet<Instruction *, 4> Uses;
6801 // This works by adding extra bitcasts between load/stores and removing
 6802 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
 6803 // we can get in the situation where we remove a bitcast in one iteration
 6804 // just to add it again in the next. We need to ensure that at least one
 6805 // bitcast we remove is anchored to something that will not change back.
6806 bool AnyAnchored = false;
6807
6808 while (!Worklist.empty()) {
6809 Instruction *II = Worklist.pop_back_val();
6810
6811 if (auto *Phi = dyn_cast<PHINode>(II)) {
6812 // Handle Defs, which might also be PHI's
6813 for (Value *V : Phi->incoming_values()) {
6814 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6815 if (!PhiNodes.count(OpPhi)) {
6816 if (!Visited.insert(OpPhi).second)
6817 return false;
6818 PhiNodes.insert(OpPhi);
6819 Worklist.push_back(OpPhi);
6820 }
6821 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6822 if (!OpLoad->isSimple())
6823 return false;
6824 if (Defs.insert(OpLoad).second)
6825 Worklist.push_back(OpLoad);
6826 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6827 if (Defs.insert(OpEx).second)
6828 Worklist.push_back(OpEx);
6829 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6830 if (!ConvertTy)
6831 ConvertTy = OpBC->getOperand(0)->getType();
6832 if (OpBC->getOperand(0)->getType() != ConvertTy)
6833 return false;
6834 if (Defs.insert(OpBC).second) {
6835 Worklist.push_back(OpBC);
6836 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6837 !isa<ExtractElementInst>(OpBC->getOperand(0));
6838 }
6839 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6840 Constants.insert(OpC);
6841 else
6842 return false;
6843 }
6844 }
6845
6846 // Handle uses which might also be phi's
6847 for (User *V : II->users()) {
6848 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6849 if (!PhiNodes.count(OpPhi)) {
6850 if (Visited.count(OpPhi))
6851 return false;
6852 PhiNodes.insert(OpPhi);
6853 Visited.insert(OpPhi);
6854 Worklist.push_back(OpPhi);
6855 }
6856 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6857 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6858 return false;
6859 Uses.insert(OpStore);
6860 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6861 if (!ConvertTy)
6862 ConvertTy = OpBC->getType();
6863 if (OpBC->getType() != ConvertTy)
6864 return false;
6865 Uses.insert(OpBC);
6866 AnyAnchored |=
6867 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6868 } else {
6869 return false;
6870 }
6871 }
6872 }
6873
6874 if (!ConvertTy || !AnyAnchored ||
6875 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6876 return false;
6877
6878 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6879 << *ConvertTy << "\n");
6880
6881 // Create all the new phi nodes of the new type, and bitcast any loads to the
6882 // correct type.
6883 ValueToValueMap ValMap;
6884 for (ConstantData *C : Constants)
6885 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6886 for (Instruction *D : Defs) {
6887 if (isa<BitCastInst>(D)) {
6888 ValMap[D] = D->getOperand(0);
6889 DeletedInstrs.insert(D);
6890 } else {
6891 BasicBlock::iterator insertPt = std::next(D->getIterator());
6892 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6893 }
6894 }
6895 for (PHINode *Phi : PhiNodes)
6896 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6897 Phi->getName() + ".tc", Phi->getIterator());
6898 // Pipe together all the PhiNodes.
6899 for (PHINode *Phi : PhiNodes) {
6900 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6901 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6902 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6903 Phi->getIncomingBlock(i));
6904 Visited.insert(NewPhi);
6905 }
6906 // And finally pipe up the stores and bitcasts
6907 for (Instruction *U : Uses) {
6908 if (isa<BitCastInst>(U)) {
6909 DeletedInstrs.insert(U);
6910 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6911 } else {
6912 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6913 U->getIterator()));
6914 }
6915 }
6916
6917 // Save the removed phis to be deleted later.
6918 DeletedInstrs.insert_range(PhiNodes);
6919 return true;
6920}
6921
6922bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6923 if (!OptimizePhiTypes)
6924 return false;
6925
6926 bool Changed = false;
 6927 SmallPtrSet<PHINode *, 4> Visited;
 6928 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6929
6930 // Attempt to optimize all the phis in the functions to the correct type.
6931 for (auto &BB : F)
6932 for (auto &Phi : BB.phis())
6933 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6934
6935 // Remove any old phi's that have been converted.
6936 for (auto *I : DeletedInstrs) {
6937 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6938 I->eraseFromParent();
6939 }
6940
6941 return Changed;
6942}
6943
6944/// Return true, if an ext(load) can be formed from an extension in
6945/// \p MovedExts.
6946bool CodeGenPrepare::canFormExtLd(
6947 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6948 Instruction *&Inst, bool HasPromoted) {
6949 for (auto *MovedExtInst : MovedExts) {
6950 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6951 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6952 Inst = MovedExtInst;
6953 break;
6954 }
6955 }
6956 if (!LI)
6957 return false;
6958
6959 // If they're already in the same block, there's nothing to do.
6960 // Make the cheap checks first if we did not promote.
6961 // If we promoted, we need to check if it is indeed profitable.
6962 if (!HasPromoted && LI->getParent() == Inst->getParent())
6963 return false;
6964
6965 return TLI->isExtLoad(LI, Inst, *DL);
6966}
6967
6968/// Move a zext or sext fed by a load into the same basic block as the load,
6969/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6970/// extend into the load.
6971///
6972/// E.g.,
6973/// \code
6974/// %ld = load i32* %addr
6975/// %add = add nuw i32 %ld, 4
6976/// %zext = zext i32 %add to i64
 6977 /// \endcode
6978/// =>
6979/// \code
6980/// %ld = load i32* %addr
6981/// %zext = zext i32 %ld to i64
6982/// %add = add nuw i64 %zext, 4
 6983 /// \endcode
 6984 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
 6985 /// allows us to match zext(load i32*) to i64.
6986///
6987/// Also, try to promote the computations used to obtain a sign extended
6988/// value used into memory accesses.
6989/// E.g.,
6990/// \code
6991/// a = add nsw i32 b, 3
6992/// d = sext i32 a to i64
6993/// e = getelementptr ..., i64 d
6994/// \endcode
6995/// =>
6996/// \code
6997/// f = sext i32 b to i64
6998/// a = add nsw i64 f, 3
6999/// e = getelementptr ..., i64 a
7000/// \endcode
7001///
7002/// \p Inst[in/out] the extension may be modified during the process if some
7003/// promotions apply.
7004bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7005 bool AllowPromotionWithoutCommonHeader = false;
7006 /// See if it is an interesting sext operations for the address type
7007 /// promotion before trying to promote it, e.g., the ones with the right
7008 /// type and used in memory accesses.
7009 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7010 *Inst, AllowPromotionWithoutCommonHeader);
7011 TypePromotionTransaction TPT(RemovedInsts);
7012 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7013 TPT.getRestorationPoint();
 7014 SmallVector<Instruction *, 1> Exts;
 7015 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7016 Exts.push_back(Inst);
7017
7018 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7019
7020 // Look for a load being extended.
7021 LoadInst *LI = nullptr;
7022 Instruction *ExtFedByLoad;
7023
7024 // Try to promote a chain of computation if it allows to form an extended
7025 // load.
7026 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7027 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7028 TPT.commit();
7029 // Move the extend into the same block as the load.
7030 ExtFedByLoad->moveAfter(LI);
7031 ++NumExtsMoved;
7032 Inst = ExtFedByLoad;
7033 return true;
7034 }
7035
7036 // Continue promoting SExts if known as considerable depending on targets.
7037 if (ATPConsiderable &&
7038 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7039 HasPromoted, TPT, SpeculativelyMovedExts))
7040 return true;
7041
7042 TPT.rollback(LastKnownGood);
7043 return false;
7044}
7045
7046// Perform address type promotion if doing so is profitable.
7047// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7048// instructions that sign extended the same initial value. However, if
7049// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7050// extension is just profitable.
7051bool CodeGenPrepare::performAddressTypePromotion(
7052 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7053 bool HasPromoted, TypePromotionTransaction &TPT,
7054 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7055 bool Promoted = false;
7056 SmallPtrSet<Instruction *, 1> UnhandledExts;
7057 bool AllSeenFirst = true;
7058 for (auto *I : SpeculativelyMovedExts) {
7059 Value *HeadOfChain = I->getOperand(0);
 7060 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
 7061 SeenChainsForSExt.find(HeadOfChain);
7062 // If there is an unhandled SExt which has the same header, try to promote
7063 // it as well.
7064 if (AlreadySeen != SeenChainsForSExt.end()) {
7065 if (AlreadySeen->second != nullptr)
7066 UnhandledExts.insert(AlreadySeen->second);
7067 AllSeenFirst = false;
7068 }
7069 }
7070
7071 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7072 SpeculativelyMovedExts.size() == 1)) {
7073 TPT.commit();
7074 if (HasPromoted)
7075 Promoted = true;
7076 for (auto *I : SpeculativelyMovedExts) {
7077 Value *HeadOfChain = I->getOperand(0);
7078 SeenChainsForSExt[HeadOfChain] = nullptr;
7079 ValToSExtendedUses[HeadOfChain].push_back(I);
7080 }
 7081 // Update Inst as promotion happened.
7082 Inst = SpeculativelyMovedExts.pop_back_val();
7083 } else {
 7084 // This is the first chain visited from the header; keep the current chain
 7085 // as unhandled. Defer promoting it until we encounter another SExt
 7086 // chain derived from the same header.
7087 for (auto *I : SpeculativelyMovedExts) {
7088 Value *HeadOfChain = I->getOperand(0);
7089 SeenChainsForSExt[HeadOfChain] = Inst;
7090 }
7091 return false;
7092 }
7093
7094 if (!AllSeenFirst && !UnhandledExts.empty())
7095 for (auto *VisitedSExt : UnhandledExts) {
7096 if (RemovedInsts.count(VisitedSExt))
7097 continue;
7098 TypePromotionTransaction TPT(RemovedInsts);
 7099 SmallVector<Instruction *, 1> Exts;
 7100 SmallVector<Instruction *, 2> Chains;
 7101 Exts.push_back(VisitedSExt);
7102 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7103 TPT.commit();
7104 if (HasPromoted)
7105 Promoted = true;
7106 for (auto *I : Chains) {
7107 Value *HeadOfChain = I->getOperand(0);
7108 // Mark this as handled.
7109 SeenChainsForSExt[HeadOfChain] = nullptr;
7110 ValToSExtendedUses[HeadOfChain].push_back(I);
7111 }
7112 }
7113 return Promoted;
7114}
7115
7116bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7117 BasicBlock *DefBB = I->getParent();
7118
7119 // If the result of a {s|z}ext and its source are both live out, rewrite all
7120 // other uses of the source with result of extension.
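// As an illustration (hypothetical names), given
//   %x   = add i32 ...
//   %ext = zext i32 %x to i64
// with both %x and %ext live out of this block, a use of %x in another block
// is rewritten to use
//   %t = trunc i64 %ext to i32
// so that only %ext needs to remain live across the block boundary.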
7121 Value *Src = I->getOperand(0);
7122 if (Src->hasOneUse())
7123 return false;
7124
7125 // Only do this xform if truncating is free.
7126 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7127 return false;
7128
7129 // Only safe to perform the optimization if the source is also defined in
7130 // this block.
7131 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7132 return false;
7133
7134 bool DefIsLiveOut = false;
7135 for (User *U : I->users()) {
7136 Instruction *UI = cast<Instruction>(U);
7137
7138 // Figure out which BB this ext is used in.
7139 BasicBlock *UserBB = UI->getParent();
7140 if (UserBB == DefBB)
7141 continue;
7142 DefIsLiveOut = true;
7143 break;
7144 }
7145 if (!DefIsLiveOut)
7146 return false;
7147
7148 // Make sure none of the uses are PHI nodes.
7149 for (User *U : Src->users()) {
7150 Instruction *UI = cast<Instruction>(U);
7151 BasicBlock *UserBB = UI->getParent();
7152 if (UserBB == DefBB)
7153 continue;
7154 // Be conservative. We don't want this xform to end up introducing
7155 // reloads just before load / store instructions.
7156 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7157 return false;
7158 }
7159
7160 // InsertedTruncs - Only insert one trunc in each block once.
 7161 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
 7162
7163 bool MadeChange = false;
7164 for (Use &U : Src->uses()) {
7165 Instruction *User = cast<Instruction>(U.getUser());
7166
7167 // Figure out which BB this ext is used in.
7168 BasicBlock *UserBB = User->getParent();
7169 if (UserBB == DefBB)
7170 continue;
7171
7172 // Both src and def are live in this block. Rewrite the use.
7173 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7174
7175 if (!InsertedTrunc) {
7176 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7177 assert(InsertPt != UserBB->end());
7178 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7179 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7180 InsertedInsts.insert(InsertedTrunc);
7181 }
7182
7183 // Replace a use of the {s|z}ext source with a use of the result.
7184 U = InsertedTrunc;
7185 ++NumExtUses;
7186 MadeChange = true;
7187 }
7188
7189 return MadeChange;
7190}
7191
7192// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7193// just after the load if the target can fold this into one extload instruction,
7194// with the hope of eliminating some of the other later "and" instructions using
7195// the loaded value. "and"s that are made trivially redundant by the insertion
7196// of the new "and" are removed by this function, while others (e.g. those whose
7197// path from the load goes through a phi) are left for isel to potentially
7198// remove.
7199//
7200// For example:
7201//
7202// b0:
7203// x = load i32
7204// ...
7205// b1:
7206// y = and x, 0xff
7207// z = use y
7208//
7209// becomes:
7210//
7211// b0:
7212// x = load i32
7213// x' = and x, 0xff
7214// ...
7215// b1:
7216// z = use x'
7217//
7218// whereas:
7219//
7220// b0:
7221// x1 = load i32
7222// ...
7223// b1:
7224// x2 = load i32
7225// ...
7226// b2:
7227// x = phi x1, x2
7228// y = and x, 0xff
7229//
7230// becomes (after a call to optimizeLoadExt for each load):
7231//
7232// b0:
7233// x1 = load i32
7234// x1' = and x1, 0xff
7235// ...
7236// b1:
7237// x2 = load i32
7238// x2' = and x2, 0xff
7239// ...
7240// b2:
7241// x = phi x1', x2'
7242// y = and x, 0xff
7243bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7244 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7245 return false;
7246
7247 // Skip loads we've already transformed.
7248 if (Load->hasOneUse() &&
7249 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7250 return false;
7251
7252 // Look at all uses of Load, looking through phis, to determine how many bits
7253 // of the loaded value are needed.
 7254 SmallVector<Instruction *, 8> WorkList;
 7255 SmallPtrSet<Instruction *, 16> Visited;
 7256 SmallVector<Instruction *, 8> AndsToMaybeRemove;
 7257 SmallVector<Instruction *, 8> DropFlags;
7258 for (auto *U : Load->users())
7259 WorkList.push_back(cast<Instruction>(U));
7260
7261 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7262 unsigned BitWidth = LoadResultVT.getSizeInBits();
7263 // If the BitWidth is 0, do not try to optimize the type
7264 if (BitWidth == 0)
7265 return false;
7266
7267 APInt DemandBits(BitWidth, 0);
7268 APInt WidestAndBits(BitWidth, 0);
7269
7270 while (!WorkList.empty()) {
7271 Instruction *I = WorkList.pop_back_val();
7272
7273 // Break use-def graph loops.
7274 if (!Visited.insert(I).second)
7275 continue;
7276
7277 // For a PHI node, push all of its users.
7278 if (auto *Phi = dyn_cast<PHINode>(I)) {
7279 for (auto *U : Phi->users())
7280 WorkList.push_back(cast<Instruction>(U));
7281 continue;
7282 }
7283
7284 switch (I->getOpcode()) {
7285 case Instruction::And: {
7286 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7287 if (!AndC)
7288 return false;
7289 APInt AndBits = AndC->getValue();
7290 DemandBits |= AndBits;
7291 // Keep track of the widest and mask we see.
7292 if (AndBits.ugt(WidestAndBits))
7293 WidestAndBits = AndBits;
7294 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7295 AndsToMaybeRemove.push_back(I);
7296 break;
7297 }
7298
7299 case Instruction::Shl: {
7300 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7301 if (!ShlC)
7302 return false;
7303 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7304 DemandBits.setLowBits(BitWidth - ShiftAmt);
7305 DropFlags.push_back(I);
7306 break;
7307 }
7308
7309 case Instruction::Trunc: {
7310 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7311 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7312 DemandBits.setLowBits(TruncBitWidth);
7313 DropFlags.push_back(I);
7314 break;
7315 }
7316
7317 default:
7318 return false;
7319 }
7320 }
7321
7322 uint32_t ActiveBits = DemandBits.getActiveBits();
7323 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7324 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7325 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7326 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7327 // followed by an AND.
7328 // TODO: Look into removing this restriction by fixing backends to either
7329 // return false for isLoadExtLegal for i1 or have them select this pattern to
7330 // a single instruction.
7331 //
7332 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7333 // mask, since these are the only ands that will be removed by isel.
7334 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7335 WidestAndBits != DemandBits)
7336 return false;
7337
7338 LLVMContext &Ctx = Load->getType()->getContext();
7339 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7340 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7341
7342 // Reject cases that won't be matched as extloads.
7343 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7344 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7345 return false;
7346
7347 IRBuilder<> Builder(Load->getNextNode());
7348 auto *NewAnd = cast<Instruction>(
7349 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7350 // Mark this instruction as "inserted by CGP", so that other
7351 // optimizations don't touch it.
7352 InsertedInsts.insert(NewAnd);
7353
7354 // Replace all uses of load with new and (except for the use of load in the
7355 // new and itself).
7356 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7357 NewAnd->setOperand(0, Load);
7358
7359 // Remove any and instructions that are now redundant.
7360 for (auto *And : AndsToMaybeRemove)
7361 // Check that the and mask is the same as the one we decided to put on the
7362 // new and.
7363 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7364 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7365 if (&*CurInstIterator == And)
7366 CurInstIterator = std::next(And->getIterator());
7367 And->eraseFromParent();
7368 ++NumAndUses;
7369 }
7370
 7371 // NSW flags may no longer hold.
7372 for (auto *Inst : DropFlags)
7373 Inst->setHasNoSignedWrap(false);
7374
7375 ++NumAndsAdded;
7376 return true;
7377}
7378
7379/// Check if V (an operand of a select instruction) is an expensive instruction
7380/// that is only used once.
 7381 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
 7382 auto *I = dyn_cast<Instruction>(V);
7383 // If it's safe to speculatively execute, then it should not have side
7384 // effects; therefore, it's safe to sink and possibly *not* execute.
7385 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7387}
7388
7389/// Returns true if a SelectInst should be turned into an explicit branch.
 7390 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
 7391 const TargetLowering *TLI,
7392 SelectInst *SI) {
7393 // If even a predictable select is cheap, then a branch can't be cheaper.
7394 if (!TLI->isPredictableSelectExpensive())
7395 return false;
7396
7397 // FIXME: This should use the same heuristics as IfConversion to determine
7398 // whether a select is better represented as a branch.
7399
7400 // If metadata tells us that the select condition is obviously predictable,
7401 // then we want to replace the select with a branch.
7402 uint64_t TrueWeight, FalseWeight;
7403 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7404 uint64_t Max = std::max(TrueWeight, FalseWeight);
7405 uint64_t Sum = TrueWeight + FalseWeight;
7406 if (Sum != 0) {
7407 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7408 if (Probability > TTI->getPredictableBranchThreshold())
7409 return true;
7410 }
7411 }
7412
7413 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7414
7415 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7416 // comparison condition. If the compare has more than one use, there's
7417 // probably another cmov or setcc around, so it's not worth emitting a branch.
7418 if (!Cmp || !Cmp->hasOneUse())
7419 return false;
7420
7421 // If either operand of the select is expensive and only needed on one side
7422 // of the select, we should form a branch.
7423 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7424 sinkSelectOperand(TTI, SI->getFalseValue()))
7425 return true;
7426
7427 return false;
7428}
7429
7430/// If \p isTrue is true, return the true value of \p SI, otherwise return
7431/// false value of \p SI. If the true/false value of \p SI is defined by any
7432/// select instructions in \p Selects, look through the defining select
7433/// instruction until the true/false value is not defined in \p Selects.
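///
/// For example (an assumed snippet, not from this file), with
/// \code
///   %s1 = select i1 %c, i32 %a, i32 %b
///   %s2 = select i1 %c, i32 %s1, i32 %d
/// \endcode
/// and \p Selects = {%s1, %s2}, the true value of %s2 resolves to %a and its
/// false value to %d.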
7434static Value *
 7435 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
 7436 const SmallPtrSet<const Instruction *, 2> &Selects) {
7437 Value *V = nullptr;
7438
7439 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7440 DefSI = dyn_cast<SelectInst>(V)) {
7441 assert(DefSI->getCondition() == SI->getCondition() &&
7442 "The condition of DefSI does not match with SI");
7443 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7444 }
7445
7446 assert(V && "Failed to get select true/false value");
7447 return V;
7448}
7449
7450bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7451 assert(Shift->isShift() && "Expected a shift");
7452
7453 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7454 // general vector shifts, and (3) the shift amount is a select-of-splatted
7455 // values, hoist the shifts before the select:
7456 // shift Op0, (select Cond, TVal, FVal) -->
7457 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7458 //
7459 // This is inverting a generic IR transform when we know that the cost of a
7460 // general vector shift is more than the cost of 2 shift-by-scalars.
7461 // We can't do this effectively in SDAG because we may not be able to
7462 // determine if the select operands are splats from within a basic block.
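// A concrete (purely illustrative) instance with constant splats:
//   %amt = select i1 %c, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
//                        <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %r   = shl <4 x i32> %x, %amt
// becomes
//   %t = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
//   %f = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f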
7463 Type *Ty = Shift->getType();
7464 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7465 return false;
7466 Value *Cond, *TVal, *FVal;
7467 if (!match(Shift->getOperand(1),
7468 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7469 return false;
7470 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7471 return false;
7472
7473 IRBuilder<> Builder(Shift);
7474 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7475 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7476 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7477 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7478 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7479 Shift->eraseFromParent();
7480 return true;
7481}
7482
7483bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7484 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7485 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7486 "Expected a funnel shift");
7487
7488 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7489 // than general vector shifts, and (3) the shift amount is select-of-splatted
7490 // values, hoist the funnel shifts before the select:
7491 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7492 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7493 //
7494 // This is inverting a generic IR transform when we know that the cost of a
7495 // general vector shift is more than the cost of 2 shift-by-scalars.
7496 // We can't do this effectively in SDAG because we may not be able to
7497 // determine if the select operands are splats from within a basic block.
7498 Type *Ty = Fsh->getType();
7499 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7500 return false;
7501 Value *Cond, *TVal, *FVal;
7502 if (!match(Fsh->getOperand(2),
7503 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7504 return false;
7505 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7506 return false;
7507
7508 IRBuilder<> Builder(Fsh);
7509 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7510 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7511 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7512 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7513 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7514 Fsh->eraseFromParent();
7515 return true;
7516}
7517
7518/// If we have a SelectInst that will likely profit from branch prediction,
7519/// turn it into a branch.
7520bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
 7521 if (DisableSelectToBranch)
 7522 return false;
7523
7524 // If the SelectOptimize pass is enabled, selects have already been optimized.
 7525 if (!getCGPassBuilderOption().DisableSelectOptimize)
 7526 return false;
7527
7528 // Find all consecutive select instructions that share the same condition.
 7529 SmallVector<SelectInst *, 2> ASI;
 7530 ASI.push_back(SI);
 7531 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7532 It != SI->getParent()->end(); ++It) {
7533 SelectInst *I = dyn_cast<SelectInst>(&*It);
7534 if (I && SI->getCondition() == I->getCondition()) {
7535 ASI.push_back(I);
7536 } else {
7537 break;
7538 }
7539 }
7540
7541 SelectInst *LastSI = ASI.back();
7542 // Increment the current iterator to skip all the rest of select instructions
7543 // because they will be either "not lowered" or "all lowered" to branch.
7544 CurInstIterator = std::next(LastSI->getIterator());
7545 // Examine debug-info attached to the consecutive select instructions. They
7546 // won't be individually optimised by optimizeInst, so we need to perform
 7547 // DbgVariableRecord maintenance here instead.
7548 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7549 fixupDbgVariableRecordsOnInst(*SI);
7550
7551 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7552
7553 // Can we convert the 'select' to CF ?
7554 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7555 return false;
 7556
 7557 TargetLowering::SelectSupportKind SelectKind;
7558 if (SI->getType()->isVectorTy())
7559 SelectKind = TargetLowering::ScalarCondVectorVal;
7560 else
7561 SelectKind = TargetLowering::ScalarValSelect;
7562
7563 if (TLI->isSelectSupported(SelectKind) &&
 7564 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) ||
 7565 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7566 return false;
7567
7568 // The DominatorTree needs to be rebuilt by any consumers after this
7569 // transformation. We simply reset here rather than setting the ModifiedDT
7570 // flag to avoid restarting the function walk in runOnFunction for each
7571 // select optimized.
7572 DT.reset();
7573
7574 // Transform a sequence like this:
7575 // start:
7576 // %cmp = cmp uge i32 %a, %b
7577 // %sel = select i1 %cmp, i32 %c, i32 %d
7578 //
7579 // Into:
7580 // start:
7581 // %cmp = cmp uge i32 %a, %b
7582 // %cmp.frozen = freeze %cmp
7583 // br i1 %cmp.frozen, label %select.true, label %select.false
7584 // select.true:
7585 // br label %select.end
7586 // select.false:
7587 // br label %select.end
7588 // select.end:
7589 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7590 //
7591 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7592 // In addition, we may sink instructions that produce %c or %d from
7593 // the entry block into the destination(s) of the new branch.
7594 // If the true or false blocks do not contain a sunken instruction, that
7595 // block and its branch may be optimized away. In that case, one side of the
7596 // first branch will point directly to select.end, and the corresponding PHI
7597 // predecessor block will be the start block.
7598
7599 // Collect values that go on the true side and the values that go on the false
7600 // side.
7601 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7602 for (SelectInst *SI : ASI) {
7603 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7604 TrueInstrs.push_back(cast<Instruction>(V));
7605 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7606 FalseInstrs.push_back(cast<Instruction>(V));
7607 }
7608
7609 // Split the select block, according to how many (if any) values go on each
7610 // side.
7611 BasicBlock *StartBlock = SI->getParent();
7612 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7613 // We should split before any debug-info.
7614 SplitPt.setHeadBit(true);
7615
7616 IRBuilder<> IB(SI);
7617 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7618
7619 BasicBlock *TrueBlock = nullptr;
7620 BasicBlock *FalseBlock = nullptr;
7621 BasicBlock *EndBlock = nullptr;
7622 BranchInst *TrueBranch = nullptr;
7623 BranchInst *FalseBranch = nullptr;
7624 if (TrueInstrs.size() == 0) {
7625 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7626 CondFr, SplitPt, false, nullptr, nullptr, LI));
7627 FalseBlock = FalseBranch->getParent();
7628 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7629 } else if (FalseInstrs.size() == 0) {
7630 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7631 CondFr, SplitPt, false, nullptr, nullptr, LI));
7632 TrueBlock = TrueBranch->getParent();
7633 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7634 } else {
7635 Instruction *ThenTerm = nullptr;
7636 Instruction *ElseTerm = nullptr;
7637 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7638 nullptr, nullptr, LI);
7639 TrueBranch = cast<BranchInst>(ThenTerm);
7640 FalseBranch = cast<BranchInst>(ElseTerm);
7641 TrueBlock = TrueBranch->getParent();
7642 FalseBlock = FalseBranch->getParent();
7643 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7644 }
7645
7646 EndBlock->setName("select.end");
7647 if (TrueBlock)
7648 TrueBlock->setName("select.true.sink");
7649 if (FalseBlock)
7650 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7651 : "select.false.sink");
7652
7653 if (IsHugeFunc) {
7654 if (TrueBlock)
7655 FreshBBs.insert(TrueBlock);
7656 if (FalseBlock)
7657 FreshBBs.insert(FalseBlock);
7658 FreshBBs.insert(EndBlock);
7659 }
7660
7661 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7662
7663 static const unsigned MD[] = {
7664 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7665 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7666 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7667
7668 // Sink expensive instructions into the conditional blocks to avoid executing
7669 // them speculatively.
7670 for (Instruction *I : TrueInstrs)
7671 I->moveBefore(TrueBranch->getIterator());
7672 for (Instruction *I : FalseInstrs)
7673 I->moveBefore(FalseBranch->getIterator());
7674
7675 // If we did not create a new block for one of the 'true' or 'false' paths
7676 // of the condition, it means that side of the branch goes to the end block
7677 // directly and the path originates from the start block from the point of
7678 // view of the new PHI.
7679 if (TrueBlock == nullptr)
7680 TrueBlock = StartBlock;
7681 else if (FalseBlock == nullptr)
7682 FalseBlock = StartBlock;
7683
7684 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7685 // Use reverse iterator because later select may use the value of the
7686 // earlier select, and we need to propagate value through earlier select
7687 // to get the PHI operand.
7688 for (SelectInst *SI : llvm::reverse(ASI)) {
7689 // The select itself is replaced with a PHI Node.
7690 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7691 PN->insertBefore(EndBlock->begin());
7692 PN->takeName(SI);
7693 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7694 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7695 PN->setDebugLoc(SI->getDebugLoc());
7696
7697 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7698 SI->eraseFromParent();
7699 INS.erase(SI);
7700 ++NumSelectsExpanded;
7701 }
7702
7703 // Instruct OptimizeBlock to skip to the next block.
7704 CurInstIterator = StartBlock->end();
7705 return true;
7706}
7707
7708/// Some targets only accept certain types for splat inputs. For example a VDUP
7709/// in MVE takes a GPR (integer) register, and the instructions that incorporate
7710/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7711bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7712 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7713 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7714 m_Undef(), m_ZeroMask())))
7715 return false;
7716 Type *NewType = TLI->shouldConvertSplatType(SVI);
7717 if (!NewType)
7718 return false;
7719
7720 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7721 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7722 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7723 "Expected a type of the same size!");
7724 auto *NewVecType =
7725 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7726
7727 // Create a bitcast (shuffle (insert (bitcast(..))))
7728 IRBuilder<> Builder(SVI->getContext());
7729 Builder.SetInsertPoint(SVI);
7730 Value *BC1 = Builder.CreateBitCast(
7731 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7732 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7733 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7734
7735 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7736 RecursivelyDeleteTriviallyDeadInstructions(
7737 SVI, TLInfo, nullptr,
7738 [&](Value *V) { removeAllAssertingVHReferences(V); });
7739
7740 // Also hoist the bitcast up to its operand if they are not in the same
7741 // block.
7742 if (auto *BCI = dyn_cast<Instruction>(BC1))
7743 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7744 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7745 !Op->isTerminator() && !Op->isEHPad())
7746 BCI->moveAfter(Op);
7747
7748 return true;
7749}
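
A minimal standalone sketch (editorial illustration, not part of this pass) of the shape the rewrite above produces: the splat source is bitcast to an integer so it can live in a GPR, splatted, and the vector is bitcast back. The helper name and the 4-lane float type are assumptions for the example.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *buildSplatViaGPR(llvm::IRBuilder<> &B,
                                     llvm::Value *ScalarFloat) {
  using namespace llvm;
  // bitcast float -> i32 so the splat source can live in an integer register.
  Value *AsInt = B.CreateBitCast(ScalarFloat, B.getInt32Ty());
  // Splat the integer lane, then view the vector as <4 x float> again.
  Value *Splat = B.CreateVectorSplat(4, AsInt);
  return B.CreateBitCast(Splat, FixedVectorType::get(B.getFloatTy(), 4));
}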
7750
7751bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7752 // If the operands of I can be folded into a target instruction together with
7753 // I, duplicate and sink them.
7754 SmallVector<Use *, 4> OpsToSink;
7755 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7756 return false;
7757
7758 // OpsToSink can contain multiple uses in a use chain (e.g.
7759 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7760 // uses must come first, so we process the ops in reverse order so as to not
7761 // create invalid IR.
7762 BasicBlock *TargetBB = I->getParent();
7763 bool Changed = false;
7764 SmallVector<Use *, 4> ToReplace;
7765 Instruction *InsertPoint = I;
7766 DenseMap<const Instruction *, unsigned long> InstOrdering;
7767 unsigned long InstNumber = 0;
7768 for (const auto &I : *TargetBB)
7769 InstOrdering[&I] = InstNumber++;
7770
7771 for (Use *U : reverse(OpsToSink)) {
7772 auto *UI = cast<Instruction>(U->get());
7773 if (isa<PHINode>(UI))
7774 continue;
7775 if (UI->getParent() == TargetBB) {
7776 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7777 InsertPoint = UI;
7778 continue;
7779 }
7780 ToReplace.push_back(U);
7781 }
7782
7783 SetVector<Instruction *> MaybeDead;
7784 DenseMap<Instruction *, Instruction *> NewInstructions;
7785 for (Use *U : ToReplace) {
7786 auto *UI = cast<Instruction>(U->get());
7787 Instruction *NI = UI->clone();
7788
7789 if (IsHugeFunc) {
7790 // Now we clone an instruction, its operands' defs may sink to this BB
7791 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7792 for (Value *Op : NI->operands())
7793 if (auto *OpDef = dyn_cast<Instruction>(Op))
7794 FreshBBs.insert(OpDef->getParent());
7795 }
7796
7797 NewInstructions[UI] = NI;
7798 MaybeDead.insert(UI);
7799 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7800 NI->insertBefore(InsertPoint->getIterator());
7801 InsertPoint = NI;
7802 InsertedInsts.insert(NI);
7803
7804 // Update the use for the new instruction, making sure that we update the
7805 // sunk instruction uses, if it is part of a chain that has already been
7806 // sunk.
7807 Instruction *OldI = cast<Instruction>(U->getUser());
7808 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7809 It->second->setOperand(U->getOperandNo(), NI);
7810 else
7811 U->set(NI);
7812 Changed = true;
7813 }
7814
7815 // Remove instructions that are dead after sinking.
7816 for (auto *I : MaybeDead) {
7817 if (!I->hasNUsesOrMore(1)) {
7818 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7819 I->eraseFromParent();
7820 }
7821 }
7822
7823 return Changed;
7824}
7825
7826bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7827 Value *Cond = SI->getCondition();
7828 Type *OldType = Cond->getType();
7829 LLVMContext &Context = Cond->getContext();
7830 EVT OldVT = TLI->getValueType(*DL, OldType);
7831 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7832 unsigned RegWidth = RegType.getSizeInBits();
7833
7834 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7835 return false;
7836
7837 // If the register width is greater than the type width, expand the condition
7838 // of the switch instruction and each case constant to the width of the
7839 // register. By widening the type of the switch condition, subsequent
7840 // comparisons (for case comparisons) will not need to be extended to the
7841 // preferred register width, so we will potentially eliminate N-1 extends,
7842 // where N is the number of cases in the switch.
7843 auto *NewType = Type::getIntNTy(Context, RegWidth);
7844
7845 // Extend the switch condition and case constants using the target preferred
7846 // extend unless the switch condition is a function argument with an extend
7847 // attribute. In that case, we can avoid an unnecessary mask/extension by
7848 // matching the argument extension instead.
7849 Instruction::CastOps ExtType = Instruction::ZExt;
7850 // Some targets prefer SExt over ZExt.
7851 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7852 ExtType = Instruction::SExt;
7853
7854 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7855 if (Arg->hasSExtAttr())
7856 ExtType = Instruction::SExt;
7857 if (Arg->hasZExtAttr())
7858 ExtType = Instruction::ZExt;
7859 }
7860
7861 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7862 ExtInst->insertBefore(SI->getIterator());
7863 ExtInst->setDebugLoc(SI->getDebugLoc());
7864 SI->setCondition(ExtInst);
7865 for (auto Case : SI->cases()) {
7866 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7867 APInt WideConst = (ExtType == Instruction::ZExt)
7868 ? NarrowConst.zext(RegWidth)
7869 : NarrowConst.sext(RegWidth);
7870 Case.setValue(ConstantInt::get(Context, WideConst));
7871 }
7872
7873 return true;
7874}
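
Each case constant above is widened with APInt::zext or APInt::sext depending on the chosen extension; a small standalone sketch (editorial illustration, values picked arbitrarily):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt Narrow(8, 0xF0);          // i8 case constant: 240, or -16 if signed
  llvm::APInt WideZ = Narrow.zext(32);  // 0x000000F0 when ExtType is ZExt
  llvm::APInt WideS = Narrow.sext(32);  // 0xFFFFFFF0 when ExtType is SExt
  assert(WideZ.getZExtValue() == 0xF0 && WideS.getSExtValue() == -16);
  return 0;
}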
7875
7876bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7877 // The SCCP optimization tends to produce code like this:
7878 // switch(x) { case 42: phi(42, ...) }
7879 // Materializing the constant for the phi-argument needs instructions; So we
7880 // change the code to:
7881 // switch(x) { case 42: phi(x, ...) }
7882
7883 Value *Condition = SI->getCondition();
7884 // Avoid endless loop in degenerate case.
7885 if (isa<ConstantInt>(*Condition))
7886 return false;
7887
7888 bool Changed = false;
7889 BasicBlock *SwitchBB = SI->getParent();
7890 Type *ConditionType = Condition->getType();
7891
7892 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7893 ConstantInt *CaseValue = Case.getCaseValue();
7894 BasicBlock *CaseBB = Case.getCaseSuccessor();
7895 // Set to true if we previously checked that `CaseBB` is only reached by
7896 // a single case from this switch.
7897 bool CheckedForSinglePred = false;
7898 for (PHINode &PHI : CaseBB->phis()) {
7899 Type *PHIType = PHI.getType();
7900 // If ZExt is free then we can also catch patterns like this:
7901 // switch((i32)x) { case 42: phi((i64)42, ...); }
7902 // and replace `(i64)42` with `zext i32 %x to i64`.
7903 bool TryZExt =
7904 PHIType->isIntegerTy() &&
7905 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7906 TLI->isZExtFree(ConditionType, PHIType);
7907 if (PHIType == ConditionType || TryZExt) {
7908 // Set to true to skip this case because of multiple preds.
7909 bool SkipCase = false;
7910 Value *Replacement = nullptr;
7911 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7912 Value *PHIValue = PHI.getIncomingValue(I);
7913 if (PHIValue != CaseValue) {
7914 if (!TryZExt)
7915 continue;
7916 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7917 if (!PHIValueInt ||
7918 PHIValueInt->getValue() !=
7919 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7920 continue;
7921 }
7922 if (PHI.getIncomingBlock(I) != SwitchBB)
7923 continue;
7924 // We cannot optimize if there are multiple case labels jumping to
7925 // this block. This check may get expensive when there are many
7926 // case labels so we test for it last.
7927 if (!CheckedForSinglePred) {
7928 CheckedForSinglePred = true;
7929 if (SI->findCaseDest(CaseBB) == nullptr) {
7930 SkipCase = true;
7931 break;
7932 }
7933 }
7934
7935 if (Replacement == nullptr) {
7936 if (PHIValue == CaseValue) {
7937 Replacement = Condition;
7938 } else {
7939 IRBuilder<> Builder(SI);
7940 Replacement = Builder.CreateZExt(Condition, PHIType);
7941 }
7942 }
7943 PHI.setIncomingValue(I, Replacement);
7944 Changed = true;
7945 }
7946 if (SkipCase)
7947 break;
7948 }
7949 }
7950 }
7951 return Changed;
7952}
7953
7954bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7955 bool Changed = optimizeSwitchType(SI);
7956 Changed |= optimizeSwitchPhiConstants(SI);
7957 return Changed;
7958}
7959
7960namespace {
7961
7962/// Helper class to promote a scalar operation to a vector one.
7963/// This class is used to move an extractelement transition downward.
7964/// E.g.,
7965/// a = vector_op <2 x i32>
7966/// b = extractelement <2 x i32> a, i32 0
7967/// c = scalar_op b
7968/// store c
7969///
7970/// =>
7971/// a = vector_op <2 x i32>
7972/// c = vector_op a (equivalent to scalar_op on the related lane)
7973/// * d = extractelement <2 x i32> c, i32 0
7974/// * store d
7975/// Assuming both extractelement and store can be combined, we get rid of the
7976/// transition.
7977class VectorPromoteHelper {
7978 /// DataLayout associated with the current module.
7979 const DataLayout &DL;
7980
7981 /// Used to perform some checks on the legality of vector operations.
7982 const TargetLowering &TLI;
7983
7984 /// Used to estimate the cost of the promoted chain.
7985 const TargetTransformInfo &TTI;
7986
7987 /// The transition being moved downwards.
7988 Instruction *Transition;
7989
7990 /// The sequence of instructions to be promoted.
7991 SmallVector<Instruction *, 4> InstsToBePromoted;
7992
7993 /// Cost of combining a store and an extract.
7994 unsigned StoreExtractCombineCost;
7995
7996 /// Instruction that will be combined with the transition.
7997 Instruction *CombineInst = nullptr;
7998
7999 /// The instruction that represents the current end of the transition.
8000 /// Since we are faking the promotion until we reach the end of the chain
8001 /// of computation, we need a way to get the current end of the transition.
8002 Instruction *getEndOfTransition() const {
8003 if (InstsToBePromoted.empty())
8004 return Transition;
8005 return InstsToBePromoted.back();
8006 }
8007
8008 /// Return the index of the original value in the transition.
8009 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8010 /// c, is at index 0.
8011 unsigned getTransitionOriginalValueIdx() const {
8012 assert(isa<ExtractElementInst>(Transition) &&
8013 "Other kind of transitions are not supported yet");
8014 return 0;
8015 }
8016
8017 /// Return the index of the index in the transition.
8018 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8019 /// is at index 1.
8020 unsigned getTransitionIdx() const {
8021 assert(isa<ExtractElementInst>(Transition) &&
8022 "Other kind of transitions are not supported yet");
8023 return 1;
8024 }
8025
8026 /// Get the type of the transition.
8027 /// This is the type of the original value.
8028 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8029 /// transition is <2 x i32>.
8030 Type *getTransitionType() const {
8031 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8032 }
8033
8034 /// Promote \p ToBePromoted by moving \p Def downward through.
8035 /// I.e., we have the following sequence:
8036 /// Def = Transition <ty1> a to <ty2>
8037 /// b = ToBePromoted <ty2> Def, ...
8038 /// =>
8039 /// b = ToBePromoted <ty1> a, ...
8040 /// Def = Transition <ty1> ToBePromoted to <ty2>
8041 void promoteImpl(Instruction *ToBePromoted);
8042
8043 /// Check whether or not it is profitable to promote all the
8044 /// instructions enqueued to be promoted.
8045 bool isProfitableToPromote() {
8046 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8047 unsigned Index = isa<ConstantInt>(ValIdx)
8048 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8049 : -1;
8050 Type *PromotedType = getTransitionType();
8051
8052 StoreInst *ST = cast<StoreInst>(CombineInst);
8053 unsigned AS = ST->getPointerAddressSpace();
8054 // Check if this store is supported.
8055 if (!TLI.allowsMisalignedMemoryAccesses(
8056 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8057 ST->getAlign())) {
8058 // If this is not supported, there is no way we can combine
8059 // the extract with the store.
8060 return false;
8061 }
8062
8063 // The scalar chain of computation has to pay for the transition
8064 // scalar to vector.
8065 // The vector chain has to account for the combining cost.
8066 enum TargetTransformInfo::TargetCostKind CostKind =
8067 TargetTransformInfo::TCK_RecipThroughput;
8068 InstructionCost ScalarCost =
8069 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8070 InstructionCost VectorCost = StoreExtractCombineCost;
8071 for (const auto &Inst : InstsToBePromoted) {
8072 // Compute the cost.
8073 // By construction, all instructions being promoted are arithmetic ones.
8074 // Moreover, one argument is a constant that can be viewed as a splat
8075 // constant.
8076 Value *Arg0 = Inst->getOperand(0);
8077 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8078 isa<ConstantFP>(Arg0);
8079 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8080 if (IsArg0Constant)
8081 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8082 else
8083 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8084
8085 ScalarCost += TTI.getArithmeticInstrCost(
8086 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8087 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8088 CostKind, Arg0Info, Arg1Info);
8089 }
8090 LLVM_DEBUG(
8091 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8092 << ScalarCost << "\nVector: " << VectorCost << '\n');
8093 return ScalarCost > VectorCost;
8094 }
8095
8096 /// Generate a constant vector with \p Val with the same
8097 /// number of elements as the transition.
8098 /// \p UseSplat defines whether or not \p Val should be replicated
8099 /// across the whole vector.
8100 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8101 /// otherwise we generate a vector with as many poison as possible:
8102 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8103 /// used at the index of the extract.
8104 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8105 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8106 if (!UseSplat) {
8107 // If we cannot determine where the constant must be, we have to
8108 // use a splat constant.
8109 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8110 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8111 ExtractIdx = CstVal->getSExtValue();
8112 else
8113 UseSplat = true;
8114 }
8115
8116 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8117 if (UseSplat)
8118 return ConstantVector::getSplat(EC, Val);
8119
8120 if (!EC.isScalable()) {
8121 SmallVector<Constant *, 4> ConstVec;
8122 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8123 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8124 if (Idx == ExtractIdx)
8125 ConstVec.push_back(Val);
8126 else
8127 ConstVec.push_back(PoisonVal);
8128 }
8129 return ConstantVector::get(ConstVec);
8130 } else
8132 "Generate scalable vector for non-splat is unimplemented");
8133 }
8134
8135 /// Check if promoting the operand at \p OperandIdx in \p Use to a vector
8136 /// type can trigger undefined behavior.
8137 static bool canCauseUndefinedBehavior(const Instruction *Use,
8138 unsigned OperandIdx) {
8139 // It is not safe to introduce undef when the operand is on
8140 // the right-hand side of a division-like instruction.
8141 if (OperandIdx != 1)
8142 return false;
8143 switch (Use->getOpcode()) {
8144 default:
8145 return false;
8146 case Instruction::SDiv:
8147 case Instruction::UDiv:
8148 case Instruction::SRem:
8149 case Instruction::URem:
8150 return true;
8151 case Instruction::FDiv:
8152 case Instruction::FRem:
8153 return !Use->hasNoNaNs();
8154 }
8155 llvm_unreachable(nullptr);
8156 }
8157
8158public:
8159 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8160 const TargetTransformInfo &TTI, Instruction *Transition,
8161 unsigned CombineCost)
8162 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8163 StoreExtractCombineCost(CombineCost) {
8164 assert(Transition && "Do not know how to promote null");
8165 }
8166
8167 /// Check if we can promote \p ToBePromoted to \p Type.
8168 bool canPromote(const Instruction *ToBePromoted) const {
8169 // We could support CastInst too.
8170 return isa<BinaryOperator>(ToBePromoted);
8171 }
8172
8173 /// Check if it is profitable to promote \p ToBePromoted
8174 /// by moving downward the transition through.
8175 bool shouldPromote(const Instruction *ToBePromoted) const {
8176 // Promote only if all the operands can be statically expanded.
8177 // Indeed, we do not want to introduce any new kind of transitions.
8178 for (const Use &U : ToBePromoted->operands()) {
8179 const Value *Val = U.get();
8180 if (Val == getEndOfTransition()) {
8181 // If the use is a division and the transition is on the rhs,
8182 // we cannot promote the operation, otherwise we may create a
8183 // division by zero.
8184 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8185 return false;
8186 continue;
8187 }
8188 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8189 !isa<ConstantFP>(Val))
8190 return false;
8191 }
8192 // Check that the resulting operation is legal.
8193 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8194 if (!ISDOpcode)
8195 return false;
8196 return StressStoreExtract ||
8197 TLI.isOperationLegalOrCustom(
8198 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8199 }
8200
8201 /// Check whether or not \p Use can be combined
8202 /// with the transition.
8203 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8204 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8205
8206 /// Record \p ToBePromoted as part of the chain to be promoted.
8207 void enqueueForPromotion(Instruction *ToBePromoted) {
8208 InstsToBePromoted.push_back(ToBePromoted);
8209 }
8210
8211 /// Set the instruction that will be combined with the transition.
8212 void recordCombineInstruction(Instruction *ToBeCombined) {
8213 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8214 CombineInst = ToBeCombined;
8215 }
8216
8217 /// Promote all the instructions enqueued for promotion if it is
8218 /// profitable.
8219 /// \return True if the promotion happened, false otherwise.
8220 bool promote() {
8221 // Check if there is something to promote.
8222 // Right now, if we do not have anything to combine with,
8223 // we assume the promotion is not profitable.
8224 if (InstsToBePromoted.empty() || !CombineInst)
8225 return false;
8226
8227 // Check cost.
8228 if (!StressStoreExtract && !isProfitableToPromote())
8229 return false;
8230
8231 // Promote.
8232 for (auto &ToBePromoted : InstsToBePromoted)
8233 promoteImpl(ToBePromoted);
8234 InstsToBePromoted.clear();
8235 return true;
8236 }
8237};
8238
8239} // end anonymous namespace
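
When a splat is not required, getConstantVector above keeps the constant only in the extracted lane and fills the rest with poison. A minimal sketch of that shape (editorial illustration; the 4 x i32 type, lane 2, and value 7 are arbitrary choices):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

static llvm::Constant *lanePoisonVector(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *I32 = Type::getInt32Ty(Ctx);
  // <poison, poison, 7, poison> -- only the extracted lane carries the value.
  SmallVector<Constant *, 4> Lanes(4, PoisonValue::get(I32));
  Lanes[2] = ConstantInt::get(I32, 7);
  return ConstantVector::get(Lanes);
}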
8240
8241void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8242 // At this point, we know that all the operands of ToBePromoted but Def
8243 // can be statically promoted.
8244 // For Def, we need to use its parameter in ToBePromoted:
8245 // b = ToBePromoted ty1 a
8246 // Def = Transition ty1 b to ty2
8247 // Move the transition down.
8248 // 1. Replace all uses of the promoted operation by the transition.
8249 // = ... b => = ... Def.
8250 assert(ToBePromoted->getType() == Transition->getType() &&
8251 "The type of the result of the transition does not match "
8252 "the final type");
8253 ToBePromoted->replaceAllUsesWith(Transition);
8254 // 2. Update the type of the uses.
8255 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8256 Type *TransitionTy = getTransitionType();
8257 ToBePromoted->mutateType(TransitionTy);
8258 // 3. Update all the operands of the promoted operation with promoted
8259 // operands.
8260 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8261 for (Use &U : ToBePromoted->operands()) {
8262 Value *Val = U.get();
8263 Value *NewVal = nullptr;
8264 if (Val == Transition)
8265 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8266 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8267 isa<ConstantFP>(Val)) {
8268 // Use a splat constant if it is not safe to use undef.
8269 NewVal = getConstantVector(
8270 cast<Constant>(Val),
8271 isa<UndefValue>(Val) ||
8272 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8273 } else
8274 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8275 "this?");
8276 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8277 }
8278 Transition->moveAfter(ToBePromoted);
8279 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8280}
8281
8282/// Some targets can do store(extractelement) with one instruction.
8283/// Try to push the extractelement towards the stores when the target
8284/// has this feature and this is profitable.
8285bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8286 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8287 if (DisableStoreExtract ||
8288 (!StressStoreExtract &&
8289 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8290 Inst->getOperand(1), CombineCost)))
8291 return false;
8292
8293 // At this point we know that Inst is a vector to scalar transition.
8294 // Try to move it down the def-use chain, until:
8295 // - We can combine the transition with its single use
8296 // => we got rid of the transition.
8297 // - We escape the current basic block
8298 // => we would need to check that we are moving it at a cheaper place and
8299 // we do not do that for now.
8300 BasicBlock *Parent = Inst->getParent();
8301 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8302 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8303 // If the transition has more than one use, assume this is not going to be
8304 // beneficial.
8305 while (Inst->hasOneUse()) {
8306 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8307 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8308
8309 if (ToBePromoted->getParent() != Parent) {
8310 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8311 << ToBePromoted->getParent()->getName()
8312 << ") than the transition (" << Parent->getName()
8313 << ").\n");
8314 return false;
8315 }
8316
8317 if (VPH.canCombine(ToBePromoted)) {
8318 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8319 << "will be combined with: " << *ToBePromoted << '\n');
8320 VPH.recordCombineInstruction(ToBePromoted);
8321 bool Changed = VPH.promote();
8322 NumStoreExtractExposed += Changed;
8323 return Changed;
8324 }
8325
8326 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8327 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8328 return false;
8329
8330 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8331
8332 VPH.enqueueForPromotion(ToBePromoted);
8333 Inst = ToBePromoted;
8334 }
8335 return false;
8336}
8337
8338/// For the instruction sequence of store below, F and I values
8339/// are bundled together as an i64 value before being stored into memory.
8340/// Sometimes it is more efficient to generate separate stores for F and I,
8341/// which can remove the bitwise instructions or sink them to colder places.
8342///
8343/// (store (or (zext (bitcast F to i32) to i64),
8344/// (shl (zext I to i64), 32)), addr) -->
8345/// (store F, addr) and (store I, addr+4)
8346///
8347/// Similarly, splitting for other merged store can also be beneficial, like:
8348/// For pair of {i32, i32}, i64 store --> two i32 stores.
8349/// For pair of {i32, i16}, i64 store --> two i32 stores.
8350/// For pair of {i16, i16}, i32 store --> two i16 stores.
8351/// For pair of {i16, i8}, i32 store --> two i16 stores.
8352/// For pair of {i8, i8}, i16 store --> two i8 stores.
8353///
8354/// We allow each target to determine specifically which kind of splitting is
8355/// supported.
8356///
8357/// The store patterns are commonly seen from the simple code snippet below
8358/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8359/// void goo(const std::pair<int, float> &);
8360/// hoo() {
8361/// ...
8362/// goo(std::make_pair(tmp, ftmp));
8363/// ...
8364/// }
8365///
8366/// Although we already have similar splitting in DAG Combine, we duplicate
8367/// it in CodeGenPrepare to catch the case in which the pattern spans
8368/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8369/// during code expansion.
8370static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8371 const TargetLowering &TLI) {
8372 // Handle simple but common cases only.
8373 Type *StoreType = SI.getValueOperand()->getType();
8374
8375 // The code below assumes shifting a value by <number of bits>,
8376 // whereas scalable vectors would have to be shifted by
8377 // <2log(vscale) + number of bits> in order to store the
8378 // low/high parts. Bailing out for now.
8379 if (StoreType->isScalableTy())
8380 return false;
8381
8382 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8383 DL.getTypeSizeInBits(StoreType) == 0)
8384 return false;
8385
8386 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8387 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8388 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8389 return false;
8390
8391 // Don't split the store if it is volatile.
8392 if (SI.isVolatile())
8393 return false;
8394
8395 // Match the following patterns:
8396 // (store (or (zext LValue to i64),
8397 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8398 // or
8399 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8400 // (zext LValue to i64),
8401 // Expect both operands of OR and the first operand of SHL to have only
8402 // one use.
8403 Value *LValue, *HValue;
8404 if (!match(SI.getValueOperand(),
8405 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8406 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8407 m_SpecificInt(HalfValBitSize))))))
8408 return false;
8409
8410 // Check that LValue and HValue are integers no wider than HalfValBitSize.
8411 if (!LValue->getType()->isIntegerTy() ||
8412 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8413 !HValue->getType()->isIntegerTy() ||
8414 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8415 return false;
8416
8417 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8418 // as the input of target query.
8419 auto *LBC = dyn_cast<BitCastInst>(LValue);
8420 auto *HBC = dyn_cast<BitCastInst>(HValue);
8421 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8422 : EVT::getEVT(LValue->getType());
8423 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8424 : EVT::getEVT(HValue->getType());
8425 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8426 return false;
8427
8428 // Start to split store.
8429 IRBuilder<> Builder(SI.getContext());
8430 Builder.SetInsertPoint(&SI);
8431
8432 // If LValue/HValue is a bitcast in another BB, create a new one in current
8433 // BB so it may be merged with the split stores by the DAG combiner.
8434 if (LBC && LBC->getParent() != SI.getParent())
8435 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8436 if (HBC && HBC->getParent() != SI.getParent())
8437 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8438
8439 bool IsLE = SI.getDataLayout().isLittleEndian();
8440 auto CreateSplitStore = [&](Value *V, bool Upper) {
8441 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8442 Value *Addr = SI.getPointerOperand();
8443 Align Alignment = SI.getAlign();
8444 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8445 if (IsOffsetStore) {
8446 Addr = Builder.CreateGEP(
8447 SplitStoreType, Addr,
8448 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8449
8450 // When splitting the store in half, naturally one half will retain the
8451 // alignment of the original wider store, regardless of whether it was
8452 // over-aligned or not, while the other will require adjustment.
8453 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8454 }
8455 Builder.CreateAlignedStore(V, Addr, Alignment);
8456 };
8457
8458 CreateSplitStore(LValue, false);
8459 CreateSplitStore(HValue, true);
8460
8461 // Delete the old store.
8462 SI.eraseFromParent();
8463 return true;
8464}
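
The alignment adjustment in CreateSplitStore above comes from commonAlignment: the half stored at a byte offset can only keep the alignment implied by that offset. A small sketch under assumed values (editorial illustration, not part of this file):

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  // Splitting an i64 store aligned to 8: the half at offset 0 keeps Align(8),
  // while the half at byte offset 4 can only rely on Align(4).
  llvm::Align Orig(8);
  llvm::Align OffsetHalf = llvm::commonAlignment(Orig, /*Offset=*/4);
  assert(OffsetHalf == llvm::Align(4));
  return 0;
}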
8465
8466// Return true if the GEP has two operands, the first operand is of a sequential
8467// type, and the second operand is a constant.
8468static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8469 gep_type_iterator I = gep_type_begin(*GEP);
8470 return GEP->getNumOperands() == 2 && I.isSequential() &&
8471 isa<ConstantInt>(GEP->getOperand(1));
8472}
8473
8474// Try unmerging GEPs to reduce liveness interference (register pressure) across
8475// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8476// reducing liveness interference across those edges benefits global register
8477// allocation. Currently handles only certain cases.
8478//
8479// For example, unmerge %GEPI and %UGEPI as below.
8480//
8481// ---------- BEFORE ----------
8482// SrcBlock:
8483// ...
8484// %GEPIOp = ...
8485// ...
8486// %GEPI = gep %GEPIOp, Idx
8487// ...
8488// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8489// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8490// (* %GEPIOp is alive on the indirectbr edges only because it's used by
8491// %UGEPI)
8492//
8493// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8494// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8495// ...
8496//
8497// DstBi:
8498// ...
8499// %UGEPI = gep %GEPIOp, UIdx
8500// ...
8501// ---------------------------
8502//
8503// ---------- AFTER ----------
8504// SrcBlock:
8505// ... (same as above)
8506// (* %GEPI is still alive on the indirectbr edges)
8507// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8508// unmerging)
8509// ...
8510//
8511// DstBi:
8512// ...
8513// %UGEPI = gep %GEPI, (UIdx-Idx)
8514// ...
8515// ---------------------------
8516//
8517// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8518// no longer alive on them.
8519//
8520// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8521// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8522// not to disable further simplifications and optimizations as a result of GEP
8523// merging.
8524//
8525// Note this unmerging may increase the length of the data flow critical path
8526// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8527// between the register pressure and the length of data-flow critical
8528// path. Restricting this to the uncommon IndirectBr case would minimize the
8529// impact of potentially longer critical path, if any, and the impact on compile
8530// time.
8531static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8532 const TargetTransformInfo *TTI) {
8533 BasicBlock *SrcBlock = GEPI->getParent();
8534 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8535 // (non-IndirectBr) cases exit early here.
8536 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8537 return false;
8538 // Check that GEPI is a simple gep with a single constant index.
8539 if (!GEPSequentialConstIndexed(GEPI))
8540 return false;
8541 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8542 // Check that GEPI is a cheap one.
8543 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8544 TargetTransformInfo::TCK_SizeAndLatency) >
8545 TargetTransformInfo::TCC_Basic)
8546 return false;
8547 Value *GEPIOp = GEPI->getOperand(0);
8548 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8549 if (!isa<Instruction>(GEPIOp))
8550 return false;
8551 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8552 if (GEPIOpI->getParent() != SrcBlock)
8553 return false;
8554 // Check that GEP is used outside the block, meaning it's alive on the
8555 // IndirectBr edge(s).
8556 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8557 if (auto *I = dyn_cast<Instruction>(Usr)) {
8558 if (I->getParent() != SrcBlock) {
8559 return true;
8560 }
8561 }
8562 return false;
8563 }))
8564 return false;
8565 // The second elements of the GEP chains to be unmerged.
8566 std::vector<GetElementPtrInst *> UGEPIs;
8567 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8568 // on IndirectBr edges.
8569 for (User *Usr : GEPIOp->users()) {
8570 if (Usr == GEPI)
8571 continue;
8572 // Check if Usr is an Instruction. If not, give up.
8573 if (!isa<Instruction>(Usr))
8574 return false;
8575 auto *UI = cast<Instruction>(Usr);
8576 // If Usr is in the same block as GEPIOp, that's fine; skip it.
8577 if (UI->getParent() == SrcBlock)
8578 continue;
8579 // Check if Usr is a GEP. If not, give up.
8580 if (!isa<GetElementPtrInst>(Usr))
8581 return false;
8582 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8583 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8584 // the pointer operand to it. If so, record it in the vector. If not, give
8585 // up.
8586 if (!GEPSequentialConstIndexed(UGEPI))
8587 return false;
8588 if (UGEPI->getOperand(0) != GEPIOp)
8589 return false;
8590 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8591 return false;
8592 if (GEPIIdx->getType() !=
8593 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8594 return false;
8595 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8596 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8597 TargetTransformInfo::TCK_SizeAndLatency) >
8598 TargetTransformInfo::TCC_Basic)
8599 return false;
8600 UGEPIs.push_back(UGEPI);
8601 }
8602 if (UGEPIs.size() == 0)
8603 return false;
8604 // Check the materializing cost of (Uidx-Idx).
8605 for (GetElementPtrInst *UGEPI : UGEPIs) {
8606 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8607 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8608 InstructionCost ImmCost = TTI->getIntImmCost(
8609 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8610 if (ImmCost > TargetTransformInfo::TCC_Basic)
8611 return false;
8612 }
8613 // Now unmerge between GEPI and UGEPIs.
8614 for (GetElementPtrInst *UGEPI : UGEPIs) {
8615 UGEPI->setOperand(0, GEPI);
8616 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8617 Constant *NewUGEPIIdx = ConstantInt::get(
8618 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8619 UGEPI->setOperand(1, NewUGEPIIdx);
8620 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8621 // inbounds to avoid UB.
8622 if (!GEPI->isInBounds()) {
8623 UGEPI->setIsInBounds(false);
8624 }
8625 }
8626 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8627 // alive on IndirectBr edges).
8628 assert(llvm::none_of(GEPIOp->users(),
8629 [&](User *Usr) {
8630 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8631 }) &&
8632 "GEPIOp is used outside SrcBlock");
8633 return true;
8634}
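
The unmerging above only rewrites the constant operand: %UGEPI keeps its type but now indexes off %GEPI by the difference of the two immediates. A tiny sketch of that arithmetic (editorial illustration; the 64-bit width and the values 8/24 are made up):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt Idx(64, 8), UIdx(64, 24);
  llvm::APInt NewIdx = UIdx - Idx; // %UGEPI becomes: gep %GEPI, 16
  assert(NewIdx == 16);
  return 0;
}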
8635
8636static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8637 SmallSet<BasicBlock *, 32> &FreshBBs,
8638 bool IsHugeFunc) {
8639 // Try and convert
8640 // %c = icmp ult %x, 8
8641 // br %c, bla, blb
8642 // %tc = lshr %x, 3
8643 // to
8644 // %tc = lshr %x, 3
8645 // %c = icmp eq %tc, 0
8646 // br %c, bla, blb
8647 // Creating the cmp to zero can be better for the backend, especially if the
8648 // lshr produces flags that can be used automatically.
8649 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8650 return false;
8651
8652 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8653 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8654 return false;
8655
8656 Value *X = Cmp->getOperand(0);
8657 if (!X->hasUseList())
8658 return false;
8659
8660 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8661
8662 for (auto *U : X->users()) {
8663 Instruction *UI = dyn_cast<Instruction>(U);
8664 // A quick dominance check
8665 if (!UI ||
8666 (UI->getParent() != Branch->getParent() &&
8667 UI->getParent() != Branch->getSuccessor(0) &&
8668 UI->getParent() != Branch->getSuccessor(1)) ||
8669 (UI->getParent() != Branch->getParent() &&
8670 !UI->getParent()->getSinglePredecessor()))
8671 continue;
8672
8673 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8674 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8675 IRBuilder<> Builder(Branch);
8676 if (UI->getParent() != Branch->getParent())
8677 UI->moveBefore(Branch->getIterator());
8678 UI->dropPoisonGeneratingFlags();
8679 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8680 ConstantInt::get(UI->getType(), 0));
8681 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8682 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8683 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8684 return true;
8685 }
8686 if (Cmp->isEquality() &&
8687 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8688 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8689 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8690 IRBuilder<> Builder(Branch);
8691 if (UI->getParent() != Branch->getParent())
8692 UI->moveBefore(Branch->getIterator());
8693 UI->dropPoisonGeneratingFlags();
8694 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8695 ConstantInt::get(UI->getType(), 0));
8696 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8697 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8698 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8699 return true;
8700 }
8701 }
8702 return false;
8703}
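
The power-of-two case above relies on the identity that, for unsigned x, x < 2^k exactly when (x >> k) == 0. A self-contained check of that assumption (editorial illustration, not part of this file):

#include <cassert>
#include <cstdint>

// For unsigned x and the bound 8 == 1u << 3, "icmp ult x, 8" is equivalent
// to "icmp eq (lshr x, 3), 0", which is the rewrite performed above when the
// shifted value already exists near the branch.
static void checkUltVsShiftedZero(uint32_t X) {
  assert((X < 8u) == ((X >> 3) == 0u));
}

int main() {
  for (uint32_t X = 0; X < 64; ++X)
    checkUltVsShiftedZero(X);
  return 0;
}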
8704
8705bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8706 bool AnyChange = false;
8707 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8708
8709 // Bail out if we inserted the instruction to prevent optimizations from
8710 // stepping on each other's toes.
8711 if (InsertedInsts.count(I))
8712 return AnyChange;
8713
8714 // TODO: Move into the switch on opcode below here.
8715 if (PHINode *P = dyn_cast<PHINode>(I)) {
8716 // It is possible for very late stage optimizations (such as SimplifyCFG)
8717 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8718 // trivial PHI, go ahead and zap it here.
8719 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8720 LargeOffsetGEPMap.erase(P);
8721 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8722 P->eraseFromParent();
8723 ++NumPHIsElim;
8724 return true;
8725 }
8726 return AnyChange;
8727 }
8728
8729 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8730 // If the source of the cast is a constant, then this should have
8731 // already been constant folded. The only reason NOT to constant fold
8732 // it is if something (e.g. LSR) was careful to place the constant
8733 // evaluation in a block other than the one that uses it (e.g. to hoist
8734 // the address of globals out of a loop). If this is the case, we don't
8735 // want to forward-subst the cast.
8736 if (isa<Constant>(CI->getOperand(0)))
8737 return AnyChange;
8738
8739 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8740 return true;
8741
8742 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8743 isa<TruncInst>(I)) &&
8744 TLI->optimizeExtendOrTruncateConversion(
8745 I, LI->getLoopFor(I->getParent()), *TTI))
8746 return true;
8747
8748 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8749 /// Sink a zext or sext into its user blocks if the target type doesn't
8750 /// fit in one register
8751 if (TLI->getTypeAction(CI->getContext(),
8752 TLI->getValueType(*DL, CI->getType())) ==
8753 TargetLowering::TypeExpandInteger) {
8754 return SinkCast(CI);
8755 } else {
8756 if (TLI->optimizeExtendOrTruncateConversion(
8757 I, LI->getLoopFor(I->getParent()), *TTI))
8758 return true;
8759
8760 bool MadeChange = optimizeExt(I);
8761 return MadeChange | optimizeExtUses(I);
8762 }
8763 }
8764 return AnyChange;
8765 }
8766
8767 if (auto *Cmp = dyn_cast<CmpInst>(I))
8768 if (optimizeCmp(Cmp, ModifiedDT))
8769 return true;
8770
8771 if (match(I, m_URem(m_Value(), m_Value())))
8772 if (optimizeURem(I))
8773 return true;
8774
8775 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8776 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8777 bool Modified = optimizeLoadExt(LI);
8778 unsigned AS = LI->getPointerAddressSpace();
8779 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8780 return Modified;
8781 }
8782
8783 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8784 if (splitMergedValStore(*SI, *DL, *TLI))
8785 return true;
8786 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8787 unsigned AS = SI->getPointerAddressSpace();
8788 return optimizeMemoryInst(I, SI->getOperand(1),
8789 SI->getOperand(0)->getType(), AS);
8790 }
8791
8792 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8793 unsigned AS = RMW->getPointerAddressSpace();
8794 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8795 }
8796
8797 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8798 unsigned AS = CmpX->getPointerAddressSpace();
8799 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8800 CmpX->getCompareOperand()->getType(), AS);
8801 }
8802
8803 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8804
8805 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8806 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8807 return true;
8808
8809 // TODO: Move this into the switch on opcode - it handles shifts already.
8810 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8811 BinOp->getOpcode() == Instruction::LShr)) {
8812 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8813 if (CI && TLI->hasExtractBitsInsn())
8814 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8815 return true;
8816 }
8817
8818 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8819 if (GEPI->hasAllZeroIndices()) {
8820 /// The GEP operand must be a pointer, so must its result -> BitCast
8821 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8822 GEPI->getName(), GEPI->getIterator());
8823 NC->setDebugLoc(GEPI->getDebugLoc());
8824 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8825 RecursivelyDeleteTriviallyDeadInstructions(
8826 GEPI, TLInfo, nullptr,
8827 [&](Value *V) { removeAllAssertingVHReferences(V); });
8828 ++NumGEPsElim;
8829 optimizeInst(NC, ModifiedDT);
8830 return true;
8831 }
8832 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8833 return true;
8834 }
8835 }
8836
8837 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8838 // freeze(icmp a, const)) -> icmp (freeze a), const
8839 // This helps generate efficient conditional jumps.
8840 Instruction *CmpI = nullptr;
8841 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8842 CmpI = II;
8843 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8844 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8845
8846 if (CmpI && CmpI->hasOneUse()) {
8847 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8848 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8849 isa<ConstantPointerNull>(Op0);
8850 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8851 isa<ConstantPointerNull>(Op1);
8852 if (Const0 || Const1) {
8853 if (!Const0 || !Const1) {
8854 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8855 F->takeName(FI);
8856 CmpI->setOperand(Const0 ? 1 : 0, F);
8857 }
8858 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8859 FI->eraseFromParent();
8860 return true;
8861 }
8862 }
8863 return AnyChange;
8864 }
8865
8866 if (tryToSinkFreeOperands(I))
8867 return true;
8868
8869 switch (I->getOpcode()) {
8870 case Instruction::Shl:
8871 case Instruction::LShr:
8872 case Instruction::AShr:
8873 return optimizeShiftInst(cast<BinaryOperator>(I));
8874 case Instruction::Call:
8875 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8876 case Instruction::Select:
8877 return optimizeSelectInst(cast<SelectInst>(I));
8878 case Instruction::ShuffleVector:
8879 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8880 case Instruction::Switch:
8881 return optimizeSwitchInst(cast<SwitchInst>(I));
8882 case Instruction::ExtractElement:
8883 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8884 case Instruction::Br:
8885 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8886 }
8887
8888 return AnyChange;
8889}
8890
8891/// Given an OR instruction, check to see if this is a bitreverse
8892/// idiom. If so, insert the new intrinsic and return true.
8893bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8894 if (!I.getType()->isIntegerTy() ||
8895 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8896 TLI->getValueType(*DL, I.getType(), true)))
8897 return false;
8898
8899 SmallVector<Instruction *, 4> Insts;
8900 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8901 return false;
8902 Instruction *LastInst = Insts.back();
8903 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8904 RecursivelyDeleteTriviallyDeadInstructions(
8905 &I, TLInfo, nullptr,
8906 [&](Value *V) { removeAllAssertingVHReferences(V); });
8907 return true;
8908}
8909
8910// In this pass we look for GEP and cast instructions that are used
8911// across basic blocks and rewrite them to improve basic-block-at-a-time
8912// selection.
8913bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8914 SunkAddrs.clear();
8915 bool MadeChange = false;
8916
8917 do {
8918 CurInstIterator = BB.begin();
8919 ModifiedDT = ModifyDT::NotModifyDT;
8920 while (CurInstIterator != BB.end()) {
8921 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8922 if (ModifiedDT != ModifyDT::NotModifyDT) {
8923 // For huge functions we tend to quickly go through the inner optimization
8924 // opportunities in the BB. So we go back to the BB head to re-optimize
8925 // each instruction instead of going back to the function head.
8926 if (IsHugeFunc) {
8927 DT.reset();
8928 getDT(*BB.getParent());
8929 break;
8930 } else {
8931 return true;
8932 }
8933 }
8934 }
8935 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8936
8937 bool MadeBitReverse = true;
8938 while (MadeBitReverse) {
8939 MadeBitReverse = false;
8940 for (auto &I : reverse(BB)) {
8941 if (makeBitReverse(I)) {
8942 MadeBitReverse = MadeChange = true;
8943 break;
8944 }
8945 }
8946 }
8947 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8948
8949 return MadeChange;
8950}
8951
8952bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8953 bool AnyChange = false;
8954 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8955 AnyChange |= fixupDbgVariableRecord(DVR);
8956 return AnyChange;
8957}
8958
8959// FIXME: should updating debug-info really cause the "changed" flag to fire,
8960// which can cause a function to be reprocessed?
8961bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8962 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8963 DVR.Type != DbgVariableRecord::LocationType::Assign)
8964 return false;
8965
8966 // Does this DbgVariableRecord refer to a sunk address calculation?
8967 bool AnyChange = false;
8968 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8969 DVR.location_ops().end());
8970 for (Value *Location : LocationOps) {
8971 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8972 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8973 if (SunkAddr) {
8974 // Point dbg.value at locally computed address, which should give the best
8975 // opportunity to be accurately lowered. This update may change the type
8976 // of pointer being referred to; however this makes no difference to
8977 // debugging information, and we can't generate bitcasts that may affect
8978 // codegen.
8979 DVR.replaceVariableLocationOp(Location, SunkAddr);
8980 AnyChange = true;
8981 }
8982 }
8983 return AnyChange;
8984}
8985
8986static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
8987 DVR->removeFromParent();
8988 BasicBlock *VIBB = VI->getParent();
8989 if (isa<PHINode>(VI))
8990 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8991 else
8992 VIBB->insertDbgRecordAfter(DVR, &*VI);
8993}
8994
8995// A llvm.dbg.value may be using a value before its definition, due to
8996// optimizations in this pass and others. Scan for such dbg.values, and rescue
8997// them by moving the dbg.value to immediately after the value definition.
8998// FIXME: Ideally this should never be necessary, and this has the potential
8999// to re-order dbg.value intrinsics.
9000bool CodeGenPrepare::placeDbgValues(Function &F) {
9001 bool MadeChange = false;
9002 DominatorTree DT(F);
9003
9004 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9005 SmallVector<Instruction *, 4> VIs;
9006 for (Value *V : DbgItem->location_ops())
9007 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9008 VIs.push_back(VI);
9009
9010 // This item may depend on multiple instructions, complicating any
9011 // potential sink. This block takes the defensive approach, opting to
9012 // "undef" the item if it has more than one instruction and any of them do
9013 // not dominate it.
9014 for (Instruction *VI : VIs) {
9015 if (VI->isTerminator())
9016 continue;
9017
9018 // If VI is a phi in a block with an EHPad terminator, we can't insert
9019 // after it.
9020 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9021 continue;
9022
9023 // If the defining instruction dominates the dbg.value, we do not need
9024 // to move the dbg.value.
9025 if (DT.dominates(VI, Position))
9026 continue;
9027
9028 // If we depend on multiple instructions and any of them doesn't
9029 // dominate this DVI, we probably can't salvage it: moving it to
9030 // after any of the instructions could cause us to lose the others.
9031 if (VIs.size() > 1) {
9032 LLVM_DEBUG(
9033 dbgs()
9034 << "Unable to find valid location for Debug Value, undefing:\n"
9035 << *DbgItem);
9036 DbgItem->setKillLocation();
9037 break;
9038 }
9039
9040 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9041 << *DbgItem << ' ' << *VI);
9042 DbgInserterHelper(DbgItem, VI->getIterator());
9043 MadeChange = true;
9044 ++NumDbgValueMoved;
9045 }
9046 };
9047
9048 for (BasicBlock &BB : F) {
9049 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9050 // Process any DbgVariableRecord records attached to this
9051 // instruction.
9052 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9053 filterDbgVars(Insn.getDbgRecordRange()))) {
9054 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9055 continue;
9056 DbgProcessor(&DVR, &Insn);
9057 }
9058 }
9059 }
9060
9061 return MadeChange;
9062}
9063
9064// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9065// probes can be chained dependencies of other regular DAG nodes and block DAG
9066// combine optimizations.
9067bool CodeGenPrepare::placePseudoProbes(Function &F) {
9068 bool MadeChange = false;
9069 for (auto &Block : F) {
9070 // Move the remaining probes to the beginning of the block.
9071 auto FirstInst = Block.getFirstInsertionPt();
9072 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9073 ++FirstInst;
9074 BasicBlock::iterator I(FirstInst);
9075 I++;
9076 while (I != Block.end()) {
9077 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9078 II->moveBefore(FirstInst);
9079 MadeChange = true;
9080 }
9081 }
9082 }
9083 return MadeChange;
9084}
9085
9086/// Scale down both weights to fit into uint32_t.
9087static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9088 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9089 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9090 NewTrue = NewTrue / Scale;
9091 NewFalse = NewFalse / Scale;
9092}
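
A worked example of the scaling, with weights chosen to overflow uint32_t (editorial illustration of the function above, not part of this file):

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  // Assumed inputs: 6e9 and 2e9 do not fit in uint32_t.
  uint64_t NewTrue = 6000000000ull, NewFalse = 2000000000ull;
  // Scale = 6e9 / 4294967295 + 1 = 2, so the weights become 3e9 and 1e9,
  // both representable as uint32_t while keeping the 3:1 ratio.
  uint64_t Scale = (NewTrue / std::numeric_limits<uint32_t>::max()) + 1;
  assert(Scale == 2 && NewTrue / Scale == 3000000000ull &&
         NewFalse / Scale == 1000000000ull);
  return 0;
}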
9093
9094/// Some targets prefer to split a conditional branch like:
9095/// \code
9096/// %0 = icmp ne i32 %a, 0
9097/// %1 = icmp ne i32 %b, 0
9098/// %or.cond = or i1 %0, %1
9099/// br i1 %or.cond, label %TrueBB, label %FalseBB
9100/// \endcode
9101/// into multiple branch instructions like:
9102/// \code
9103/// bb1:
9104/// %0 = icmp ne i32 %a, 0
9105/// br i1 %0, label %TrueBB, label %bb2
9106/// bb2:
9107/// %1 = icmp ne i32 %b, 0
9108/// br i1 %1, label %TrueBB, label %FalseBB
9109/// \endcode
9110/// This usually allows instruction selection to do even further optimizations
9111/// and combine the compare with the branch instruction. Currently this is
9112/// applied for targets which have "cheap" jump instructions.
9113///
9114/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9115///
9116bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9117 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9118 return false;
9119
9120 bool MadeChange = false;
9121 for (auto &BB : F) {
9122 // Does this BB end with the following?
9123 // %cond1 = icmp|fcmp|binary instruction ...
9124 // %cond2 = icmp|fcmp|binary instruction ...
9125 // %cond.or = or|and i1 %cond1, cond2
9126 // br i1 %cond.or label %dest1, label %dest2"
9127 Instruction *LogicOp;
9128 BasicBlock *TBB, *FBB;
9129 if (!match(BB.getTerminator(),
9130 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9131 continue;
9132
9133 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9134 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9135 continue;
9136
9137 // The merging of mostly empty BB can cause a degenerate branch.
9138 if (TBB == FBB)
9139 continue;
9140
9141 unsigned Opc;
9142 Value *Cond1, *Cond2;
9143 if (match(LogicOp,
9144 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9145 Opc = Instruction::And;
9146 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9147 m_OneUse(m_Value(Cond2)))))
9148 Opc = Instruction::Or;
9149 else
9150 continue;
9151
9152 auto IsGoodCond = [](Value *Cond) {
9153 return match(
9154 Cond,
9155 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9156 m_LogicalOr(m_Value(), m_Value()))));
9157 };
9158 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9159 continue;
9160
9161 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9162
9163 // Create a new BB.
9164 auto *TmpBB =
9165 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9166 BB.getParent(), BB.getNextNode());
9167 if (IsHugeFunc)
9168 FreshBBs.insert(TmpBB);
9169
9170 // Update original basic block by using the first condition directly by the
9171 // branch instruction and removing the no longer needed and/or instruction.
9172 Br1->setCondition(Cond1);
9173 LogicOp->eraseFromParent();
9174
9175 // Depending on the condition we have to either replace the true or the
9176 // false successor of the original branch instruction.
9177 if (Opc == Instruction::And)
9178 Br1->setSuccessor(0, TmpBB);
9179 else
9180 Br1->setSuccessor(1, TmpBB);
9181
9182 // Fill in the new basic block.
9183 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9184 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9185 I->removeFromParent();
9186 I->insertBefore(Br2->getIterator());
9187 }
9188
9189 // Update PHI nodes in both successors. The original BB needs to be
9190 // replaced in one successor's PHI nodes, because the branch now comes from
9191 // the newly generated BB (TmpBB). In the other successor we need to add one
9192 // incoming edge to the PHI nodes, because both branch instructions target
9193 // now the same successor. Depending on the original branch condition
9194 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9195 // we perform the correct update for the PHI nodes.
9196 // This doesn't change the successor order of the just created branch
9197 // instruction (or any other instruction).
9198 if (Opc == Instruction::Or)
9199 std::swap(TBB, FBB);
9200
9201 // Replace the old BB with the new BB.
9202 TBB->replacePhiUsesWith(&BB, TmpBB);
9203
9204 // Add another incoming edge from the new BB.
9205 for (PHINode &PN : FBB->phis()) {
9206 auto *Val = PN.getIncomingValueForBlock(&BB);
9207 PN.addIncoming(Val, TmpBB);
9208 }
9209
9210 // Update the branch weights (from SelectionDAGBuilder::
9211 // FindMergedConditions).
9212 if (Opc == Instruction::Or) {
9213 // Codegen X | Y as:
9214 // BB1:
9215 // jmp_if_X TBB
9216 // jmp TmpBB
9217 // TmpBB:
9218 // jmp_if_Y TBB
9219 // jmp FBB
9220 //
9221
9222 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9223 // The requirement is that
9224 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9225 // = TrueProb for original BB.
9226 // Assuming the original weights are A and B, one choice is to set BB1's
9227 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9228 // assumes that
9229 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9230 // Another choice is to assume TrueProb for BB1 equals TrueProb for
9231 // TmpBB, but the math is more complicated.
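      // For instance, taking A = 3 and B = 1: BB1 gets weights 3:5 (TrueProb
      // 3/8) and TmpBB gets weights 3:2 (TrueProb 3/5), so the overall
      // probability of reaching TBB is 3/8 + (5/8)*(3/5) = 6/8, matching the
      // original TrueProb of 3/4.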
9232 uint64_t TrueWeight, FalseWeight;
9233 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9234 uint64_t NewTrueWeight = TrueWeight;
9235 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9236 scaleWeights(NewTrueWeight, NewFalseWeight);
9237 Br1->setMetadata(LLVMContext::MD_prof,
9238 MDBuilder(Br1->getContext())
9239 .createBranchWeights(TrueWeight, FalseWeight,
9240 hasBranchWeightOrigin(*Br1)));
9241
9242 NewTrueWeight = TrueWeight;
9243 NewFalseWeight = 2 * FalseWeight;
9244 scaleWeights(NewTrueWeight, NewFalseWeight);
9245 Br2->setMetadata(LLVMContext::MD_prof,
9246 MDBuilder(Br2->getContext())
9247 .createBranchWeights(TrueWeight, FalseWeight));
9248 }
9249 } else {
9250 // Codegen X & Y as:
9251 // BB1:
9252 // jmp_if_X TmpBB
9253 // jmp FBB
9254 // TmpBB:
9255 // jmp_if_Y TBB
9256 // jmp FBB
9257 //
9258 // This requires creation of TmpBB after CurBB.
9259
9260 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9261 // The requirement is that
9262 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9263 // = FalseProb for original BB.
9264 // Assuming the original weights are A and B, one choice is to set BB1's
9265 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9266 // assumes that
9267 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
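      // For instance, taking A = 3 and B = 1: BB1 gets weights 7:1 (FalseProb
      // 1/8) and TmpBB gets weights 6:1 (FalseProb 1/7), so the overall
      // probability of reaching FBB is 1/8 + (7/8)*(1/7) = 2/8, matching the
      // original FalseProb of 1/4.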
9268 uint64_t TrueWeight, FalseWeight;
9269 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9270 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9271 uint64_t NewFalseWeight = FalseWeight;
9272 scaleWeights(NewTrueWeight, NewFalseWeight);
9273 Br1->setMetadata(LLVMContext::MD_prof,
9274 MDBuilder(Br1->getContext())
9275 .createBranchWeights(TrueWeight, FalseWeight));
9276
9277 NewTrueWeight = 2 * TrueWeight;
9278 NewFalseWeight = FalseWeight;
9279 scaleWeights(NewTrueWeight, NewFalseWeight);
9280 Br2->setMetadata(LLVMContext::MD_prof,
9281 MDBuilder(Br2->getContext())
9282 .createBranchWeights(TrueWeight, FalseWeight));
9283 }
9284 }
9285
9286 ModifiedDT = ModifyDT::ModifyBBDT;
9287 MadeChange = true;
9288
9289 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9290 TmpBB->dump());
9291 }
9292 return MadeChange;
9293}
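
The profile-metadata handling above relies on two helpers: extractBranchWeights() reads the original !prof weights off the conditional branch, and MDBuilder::createBranchWeights() builds the metadata node to attach. Below is a minimal, self-contained sketch of that read/re-attach pattern; the helper name reattachBranchWeights and the explicit uint32_t casts are illustrative only (the metadata stores 32-bit weights, which is why the pass calls scaleWeights() before attaching).

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ProfDataUtils.h"
#include <cstdint>
using namespace llvm;

// Read the existing branch weights (if any) from Br and attach them again as
// fresh !prof metadata. A real transform would recompute or rescale the
// weights before attaching, as splitBranchCondition does above.
static void reattachBranchWeights(BranchInst *Br) {
  uint64_t TrueWeight, FalseWeight;
  if (!extractBranchWeights(*Br, TrueWeight, FalseWeight))
    return; // No profile data on this branch.
  Br->setMetadata(LLVMContext::MD_prof,
                  MDBuilder(Br->getContext())
                      .createBranchWeights(static_cast<uint32_t>(TrueWeight),
                                           static_cast<uint32_t>(FalseWeight)));
}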
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
#define DEBUG_TYPE
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:298
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1451
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
unsigned logBase2() const
Definition: APInt.h:1761
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
an instruction to allocate memory on the stack
Definition: Instructions.h:64
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:128
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:121
void setAlignment(Align Align)
Definition: Instructions.h:132
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:431
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:265
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
static unsigned getPointerOperandIndex()
Definition: Instructions.h:636
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
static unsigned getPointerOperandIndex()
Definition: Instructions.h:888
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:337
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:354
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:555
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:445
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:467
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:235
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
BinaryOps getOpcode() const
Definition: InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1415
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1348
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1458
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1292
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1297
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1283
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:829
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:56
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1120
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2328
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2635
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:169
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1474
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1423
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:850
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
bool erase(const KeyT &Val)
Definition: DenseMap.h:319
unsigned size() const
Definition: DenseMap.h:120
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:330
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:265
Type * getValueType() const
Definition: GlobalValue.h:298
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2214
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1005
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:247
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1416
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1923
LLVM_ABI Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition: IRBuilder.cpp:1223
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2463
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2329
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1197
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1883
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:538
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:90
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:406
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:879
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
bool isShift() const
Definition: Instruction.h:320
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:56
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:265
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:570
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:597
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:167
iterator find(const KeyT &Key)
Definition: MapVector.h:141
bool empty() const
Definition: MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:115
void clear()
Definition: MapVector.h:84
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:716
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:112
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
PointerIntPair - This class implements a pair of a pointer and small integer.
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1468
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:59
void clear()
Completely clear the SetVector.
Definition: SetVector.h:284
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
value_type pop_back_val()
Definition: SetVector.h:296
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:283
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
bool erase(const T &V)
Definition: SmallSet.h:198
void clear()
Definition: SmallSet.h:209
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
typename SuperClass::iterator iterator
Definition: SmallVector.h:578
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
static unsigned getPointerOperandIndex()
Definition: Instructions.h:388
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:626
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:657
Class to represent struct types.
Definition: DerivedTypes.h:218
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
LLVM_ABI bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
LLVM_ABI BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:255
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
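A minimal sketch of the vector-aware Type queries listed above; the helper name isNarrowInt and the 32-bit cutoff are illustrative assumptions:
  #include "llvm/IR/Type.h"
  #include "llvm/IR/Value.h"

  // True for integers (or vectors of integers) narrower than 32 bits;
  // getScalarType() looks through vector types to the element type.
  static bool isNarrowInt(const llvm::Value *V) {
    llvm::Type *ScalarTy = V->getType()->getScalarType();
    return ScalarTy->isIntegerTy() && ScalarTy->getScalarSizeInBits() < 32;
  }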
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1866
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:245
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:149
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:759
user_iterator user_begin()
Definition: Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
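A minimal sketch of the usual replacement idiom built from takeName, replaceAllUsesWith and eraseFromParent; the helper name replaceInst is illustrative:
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Value.h"

  // Redirect all users of I to Repl, keep I's name on the replacement,
  // then remove the now-dead instruction from its parent block.
  static void replaceInst(llvm::Instruction *I, llvm::Value *Repl) {
    Repl->takeName(I);
    I->replaceAllUsesWith(Repl);
    I->eraseFromParent();
  }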
iterator_range< user_iterator > users()
Definition: Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:953
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:242
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:701
bool use_empty() const
Definition: Value.h:346
user_iterator user_end()
Definition: Value.h:410
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:838
user_iterator_impl< User > user_iterator
Definition: Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5465
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:205
bool pointsToAliveValue() const
Definition: ValueHandle.h:225
This class represents zero extension of integer types.
int getNumOccurrences() const
Definition: CommandLine.h:400
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
constexpr bool isNonZero() const
Definition: TypeSize.h:159
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:862
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:524
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
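A minimal sketch of these combinators, matching "icmp eq (add X, C), 0" and binding X and C on success; the helper name matchAddCmpEqZero is an illustrative assumption:
  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/IR/PatternMatch.h"

  // Returns true for an equality compare of (X + C) against zero, binding
  // X to the variable operand and C to the constant addend.
  static bool matchAddCmpEqZero(llvm::Value *V, llvm::Value *&X,
                                const llvm::APInt *&C) {
    using namespace llvm::PatternMatch;
    llvm::CmpPredicate Pred;
    return match(V, m_ICmp(Pred, m_Add(m_Value(X), m_APInt(C)), m_ZeroInt())) &&
           Pred == llvm::CmpInst::ICMP_EQ;
  }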
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:862
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
Definition: PatternMatch.h:305
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:931
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:255
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition: SFrame.h:77
LLVM_ABI const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:477
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from the given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
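A minimal sketch of the range-based wrapper, assuming a BasicBlock reference; the helper name allPHIsTrivial and the single-incoming-value check are illustrative:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instructions.h"

  // Check every PHI in the block without spelling out begin()/end().
  static bool allPHIsTrivial(const llvm::BasicBlock &BB) {
    return llvm::all_of(BB.phis(), [](const llvm::PHINode &PN) {
      return PN.getNumIncomingValues() == 1;
    });
  }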
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
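A minimal sketch; unlike a plain eraseFromParent, the recursive form also deletes any operands that become trivially dead once I is gone. The helper name replaceAndClean is illustrative:
  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/Local.h"

  // Rewrite all uses of I, then delete I and any newly dead operand chain.
  static void replaceAndClean(llvm::Instruction *I, llvm::Value *Repl) {
    I->replaceAllUsesWith(Repl);
    llvm::RecursivelyDeleteTriviallyDeadInstructions(I);
  }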
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
Definition: DebugInfo.cpp:124
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2235
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1723
auto successors(const MachineBasicBlock *BB)
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2113
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
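A minimal sketch with the illustrative helper dropDeadInsts: the early-increment range advances the iterator before the loop body runs, so erasing the current instruction does not invalidate the traversal:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/Local.h"

  // Delete trivially dead instructions while walking the block in place.
  static bool dropDeadInsts(llvm::BasicBlock &BB) {
    bool Changed = false;
    for (llvm::Instruction &I : llvm::make_early_inc_range(BB)) {
      if (llvm::isInstructionTriviallyDead(&I)) {
        I.eraseFromParent();
        Changed = true;
      }
    }
    return Changed;
  }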
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2095
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:3725
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1444
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition: Analysis.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:51
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute misma...
Definition: Analysis.cpp:592
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:312
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
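A minimal sketch, assuming a conditional BranchInst BI; the helper name getWeights is illustrative:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Metadata.h"
  #include "llvm/IR/ProfDataUtils.h"

  // Read the branch's !prof metadata, if present, into 32-bit weights.
  static bool getWeights(const llvm::BranchInst &BI,
                         llvm::SmallVectorImpl<uint32_t> &Weights) {
    if (const llvm::MDNode *Prof = BI.getMetadata(llvm::LLVMContext::MD_prof))
      return llvm::extractBranchWeights(Prof, Weights);
    return false;
  }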
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
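A minimal sketch; here the bypass-width map is built by hand (64-bit divides bypassed with 32-bit ones) rather than taken from the target, and the helper name bypass64BitDiv is illustrative:
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Transforms/Utils/BypassSlowDivision.h"

  // Rewrite eligible 64-bit udiv/urem in BB to a guarded 32-bit division.
  static bool bypass64BitDiv(llvm::BasicBlock *BB) {
    llvm::DenseMap<unsigned, unsigned> BypassWidths;
    BypassWidths[64] = 32; // bypass 64-bit divides with 32-bit ones
    return llvm::bypassSlowDivision(BB, BypassWidths);
  }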
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition: MathExtras.h:706
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
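A minimal sketch, assuming Cond, Ptr and Val are already available at instruction I; the helper name emitGuardedStore is illustrative:
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"

  // Split the block before I and emit a store that runs only when Cond holds.
  static void emitGuardedStore(llvm::Instruction *I, llvm::Value *Cond,
                               llvm::Value *Ptr, llvm::Value *Val) {
    llvm::Instruction *ThenTerm = llvm::SplitBlockAndInsertIfThen(
        Cond, I->getIterator(), /*Unreachable=*/false);
    llvm::IRBuilder<> Builder(ThenTerm);
    Builder.CreateStore(Val, Ptr);
  }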
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:299
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
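A minimal sketch of mapping an IR type to an EVT; the helper name isNarrowIntegerVT and the i32 cutoff are illustrative assumptions:
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/Type.h"

  // True for simple scalar integer types narrower than 32 bits.
  static bool isNarrowIntegerVT(llvm::Type *Ty) {
    llvm::EVT VT = llvm::EVT::getEVT(Ty, /*HandleUnknown=*/true);
    return VT.isSimple() && VT.isScalarInteger() &&
           VT.bitsLT(llvm::EVT(llvm::MVT::i32));
  }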
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:72
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.