LoopUtils.cpp
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
45#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51using namespace llvm::PatternMatch;
52
53#define DEBUG_TYPE "loop-utils"
54
55static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
56static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
57
58bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
59 MemorySSAUpdater *MSSAU,
60 bool PreserveLCSSA) {
61 bool Changed = false;
62
63 // We re-use a vector for the in-loop predecessors.
64 SmallVector<BasicBlock *, 4> InLoopPredecessors;
65
66 auto RewriteExit = [&](BasicBlock *BB) {
67 assert(InLoopPredecessors.empty() &&
68 "Must start with an empty predecessors list!");
69 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
70
71 // See if there are any non-loop predecessors of this exit block and
72 // keep track of the in-loop predecessors.
73 bool IsDedicatedExit = true;
74 for (auto *PredBB : predecessors(BB))
75 if (L->contains(PredBB)) {
76 if (isa<IndirectBrInst>(PredBB->getTerminator()))
77 // We cannot rewrite exiting edges from an indirectbr.
78 return false;
79
80 InLoopPredecessors.push_back(PredBB);
81 } else {
82 IsDedicatedExit = false;
83 }
84
85 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
86
87 // Nothing to do if this is already a dedicated exit.
88 if (IsDedicatedExit)
89 return false;
90
91 auto *NewExitBB = SplitBlockPredecessors(
92 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
93
94 if (!NewExitBB)
95 LLVM_DEBUG(
96 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
97 << *L << "\n");
98 else
99 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
100 << NewExitBB->getName() << "\n");
101 return true;
102 };
103
104 // Walk the exit blocks directly rather than building up a data structure for
105 // them, but only visit each one once.
106 SmallPtrSet<BasicBlock *, 4> Visited;
107 for (auto *BB : L->blocks())
108 for (auto *SuccBB : successors(BB)) {
109 // We're looking for exit blocks so skip in-loop successors.
110 if (L->contains(SuccBB))
111 continue;
112
113 // Visit each exit block exactly once.
114 if (!Visited.insert(SuccBB).second)
115 continue;
116
117 Changed |= RewriteExit(SuccBB);
118 }
119
120 return Changed;
121}
122
123/// Returns the instructions that use values defined in the loop.
124SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
125 SmallVector<Instruction *, 8> UsedOutside;
126
127 for (auto *Block : L->getBlocks())
128 // FIXME: I believe that this could use copy_if if the Inst reference could
129 // be adapted into a pointer.
130 for (auto &Inst : *Block) {
131 auto Users = Inst.users();
132 if (any_of(Users, [&](User *U) {
133 auto *Use = cast<Instruction>(U);
134 return !L->contains(Use->getParent());
135 }))
136 UsedOutside.push_back(&Inst);
137 }
138
139 return UsedOutside;
140}
141
143 // By definition, all loop passes need the LoopInfo analysis and the
144 // Dominator tree it depends on. Because they all participate in the loop
145 // pass manager, they must also preserve these.
150
151 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
152 // here because users shouldn't directly get them from this header.
153 extern char &LoopSimplifyID;
154 extern char &LCSSAID;
159 // This is used in the LPPassManager to perform LCSSA verification on passes
160 // which preserve lcssa form
163
164 // Loop passes are designed to run inside of a loop pass manager which means
165 // that any function analyses they require must be required by the first loop
166 // pass in the manager (so that it is computed before the loop pass manager
167 // runs) and preserved by all loop passes in the manager. To make this
168 // reasonably robust, the set needed for most loop passes is maintained here.
169 // If your loop pass requires an analysis not listed here, you will need to
170 // carefully audit the loop pass manager nesting structure that results.
178 // FIXME: When all loop passes preserve MemorySSA, it can be required and
179 // preserved here instead of the individual handling in each pass.
180}
181
182/// Manually defined generic "LoopPass" dependency initialization. This is used
183/// to initialize the exact set of passes from above in \c
184/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
185/// with:
186///
187/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
188///
189/// As-if "LoopPass" were a pass.
193 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
194 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
201}
202
203/// Create MDNode for input string.
204static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
205 LLVMContext &Context = TheLoop->getHeader()->getContext();
206 Metadata *MDs[] = {
207 MDString::get(Context, Name),
208 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
209 return MDNode::get(Context, MDs);
210}
211
212/// Set input string into loop metadata while keeping other values intact.
213/// If the string is already in the loop metadata, update its value if it is
214/// different.
215void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
216 unsigned V) {
217 SmallVector<Metadata *, 4> MDs(1);
218 // If the loop already has metadata, retain it.
219 MDNode *LoopID = TheLoop->getLoopID();
220 if (LoopID) {
221 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
222 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
223 // If it is of form key = value, try to parse it.
224 if (Node->getNumOperands() == 2) {
225 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
226 if (S && S->getString() == StringMD) {
227 ConstantInt *IntMD =
228 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
229 if (IntMD && IntMD->getSExtValue() == V)
230 // It is already in place. Do nothing.
231 return;
232 // We need to update the value, so just skip it here and it will
233 // be added after copying the other existing nodes.
234 continue;
235 }
236 }
237 MDs.push_back(Node);
238 }
239 }
240 // Add new metadata.
241 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
242 // Replace current metadata node with new one.
243 LLVMContext &Context = TheLoop->getHeader()->getContext();
244 MDNode *NewLoopID = MDNode::get(Context, MDs);
245 // Set operand 0 to refer to the loop id itself.
246 NewLoopID->replaceOperandWith(0, NewLoopID);
247 TheLoop->setLoopID(NewLoopID);
248}
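// A minimal usage sketch (hypothetical helper, not part of this file): a
// transformation pass that has finished rewriting a loop typically records
// that fact with addStringMetadataToLoop so later queries (such as the
// "llvm.loop.isvectorized" check in hasVectorizeTransformation below) see it.
static void markLoopVectorizedSketch(Loop *L) {
  // Value 1 marks the loop as already vectorized; every other existing loop
  // attribute is preserved by the function above.
  addStringMetadataToLoop(L, "llvm.loop.isvectorized", 1);
}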
249
250std::optional<ElementCount>
251llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
252 std::optional<int> Width =
253 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
254
255 if (Width) {
256 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
257 TheLoop, "llvm.loop.vectorize.scalable.enable");
258 return ElementCount::get(*Width, IsScalable.value_or(false));
259 }
260
261 return std::nullopt;
262}
263
264std::optional<MDNode *> llvm::makeFollowupLoopID(
265 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
266 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
267 if (!OrigLoopID) {
268 if (AlwaysNew)
269 return nullptr;
270 return std::nullopt;
271 }
272
273 assert(OrigLoopID->getOperand(0) == OrigLoopID);
274
275 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
276 bool InheritSomeAttrs =
277 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
278 SmallVector<Metadata *, 4> MDs;
279 MDs.push_back(nullptr);
280
281 bool Changed = false;
282 if (InheritAllAttrs || InheritSomeAttrs) {
283 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
284 MDNode *Op = cast<MDNode>(Existing.get());
285
286 auto InheritThisAttribute = [InheritSomeAttrs,
287 InheritOptionsExceptPrefix](MDNode *Op) {
288 if (!InheritSomeAttrs)
289 return false;
290
291 // Skip malformed attribute metadata nodes.
292 if (Op->getNumOperands() == 0)
293 return true;
294 Metadata *NameMD = Op->getOperand(0).get();
295 if (!isa<MDString>(NameMD))
296 return true;
297 StringRef AttrName = cast<MDString>(NameMD)->getString();
298
299 // Do not inherit excluded attributes.
300 return !AttrName.starts_with(InheritOptionsExceptPrefix);
301 };
302
303 if (InheritThisAttribute(Op))
304 MDs.push_back(Op);
305 else
306 Changed = true;
307 }
308 } else {
309 // Modified if we dropped at least one attribute.
310 Changed = OrigLoopID->getNumOperands() > 1;
311 }
312
313 bool HasAnyFollowup = false;
314 for (StringRef OptionName : FollowupOptions) {
315 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
316 if (!FollowupNode)
317 continue;
318
319 HasAnyFollowup = true;
320 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
321 MDs.push_back(Option.get());
322 Changed = true;
323 }
324 }
325
326 // Attributes of the followup loop were not specified explicitly, so signal
327 // to the transformation pass to add suitable attributes.
328 if (!AlwaysNew && !HasAnyFollowup)
329 return std::nullopt;
330
331 // If no attributes were added or removed, the previous loop ID can be reused.
332 if (!AlwaysNew && !Changed)
333 return OrigLoopID;
334
335 // No attributes is equivalent to having no !llvm.loop metadata at all.
336 if (MDs.size() == 1)
337 return nullptr;
338
339 // Build the new loop ID.
340 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
341 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
342 return FollowupLoopID;
343}
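// A consumer sketch (hypothetical helper; the followup option names follow
// the llvm.loop.unroll convention and are illustrative here): a pass hands
// over its original loop ID plus the followup options it honours, then
// attaches whatever makeFollowupLoopID returns to the transformed loop.
static void applyFollowupAttributesSketch(Loop *L, MDNode *OrigLoopID) {
  std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
      OrigLoopID, {"llvm.loop.unroll.followup_all",
                   "llvm.loop.unroll.followup_unrolled"});
  // std::nullopt means no explicit followup attributes were requested and the
  // pass may derive its own; otherwise the returned node (possibly nullptr,
  // i.e. no !llvm.loop metadata at all) is applied as-is.
  if (NewLoopID)
    L->setLoopID(*NewLoopID);
}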
344
347}
348
351}
352
353TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
354 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
355 return TM_SuppressedByUser;
356
357 std::optional<int> Count =
358 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
359 if (Count)
360 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
361
362 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
363 return TM_ForcedByUser;
364
365 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
366 return TM_ForcedByUser;
367
368 if (hasDisableAllTransformsHint(L))
369 return TM_Disable;
370
371 return TM_Unspecified;
372}
373
374TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
375 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
376 return TM_SuppressedByUser;
377
378 std::optional<int> Count =
379 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
380 if (Count)
381 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
382
383 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
384 return TM_ForcedByUser;
385
386 if (hasDisableAllTransformsHint(L))
387 return TM_Disable;
388
389 return TM_Unspecified;
390}
391
392TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
393 std::optional<bool> Enable =
394 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
395
396 if (Enable == false)
397 return TM_SuppressedByUser;
398
399 std::optional<ElementCount> VectorizeWidth =
400 getOptionalElementCountLoopAttribute(L);
401 std::optional<int> InterleaveCount =
402 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
403
404 // 'Forcing' vector width and interleave count to one effectively disables
405 // this transformation.
406 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
407 InterleaveCount == 1)
408 return TM_SuppressedByUser;
409
410 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
411 return TM_Disable;
412
413 if (Enable == true)
414 return TM_ForcedByUser;
415
416 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
417 return TM_Disable;
418
419 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
420 return TM_Enable;
421
422 if (hasDisableAllTransformsHint(L))
423 return TM_Disable;
424
425 return TM_Unspecified;
426}
427
428TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
429 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
430 return TM_ForcedByUser;
431
432 if (hasDisableAllTransformsHint(L))
433 return TM_Disable;
434
435 return TM_Unspecified;
436}
437
438TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
439 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
440 return TM_SuppressedByUser;
441
442 if (hasDisableAllTransformsHint(L))
443 return TM_Disable;
444
445 return TM_Unspecified;
446}
447
448/// Does a BFS from a given node to all of its children inside a given loop.
449/// The returned vector of basic blocks includes the starting point.
451 DomTreeNode *N,
452 const Loop *CurLoop) {
453 SmallVector<BasicBlock *, 16> Worklist;
454 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
455 // Only include subregions in the top level loop.
456 BasicBlock *BB = DTN->getBlock();
457 if (CurLoop->contains(BB))
458 Worklist.push_back(DTN->getBlock());
459 };
460
461 AddRegionToWorklist(N);
462
463 for (size_t I = 0; I < Worklist.size(); I++) {
464 for (DomTreeNode *Child : DT->getNode(Worklist[I])->children())
465 AddRegionToWorklist(Child);
466 }
467
468 return Worklist;
469}
470
472 int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
473 assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode");
474 Value *IncV = PN->getIncomingValue(LatchIdx);
475
476 for (User *U : PN->users())
477 if (U != Cond && U != IncV) return false;
478
479 for (User *U : IncV->users())
480 if (U != Cond && U != PN) return false;
481 return true;
482}
483
484
485void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
486 LoopInfo *LI, MemorySSA *MSSA) {
487 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
488 auto *Preheader = L->getLoopPreheader();
489 assert(Preheader && "Preheader should exist!");
490
491 std::unique_ptr<MemorySSAUpdater> MSSAU;
492 if (MSSA)
493 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
494
495 // Now that we know the removal is safe, remove the loop by changing the
496 // branch from the preheader to go to the single exit block.
497 //
498 // Because we're deleting a large chunk of code at once, the sequence in which
499 // we remove things is very important to avoid invalidation issues.
500
501 // Tell ScalarEvolution that the loop is deleted. Do this before
502 // deleting the loop so that ScalarEvolution can look at the loop
503 // to determine what it needs to clean up.
504 if (SE) {
505 SE->forgetLoop(L);
507 }
508
509 Instruction *OldTerm = Preheader->getTerminator();
510 assert(!OldTerm->mayHaveSideEffects() &&
511 "Preheader must end with a side-effect-free terminator");
512 assert(OldTerm->getNumSuccessors() == 1 &&
513 "Preheader must have a single successor");
514 // Connect the preheader to the exit block. Keep the old edge to the header
515 // around to perform the dominator tree update in two separate steps
516 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
517 // preheader -> header.
518 //
519 //
520 // 0. Preheader 1. Preheader 2. Preheader
521 // | | | |
522 // V | V |
523 // Header <--\ | Header <--\ | Header <--\
524 // | | | | | | | | | | |
525 // | V | | | V | | | V |
526 // | Body --/ | | Body --/ | | Body --/
527 // V V V V V
528 // Exit Exit Exit
529 //
530 // By doing this in two separate steps we can perform the dominator tree
531 // update without using the batch update API.
532 //
533 // Even when the loop is never executed, we cannot remove the edge from the
534 // source block to the exit block. Consider the case where the unexecuted loop
535 // branches back to an outer loop. If we deleted the loop and removed the edge
536 // coming to this inner loop, this will break the outer loop structure (by
537 // deleting the backedge of the outer loop). If the outer loop is indeed a
538 // non-loop, it will be deleted in a future iteration of the loop deletion pass.
539 IRBuilder<> Builder(OldTerm);
540
541 auto *ExitBlock = L->getUniqueExitBlock();
542 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
543 if (ExitBlock) {
544 assert(ExitBlock && "Should have a unique exit block!");
545 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
546
547 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
548 // Remove the old branch. The conditional branch becomes a new terminator.
549 OldTerm->eraseFromParent();
550
551 // Rewrite phis in the exit block to get their inputs from the Preheader
552 // instead of the exiting block.
553 for (PHINode &P : ExitBlock->phis()) {
554 // Set the zero'th element of Phi to be from the preheader and remove all
555 // other incoming values. Given the loop has dedicated exits, all other
556 // incoming values must be from the exiting blocks.
557 int PredIndex = 0;
558 P.setIncomingBlock(PredIndex, Preheader);
559 // Removes all incoming values from all other exiting blocks (including
560 // duplicate values from an exiting block).
561 // Nuke all entries except the zero'th entry which is the preheader entry.
562 P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },
563 /* DeletePHIIfEmpty */ false);
564
565 assert((P.getNumIncomingValues() == 1 &&
566 P.getIncomingBlock(PredIndex) == Preheader) &&
567 "Should have exactly one value and that's from the preheader!");
568 }
569
570 if (DT) {
571 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
572 if (MSSA) {
573 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
574 *DT);
575 if (VerifyMemorySSA)
576 MSSA->verifyMemorySSA();
577 }
578 }
579
580 // Disconnect the loop body by branching directly to its exit.
581 Builder.SetInsertPoint(Preheader->getTerminator());
582 Builder.CreateBr(ExitBlock);
583 // Remove the old branch.
584 Preheader->getTerminator()->eraseFromParent();
585 } else {
586 assert(L->hasNoExitBlocks() &&
587 "Loop should have either zero or one exit blocks.");
588
589 Builder.SetInsertPoint(OldTerm);
590 Builder.CreateUnreachable();
591 Preheader->getTerminator()->eraseFromParent();
592 }
593
594 if (DT) {
595 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
596 if (MSSA) {
597 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
598 *DT);
599 SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),
600 L->block_end());
601 MSSAU->removeBlocks(DeadBlockSet);
602 if (VerifyMemorySSA)
603 MSSA->verifyMemorySSA();
604 }
605 }
606
607 // Use a map to unique and a vector to guarantee deterministic ordering.
608 llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;
609 llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords;
610
611 if (ExitBlock) {
612 // Given LCSSA form is satisfied, we should not have users of instructions
613 // within the dead loop outside of the loop. However, LCSSA doesn't take
614 // unreachable uses into account. We handle them here.
615 // We could do this after dropping all references (in that case all users in
616 // the loop would already be eliminated, leaving less work), but according to
617 // the API doc of User::dropAllReferences the only valid operation after
618 // dropping references is deletion. So let's substitute all uses of
619 // instructions from the loop with a poison value of the corresponding type first.
620 for (auto *Block : L->blocks())
621 for (Instruction &I : *Block) {
622 auto *Poison = PoisonValue::get(I.getType());
623 for (Use &U : llvm::make_early_inc_range(I.uses())) {
624 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
625 if (L->contains(Usr->getParent()))
626 continue;
627 // If we have a DT then we can check that uses outside the loop occur
628 // only in unreachable blocks.
629 if (DT)
630 assert(!DT->isReachableFromEntry(U) &&
631 "Unexpected user in reachable block");
632 U.set(Poison);
633 }
634
635 // For one of each variable encountered, preserve a debug record (set
636 // to Poison) and transfer it to the loop exit. This terminates any
637 // variable locations that were set during the loop.
638 for (DbgVariableRecord &DVR :
639 llvm::make_early_inc_range(filterDbgVars(I.getDbgRecordRange()))) {
640 DebugVariable Key(DVR.getVariable(), DVR.getExpression(),
641 DVR.getDebugLoc().get());
642 if (!DeadDebugSet.insert(Key).second)
643 continue;
644 // Unlinks the DVR from its container, for later insertion.
645 DVR.removeFromParent();
646 DeadDbgVariableRecords.push_back(&DVR);
647 }
648 }
649
650 // After the loop has been deleted all the values defined and modified
651 // inside the loop are going to be unavailable. Values computed in the
652 // loop will have been deleted, automatically causing their debug uses
653 // to be replaced with undef. Loop invariant values will still be available.
654 // Move dbg.values out of the loop so that earlier location ranges are still
655 // terminated and loop invariant assignments are preserved.
656 DIBuilder DIB(*ExitBlock->getModule());
657 BasicBlock::iterator InsertDbgValueBefore =
658 ExitBlock->getFirstInsertionPt();
659 assert(InsertDbgValueBefore != ExitBlock->end() &&
660 "There should be a non-PHI instruction in exit block, else these "
661 "instructions will have no parent.");
662
663 // Due to the "head" bit in BasicBlock::iterator, we're going to insert
664 // each DbgVariableRecord right at the start of the block, whereas dbg.values
665 // would be repeatedly inserted before the first instruction. To replicate
666 // this behaviour, do it backwards.
667 for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords))
668 ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);
669 }
670
671 // Remove the block from the reference counting scheme, so that we can
672 // delete it freely later.
673 for (auto *Block : L->blocks())
674 Block->dropAllReferences();
675
676 if (MSSA && VerifyMemorySSA)
677 MSSA->verifyMemorySSA();
678
679 if (LI) {
680 // Erase the instructions and the blocks without having to worry
681 // about ordering because we already dropped the references.
682 // NOTE: This iteration is safe because erasing the block does not remove
683 // its entry from the loop's block list. We do that in the next section.
684 for (BasicBlock *BB : L->blocks())
685 BB->eraseFromParent();
686
687 // Finally, the blocks from loopinfo. This has to happen late because
688 // otherwise our loop iterators won't work.
689
691 for (BasicBlock *BB : blocks)
692 LI->removeBlock(BB);
693
694 // The last step is to update LoopInfo now that we've eliminated this loop.
695 // Note: LoopInfo::erase removes the given loop and relinks its subloops with
696 // its parent, while removeLoop/removeChildLoop remove the given loop but
697 // do not relink its subloops, which is what we want.
698 if (Loop *ParentLoop = L->getParentLoop()) {
699 Loop::iterator I = find(*ParentLoop, L);
700 assert(I != ParentLoop->end() && "Couldn't find loop");
701 ParentLoop->removeChildLoop(I);
702 } else {
703 Loop::iterator I = find(*LI, L);
704 assert(I != LI->end() && "Couldn't find loop");
705 LI->removeLoop(I);
706 }
707 LI->destroy(L);
708 }
709}
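// A call-site sketch (hypothetical helper): a caller such as a loop-deletion
// pass first proves the loop has no observable effects, then hands over
// whatever analyses it happens to hold; each pointer argument may be null,
// as the guards above show.
static void deleteProvenDeadLoopSketch(Loop *L, DominatorTree &DT,
                                       ScalarEvolution &SE, LoopInfo &LI) {
  // Preconditions asserted above: a preheader exists and, when DT is given,
  // the loop is in LCSSA form.
  deleteDeadLoop(L, &DT, &SE, &LI, /*MSSA=*/nullptr);
}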
710
711void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
712 LoopInfo &LI, MemorySSA *MSSA) {
713 auto *Latch = L->getLoopLatch();
714 assert(Latch && "multiple latches not yet supported");
715 auto *Header = L->getHeader();
716 Loop *OutermostLoop = L->getOutermostLoop();
717
718 SE.forgetLoop(L);
720
721 std::unique_ptr<MemorySSAUpdater> MSSAU;
722 if (MSSA)
723 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
724
725 // Update the CFG and domtree. We chose to special case a couple
726 // of common cases for code quality and test readability reasons.
727 [&]() -> void {
728 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
729 if (!BI->isConditional()) {
730 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
731 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
732 MSSAU.get());
733 return;
734 }
735
736 // Conditional latch/exit - note that the latch can be shared by the inner
737 // and outer loop, so the other target doesn't need to be an exit.
738 if (L->isLoopExiting(Latch)) {
739 // TODO: Generalize ConstantFoldTerminator so that it can be used
740 // here without invalidating LCSSA or MemorySSA. (Tricky case for
741 // LCSSA: header is an exit block of a preceding sibling loop w/o
742 // dedicated exits.)
743 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
744 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
745
746 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
747 Header->removePredecessor(Latch, true);
748
749 IRBuilder<> Builder(BI);
750 auto *NewBI = Builder.CreateBr(ExitBB);
751 // Transfer the metadata to the new branch instruction (minus the
752 // loop info since this is no longer a loop)
753 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
754 LLVMContext::MD_annotation});
755
756 BI->eraseFromParent();
757 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
758 if (MSSA)
759 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
760 return;
761 }
762 }
763
764 // General case. By splitting the backedge, and then explicitly making it
765 // unreachable we gracefully handle corner cases such as switch and invoke
766 // terminators.
767 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
768
769 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
770 (void)changeToUnreachable(BackedgeBB->getTerminator(),
771 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
772 }();
773
774 // Erase (and destroy) this loop instance. Handles relinking sub-loops
775 // and blocks within the loop as needed.
776 LI.erase(L);
777
778 // If the loop we broke had a parent, then changeToUnreachable might have
779 // caused a block to be removed from the parent loop (see loop_nest_lcssa
780 // test case in zero-btc.ll for an example), thus changing the parent's
781 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
782 // loop which might have had a block removed.
783 if (OutermostLoop != L)
784 formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
785}
786
787
788/// Checks if \p L has an exiting latch branch. There may also be other
789/// exiting blocks. Returns the branch instruction terminating the loop
790/// latch if the above check is successful, nullptr otherwise.
792 BasicBlock *Latch = L->getLoopLatch();
793 if (!Latch)
794 return nullptr;
795
796 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
797 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
798 return nullptr;
799
800 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
801 LatchBR->getSuccessor(1) == L->getHeader()) &&
802 "At least one edge out of the latch must go to the header");
803
804 return LatchBR;
805}
806
807/// Return the estimated trip count for any exiting branch which dominates
808/// the loop latch.
809static std::optional<unsigned> getEstimatedTripCount(BranchInst *ExitingBranch,
810 Loop *L,
811 uint64_t &OrigExitWeight) {
812 // To estimate the number of times the loop body was executed, we want to
813 // know the number of times the backedge was taken, vs. the number of times
814 // we exited the loop.
815 uint64_t LoopWeight, ExitWeight;
816 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
817 return std::nullopt;
818
819 if (L->contains(ExitingBranch->getSuccessor(1)))
820 std::swap(LoopWeight, ExitWeight);
821
822 if (!ExitWeight)
823 // With an exit weight of zero the estimate would be infinite, which we
824 // have no way to return.
824 return std::nullopt;
825
826 OrigExitWeight = ExitWeight;
827
828 // Estimated exit count is a ratio of the loop weight by the weight of the
829 // edge exiting the loop, rounded to nearest.
830 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
831
832 // When ExitCount + 1 would wrap in unsigned, saturate at UINT_MAX.
833 if (ExitCount >= std::numeric_limits<unsigned>::max())
834 return std::numeric_limits<unsigned>::max();
835
836 // Estimated trip count is one plus estimated exit count.
837 return ExitCount + 1;
838}
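// Worked example of the arithmetic above (weights are illustrative): a latch
// branch with weight 127 on the in-loop edge and weight 1 on the exit edge
// gives an estimated exit count of divideNearest(127, 1) = 127 and therefore
// an estimated trip count of 128; weights 1000 and 10 give
// divideNearest(1000, 10) + 1 = 101.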
839
840std::optional<unsigned>
841llvm::getLoopEstimatedTripCount(Loop *L,
842 unsigned *EstimatedLoopInvocationWeight) {
843 // Currently we take the estimated exit count only from the loop latch,
844 // ignoring other exiting blocks. This can overestimate the trip count
845 // if we exit through another exit, but can never underestimate it.
846 // TODO: incorporate information from other exits
847 if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
848 uint64_t ExitWeight;
849 if (std::optional<uint64_t> EstTripCount =
850 getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
851 if (EstimatedLoopInvocationWeight)
852 *EstimatedLoopInvocationWeight = ExitWeight;
853 return *EstTripCount;
854 }
855 }
856 return std::nullopt;
857}
858
859bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
860 unsigned EstimatedloopInvocationWeight) {
861 // At the moment, we only support changing the estimated trip count of
862 // the latch branch. We could extend this API to manipulate estimated
863 // trip counts for any exit.
864 BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
865 if (!LatchBranch)
866 return false;
867
868 // Calculate taken and exit weights.
869 unsigned LatchExitWeight = 0;
870 unsigned BackedgeTakenWeight = 0;
871
872 if (EstimatedTripCount > 0) {
873 LatchExitWeight = EstimatedloopInvocationWeight;
874 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
875 }
876
877 // Swap if the back edge is taken when the condition is "false".
878 if (LatchBranch->getSuccessor(0) != L->getHeader())
879 std::swap(BackedgeTakenWeight, LatchExitWeight);
880
881 MDBuilder MDB(LatchBranch->getContext());
882
883 // Set/Update profile metadata.
884 LatchBranch->setMetadata(
885 LLVMContext::MD_prof,
886 MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
887
888 return true;
889}
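// A round-trip sketch (hypothetical helper) pairing the two functions above:
// read the current estimate, transform the loop, then write back a rescaled
// estimate. Unrolling by a factor UF is the typical client of this pattern.
static void rescaleTripCountAfterUnrollSketch(Loop *L, unsigned UF) {
  unsigned InvocationWeight = 0;
  if (std::optional<unsigned> TripCount =
          getLoopEstimatedTripCount(L, &InvocationWeight)) {
    // Each remaining iteration now does UF iterations' worth of work, so the
    // estimated trip count shrinks accordingly (but never below 1).
    unsigned NewTripCount = *TripCount / UF;
    if (NewTripCount == 0)
      NewTripCount = 1;
    setLoopEstimatedTripCount(L, NewTripCount, InvocationWeight);
  }
}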
890
891bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
892 ScalarEvolution &SE) {
893 Loop *OuterL = InnerLoop->getParentLoop();
894 if (!OuterL)
895 return true;
896
897 // Get the backedge taken count for the inner loop
898 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
899 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
900 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
901 !InnerLoopBECountSC->getType()->isIntegerTy())
902 return false;
903
904 // Get whether count is invariant to the outer loop
905 ScalarEvolution::LoopDisposition LD =
906 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
907 if (LD != ScalarEvolution::LoopInvariant)
908 return false;
909
910 return true;
911}
912
913Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
914 switch (RK) {
915 default:
916 llvm_unreachable("Unexpected recurrence kind");
917 case RecurKind::AddChainWithSubs:
918 case RecurKind::Sub:
919 case RecurKind::Add:
920 return Intrinsic::vector_reduce_add;
921 case RecurKind::Mul:
922 return Intrinsic::vector_reduce_mul;
923 case RecurKind::And:
924 return Intrinsic::vector_reduce_and;
925 case RecurKind::Or:
926 return Intrinsic::vector_reduce_or;
927 case RecurKind::Xor:
928 return Intrinsic::vector_reduce_xor;
929 case RecurKind::FMulAdd:
930 case RecurKind::FAdd:
931 return Intrinsic::vector_reduce_fadd;
932 case RecurKind::FMul:
933 return Intrinsic::vector_reduce_fmul;
934 case RecurKind::SMax:
935 return Intrinsic::vector_reduce_smax;
936 case RecurKind::SMin:
937 return Intrinsic::vector_reduce_smin;
938 case RecurKind::UMax:
939 return Intrinsic::vector_reduce_umax;
940 case RecurKind::UMin:
941 return Intrinsic::vector_reduce_umin;
942 case RecurKind::FMax:
943 case RecurKind::FMaxNum:
944 return Intrinsic::vector_reduce_fmax;
945 case RecurKind::FMin:
946 case RecurKind::FMinNum:
947 return Intrinsic::vector_reduce_fmin;
948 case RecurKind::FMaximum:
949 return Intrinsic::vector_reduce_fmaximum;
950 case RecurKind::FMinimum:
951 return Intrinsic::vector_reduce_fminimum;
952 case RecurKind::FMaximumNum:
953 return Intrinsic::vector_reduce_fmax;
954 case RecurKind::FMinimumNum:
955 return Intrinsic::vector_reduce_fmin;
956 }
957}
958
960 switch (IID) {
961 default:
962 llvm_unreachable("Unexpected intrinsic id");
963 case Intrinsic::umin:
964 return Intrinsic::vector_reduce_umin;
965 case Intrinsic::umax:
966 return Intrinsic::vector_reduce_umax;
967 case Intrinsic::smin:
968 return Intrinsic::vector_reduce_smin;
969 case Intrinsic::smax:
970 return Intrinsic::vector_reduce_smax;
971 }
972}
973
974// This is the inverse to getReductionForBinop
976 switch (RdxID) {
977 case Intrinsic::vector_reduce_fadd:
978 return Instruction::FAdd;
979 case Intrinsic::vector_reduce_fmul:
980 return Instruction::FMul;
981 case Intrinsic::vector_reduce_add:
982 return Instruction::Add;
983 case Intrinsic::vector_reduce_mul:
984 return Instruction::Mul;
985 case Intrinsic::vector_reduce_and:
986 return Instruction::And;
987 case Intrinsic::vector_reduce_or:
988 return Instruction::Or;
989 case Intrinsic::vector_reduce_xor:
990 return Instruction::Xor;
991 case Intrinsic::vector_reduce_smax:
992 case Intrinsic::vector_reduce_smin:
993 case Intrinsic::vector_reduce_umax:
994 case Intrinsic::vector_reduce_umin:
995 return Instruction::ICmp;
996 case Intrinsic::vector_reduce_fmax:
997 case Intrinsic::vector_reduce_fmin:
998 return Instruction::FCmp;
999 default:
1000 llvm_unreachable("Unexpected ID");
1001 }
1002}
1003
1004// This is the inverse to getArithmeticReductionInstruction
1006 switch (Opc) {
1007 default:
1008 break;
1009 case Instruction::Add:
1010 return Intrinsic::vector_reduce_add;
1011 case Instruction::Mul:
1012 return Intrinsic::vector_reduce_mul;
1013 case Instruction::And:
1014 return Intrinsic::vector_reduce_and;
1015 case Instruction::Or:
1016 return Intrinsic::vector_reduce_or;
1017 case Instruction::Xor:
1018 return Intrinsic::vector_reduce_xor;
1019 }
1021}
1022
1024 switch (RdxID) {
1025 default:
1026 llvm_unreachable("Unknown min/max recurrence kind");
1027 case Intrinsic::vector_reduce_umin:
1028 return Intrinsic::umin;
1029 case Intrinsic::vector_reduce_umax:
1030 return Intrinsic::umax;
1031 case Intrinsic::vector_reduce_smin:
1032 return Intrinsic::smin;
1033 case Intrinsic::vector_reduce_smax:
1034 return Intrinsic::smax;
1035 case Intrinsic::vector_reduce_fmin:
1036 return Intrinsic::minnum;
1037 case Intrinsic::vector_reduce_fmax:
1038 return Intrinsic::maxnum;
1039 case Intrinsic::vector_reduce_fminimum:
1040 return Intrinsic::minimum;
1041 case Intrinsic::vector_reduce_fmaximum:
1042 return Intrinsic::maximum;
1043 }
1044}
1045
1047 switch (RK) {
1048 default:
1049 llvm_unreachable("Unknown min/max recurrence kind");
1050 case RecurKind::UMin:
1051 return Intrinsic::umin;
1052 case RecurKind::UMax:
1053 return Intrinsic::umax;
1054 case RecurKind::SMin:
1055 return Intrinsic::smin;
1056 case RecurKind::SMax:
1057 return Intrinsic::smax;
1058 case RecurKind::FMin:
1059 case RecurKind::FMinNum:
1060 return Intrinsic::minnum;
1061 case RecurKind::FMax:
1062 case RecurKind::FMaxNum:
1063 return Intrinsic::maxnum;
1064 case RecurKind::FMinimum:
1065 return Intrinsic::minimum;
1066 case RecurKind::FMaximum:
1067 return Intrinsic::maximum;
1068 case RecurKind::FMinimumNum:
1069 return Intrinsic::minimumnum;
1070 case RecurKind::FMaximumNum:
1071 return Intrinsic::maximumnum;
1072 }
1073}
1074
1076 switch (RdxID) {
1077 case Intrinsic::vector_reduce_smax:
1078 return RecurKind::SMax;
1079 case Intrinsic::vector_reduce_smin:
1080 return RecurKind::SMin;
1081 case Intrinsic::vector_reduce_umax:
1082 return RecurKind::UMax;
1083 case Intrinsic::vector_reduce_umin:
1084 return RecurKind::UMin;
1085 case Intrinsic::vector_reduce_fmax:
1086 return RecurKind::FMax;
1087 case Intrinsic::vector_reduce_fmin:
1088 return RecurKind::FMin;
1089 default:
1090 return RecurKind::None;
1091 }
1092}
1093
1095 switch (RK) {
1096 default:
1097 llvm_unreachable("Unknown min/max recurrence kind");
1098 case RecurKind::UMin:
1099 return CmpInst::ICMP_ULT;
1100 case RecurKind::UMax:
1101 return CmpInst::ICMP_UGT;
1102 case RecurKind::SMin:
1103 return CmpInst::ICMP_SLT;
1104 case RecurKind::SMax:
1105 return CmpInst::ICMP_SGT;
1106 case RecurKind::FMin:
1107 return CmpInst::FCMP_OLT;
1108 case RecurKind::FMax:
1109 return CmpInst::FCMP_OGT;
1110 // We do not add FMinimum/FMaximum recurrence kind here since there is no
1111 // equivalent predicate which compares signed zeroes according to the
1112 // semantics of the intrinsics (llvm.minimum/maximum).
1113 }
1114}
1115
1116Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
1117 Value *Right) {
1118 Type *Ty = Left->getType();
1119 if (Ty->isIntOrIntVectorTy() ||
1120 (RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
1121 RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
1122 RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
1123 Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
1124 return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
1125 "rdx.minmax");
1126 }
1127 CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
1128 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
1129 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
1130 return Select;
1131}
1132
1133// Helper to generate an ordered reduction.
1134Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
1135 unsigned Op, RecurKind RdxKind) {
1136 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1137
1138 // Extract and apply reduction ops in ascending order:
1139 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1]
1140 Value *Result = Acc;
1141 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
1142 Value *Ext =
1143 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
1144
1145 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1146 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
1147 "bin.rdx");
1148 } else {
1150 "Invalid min/max");
1151 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
1152 }
1153 }
1154
1155 return Result;
1156}
1157
1158// Helper to generate a log2 shuffle reduction.
1159Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
1160 unsigned Op,
1161 TargetTransformInfo::ReductionShuffle RS,
1162 RecurKind RdxKind) {
1163 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
1164 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
1165 // and vector ops, reducing the set of values being computed by half each
1166 // round.
1167 assert(isPowerOf2_32(VF) &&
1168 "Reduction emission only supported for pow2 vectors!");
1169 // Note: fast-math flags are controlled by the builder configuration
1170 // and are assumed to apply to all generated arithmetic instructions. Other
1171 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
1172 // of the builder configuration, and since they're not passed explicitly,
1173 // will never be relevant here. Note that it would be generally unsound to
1174 // propagate these from an intrinsic call to the expansion anyway, as we
1175 // change the order of operations.
1176 auto BuildShuffledOp = [&Builder, &Op,
1177 &RdxKind](SmallVectorImpl<int> &ShuffleMask,
1178 Value *&TmpVec) -> void {
1179 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1180 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
1181 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
1182 "bin.rdx");
1183 } else {
1185 "Invalid min/max");
1186 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
1187 }
1188 };
1189
1190 Value *TmpVec = Src;
1191 if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1192 SmallVector<int, 32> ShuffleMask(VF);
1193 for (unsigned stride = 1; stride < VF; stride <<= 1) {
1194 // Initialise the mask with undef.
1195 llvm::fill(ShuffleMask, -1);
1196 for (unsigned j = 0; j < VF; j += stride << 1) {
1197 ShuffleMask[j] = j + stride;
1198 }
1199 BuildShuffledOp(ShuffleMask, TmpVec);
1200 }
1201 } else {
1202 SmallVector<int, 32> ShuffleMask(VF);
1203 for (unsigned i = VF; i != 1; i >>= 1) {
1204 // Move the upper half of the vector to the lower half.
1205 for (unsigned j = 0; j != i / 2; ++j)
1206 ShuffleMask[j] = i / 2 + j;
1207
1208 // Fill the rest of the mask with undef.
1209 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1210 BuildShuffledOp(ShuffleMask, TmpVec);
1211 }
1212 }
1213 // The result is in the first element of the vector.
1214 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
1215}
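// Worked example of the "split in half" strategy above for VF = 4 and an
// integer add: starting from <a, b, c, d>,
//   round 1: mask <2, 3, -1, -1> yields <c, d, ?, ?>; add -> <a+c, b+d, ?, ?>
//   round 2: mask <1, -1, -1, -1> yields <b+d, ?, ?, ?>; add -> <a+b+c+d, ...>
// and the scalar result is read from lane 0. The pairwise strategy instead
// uses masks <1, -1, 3, -1> and then <2, -1, -1, -1> to the same effect.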
1216
1218 Value *InitVal, PHINode *OrigPhi) {
1219 Value *NewVal = nullptr;
1220
1221 // First use the original phi to determine the new value we're trying to
1222 // select from in the loop.
1223 SelectInst *SI = nullptr;
1224 for (auto *U : OrigPhi->users()) {
1225 if ((SI = dyn_cast<SelectInst>(U)))
1226 break;
1227 }
1228 assert(SI && "One user of the original phi should be a select");
1229
1230 if (SI->getTrueValue() == OrigPhi)
1231 NewVal = SI->getFalseValue();
1232 else {
1233 assert(SI->getFalseValue() == OrigPhi &&
1234 "At least one input to the select should be the original Phi");
1235 NewVal = SI->getTrueValue();
1236 }
1237
1238 // If any predicate is true it means that we want to select the new value.
1239 Value *AnyOf =
1240 Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1241 // The compares in the loop may yield poison, which propagates through the
1242 // bitwise ORs. Freeze it here before the condition is used.
1243 AnyOf = Builder.CreateFreeze(AnyOf);
1244 return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
1245}
1246
1248 RecurKind RdxKind, Value *Start,
1249 Value *Sentinel) {
1250 bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind);
1251 bool IsMaxRdx = RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind);
1252 Value *MaxRdx = Src->getType()->isVectorTy()
1253 ? (IsMaxRdx ? Builder.CreateIntMaxReduce(Src, IsSigned)
1254 : Builder.CreateIntMinReduce(Src, IsSigned))
1255 : Src;
1256 // Correct the final reduction result back to the start value if the
1257 // min/max reduction result is the sentinel value.
1258 Value *Cmp =
1259 Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
1260 return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
1261}
1262
1264 FastMathFlags Flags) {
1265 bool Negative = false;
1266 switch (RdxID) {
1267 default:
1268 llvm_unreachable("Expecting a reduction intrinsic");
1269 case Intrinsic::vector_reduce_add:
1270 case Intrinsic::vector_reduce_mul:
1271 case Intrinsic::vector_reduce_or:
1272 case Intrinsic::vector_reduce_xor:
1273 case Intrinsic::vector_reduce_and:
1274 case Intrinsic::vector_reduce_fadd:
1275 case Intrinsic::vector_reduce_fmul: {
1276 unsigned Opc = getArithmeticReductionInstruction(RdxID);
1277 return ConstantExpr::getBinOpIdentity(Opc, Ty, false,
1278 Flags.noSignedZeros());
1279 }
1280 case Intrinsic::vector_reduce_umax:
1281 case Intrinsic::vector_reduce_umin:
1282 case Intrinsic::vector_reduce_smin:
1283 case Intrinsic::vector_reduce_smax: {
1284 Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RdxID);
1285 return ConstantExpr::getIntrinsicIdentity(ScalarID, Ty);
1286 }
1287 case Intrinsic::vector_reduce_fmax:
1288 case Intrinsic::vector_reduce_fmaximum:
1289 Negative = true;
1290 [[fallthrough]];
1291 case Intrinsic::vector_reduce_fmin:
1292 case Intrinsic::vector_reduce_fminimum: {
1293 bool PropagatesNaN = RdxID == Intrinsic::vector_reduce_fminimum ||
1294 RdxID == Intrinsic::vector_reduce_fmaximum;
1295 const fltSemantics &Semantics = Ty->getFltSemantics();
1296 return (!Flags.noNaNs() && !PropagatesNaN)
1297 ? ConstantFP::getQNaN(Ty, Negative)
1298 : !Flags.noInfs()
1299 ? ConstantFP::getInfinity(Ty, Negative)
1300 : ConstantFP::get(Ty, APFloat::getLargest(Semantics, Negative));
1301 }
1302 }
1303}
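// Concrete identities produced above for i32 (a worked example of the integer
// cases): add/or/xor -> 0, mul -> 1, and -> -1 (all ones), umax -> 0,
// umin -> UINT32_MAX, smax -> INT32_MIN, smin -> INT32_MAX. For the FP
// min/max kinds the neutral value depends on the fast-math flags, as the
// nested conditional above spells out (a quiet NaN, +/-infinity, or the
// largest finite value, with the sign flipped for the max variants).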
1304
1306 assert((!(K == RecurKind::FMin || K == RecurKind::FMax) ||
1307 (FMF.noNaNs() && FMF.noSignedZeros())) &&
1308 "nnan, nsz is expected to be set for FP min/max reduction.");
1309 Intrinsic::ID RdxID = getReductionIntrinsicID(K);
1310 return getReductionIdentity(RdxID, Tp, FMF);
1311}
1312
1314 RecurKind RdxKind) {
1315 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1316 auto getIdentity = [&]() {
1317 return getRecurrenceIdentity(RdxKind, SrcVecEltTy,
1318 Builder.getFastMathFlags());
1319 };
1320 switch (RdxKind) {
1321 case RecurKind::AddChainWithSubs:
1322 case RecurKind::Sub:
1323 case RecurKind::Add:
1324 case RecurKind::Mul:
1325 case RecurKind::And:
1326 case RecurKind::Or:
1327 case RecurKind::Xor:
1328 case RecurKind::SMax:
1329 case RecurKind::SMin:
1330 case RecurKind::UMax:
1331 case RecurKind::UMin:
1332 case RecurKind::FMax:
1333 case RecurKind::FMin:
1334 case RecurKind::FMinNum:
1335 case RecurKind::FMaxNum:
1336 case RecurKind::FMinimum:
1337 case RecurKind::FMaximum:
1338 case RecurKind::FMinimumNum:
1339 case RecurKind::FMaximumNum:
1340 return Builder.CreateUnaryIntrinsic(getReductionIntrinsicID(RdxKind), Src);
1341 case RecurKind::FMulAdd:
1342 case RecurKind::FAdd:
1343 return Builder.CreateFAddReduce(getIdentity(), Src);
1344 case RecurKind::FMul:
1345 return Builder.CreateFMulReduce(getIdentity(), Src);
1346 default:
1347 llvm_unreachable("Unhandled opcode");
1348 }
1349}
1350
1352 RecurKind Kind, Value *Mask, Value *EVL) {
1355 "AnyOf and FindIV reductions are not supported.");
1356 Intrinsic::ID Id = getReductionIntrinsicID(Kind);
1357 auto VPID = VPIntrinsic::getForIntrinsic(Id);
1359 "No VPIntrinsic for this reduction");
1360 auto *EltTy = cast<VectorType>(Src->getType())->getElementType();
1361 Value *Iden = getRecurrenceIdentity(Kind, EltTy, Builder.getFastMathFlags());
1362 Value *Ops[] = {Iden, Src, Mask, EVL};
1363 return Builder.CreateIntrinsic(EltTy, VPID, Ops);
1364}
1365
1367 Value *Src, Value *Start) {
1368 assert((Kind == RecurKind::FAdd || Kind == RecurKind::FMulAdd) &&
1369 "Unexpected reduction kind");
1370 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1371 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1372
1373 return B.CreateFAddReduce(Start, Src);
1374}
1375
1377 Value *Src, Value *Start, Value *Mask,
1378 Value *EVL) {
1379 assert((Kind == RecurKind::FAdd || Kind == RecurKind::FMulAdd) &&
1380 "Unexpected reduction kind");
1381 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1382 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1383
1384 Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
1385 auto VPID = VPIntrinsic::getForIntrinsic(Id);
1387 "No VPIntrinsic for this reduction");
1388 auto *EltTy = cast<VectorType>(Src->getType())->getElementType();
1389 Value *Ops[] = {Start, Src, Mask, EVL};
1390 return Builder.CreateIntrinsic(EltTy, VPID, Ops);
1391}
1392
1394 bool IncludeWrapFlags) {
1395 auto *VecOp = dyn_cast<Instruction>(I);
1396 if (!VecOp)
1397 return;
1398 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1399 : dyn_cast<Instruction>(OpValue);
1400 if (!Intersection)
1401 return;
1402 const unsigned Opcode = Intersection->getOpcode();
1403 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1404 for (auto *V : VL) {
1405 auto *Instr = dyn_cast<Instruction>(V);
1406 if (!Instr)
1407 continue;
1408 if (OpValue == nullptr || Opcode == Instr->getOpcode())
1409 VecOp->andIRFlags(V);
1410 }
1411}
1412
1413bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1414 ScalarEvolution &SE) {
1415 const SCEV *Zero = SE.getZero(S->getType());
1416 return SE.isAvailableAtLoopEntry(S, L) &&
1417 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
1418}
1419
1421 ScalarEvolution &SE) {
1422 const SCEV *Zero = SE.getZero(S->getType());
1423 return SE.isAvailableAtLoopEntry(S, L) &&
1424 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
1425}
1426
1427bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
1428 ScalarEvolution &SE) {
1429 const SCEV *Zero = SE.getZero(S->getType());
1430 return SE.isAvailableAtLoopEntry(S, L) &&
1431 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
1432}
1433
1435 ScalarEvolution &SE) {
1436 const SCEV *Zero = SE.getZero(S->getType());
1437 return SE.isAvailableAtLoopEntry(S, L) &&
1438 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
1439}
1440
1442 bool Signed) {
1443 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1446 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1447 return SE.isAvailableAtLoopEntry(S, L) &&
1448 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1449 SE.getConstant(Min));
1450}
1451
1453 bool Signed) {
1454 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1457 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1458 return SE.isAvailableAtLoopEntry(S, L) &&
1459 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1460 SE.getConstant(Max));
1461}
1462
1463//===----------------------------------------------------------------------===//
1464// rewriteLoopExitValues - Optimize IV users outside the loop.
1465// As a side effect, reduces the amount of IV processing within the loop.
1466//===----------------------------------------------------------------------===//
1467
1468static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1469 SmallPtrSet<const Instruction *, 8> Visited;
1470 SmallVector<const Instruction *, 8> WorkList;
1471 Visited.insert(I);
1472 WorkList.push_back(I);
1473 while (!WorkList.empty()) {
1474 const Instruction *Curr = WorkList.pop_back_val();
1475 // This use is outside the loop, nothing to do.
1476 if (!L->contains(Curr))
1477 continue;
1478 // Do we assume it is a "hard" use which will not be eliminated easily?
1479 if (Curr->mayHaveSideEffects())
1480 return true;
1481 // Otherwise, add all its users to worklist.
1482 for (const auto *U : Curr->users()) {
1483 auto *UI = cast<Instruction>(U);
1484 if (Visited.insert(UI).second)
1485 WorkList.push_back(UI);
1486 }
1487 }
1488 return false;
1489}
1490
1491// Collect information about PHI nodes which can be transformed in
1492// rewriteLoopExitValues.
1494 PHINode *PN; // For which PHI node is this replacement?
1495 unsigned Ith; // For which incoming value?
1496 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1497 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1498 bool HighCost; // Is this expansion a high-cost?
1499
1500 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1501 bool H)
1502 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1503 HighCost(H) {}
1504};
1505
1506// Check whether it is possible to delete the loop after rewriting exit
1507// value. If it is possible, ignore ReplaceExitValue and do rewriting
1508// aggressively.
1509static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1510 BasicBlock *Preheader = L->getLoopPreheader();
1511 // If there is no preheader, the loop will not be deleted.
1512 if (!Preheader)
1513 return false;
1514
1515 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1516 // We skip the multiple-ExitingBlocks case for simplicity.
1517 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1518 // after exit value rewriting, we can enhance the logic here.
1519 SmallVector<BasicBlock *, 4> ExitingBlocks;
1520 L->getExitingBlocks(ExitingBlocks);
1521 SmallVector<BasicBlock *, 8> ExitBlocks;
1522 L->getUniqueExitBlocks(ExitBlocks);
1523 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1524 return false;
1525
1526 BasicBlock *ExitBlock = ExitBlocks[0];
1527 BasicBlock::iterator BI = ExitBlock->begin();
1528 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1529 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1530
1531 // If the Incoming value of P is found in RewritePhiSet, we know it
1532 // could be rewritten to use a loop invariant value in transformation
1533 // phase later. Skip it in the loop invariant check below.
1534 bool found = false;
1535 for (const RewritePhi &Phi : RewritePhiSet) {
1536 unsigned i = Phi.Ith;
1537 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1538 found = true;
1539 break;
1540 }
1541 }
1542
1543 Instruction *I;
1544 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1545 if (!L->hasLoopInvariantOperands(I))
1546 return false;
1547
1548 ++BI;
1549 }
1550
1551 for (auto *BB : L->blocks())
1552 if (llvm::any_of(*BB, [](Instruction &I) {
1553 return I.mayHaveSideEffects();
1554 }))
1555 return false;
1556
1557 return true;
1558}
1559
1560/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1561/// and returns true if this Phi is an induction phi in the loop. When
1562/// isInductionPHI returns true, \p ID will also be set by isInductionPHI.
1563static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1565 if (!Phi)
1566 return false;
1567 if (!L->getLoopPreheader())
1568 return false;
1569 if (Phi->getParent() != L->getHeader())
1570 return false;
1571 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1572}
1573
1574int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
1575 ScalarEvolution *SE,
1576 const TargetTransformInfo *TTI,
1577 SCEVExpander &Rewriter, DominatorTree *DT,
1578 ReplaceExitVal ReplaceExitValue,
1579 SmallVector<WeakTrackingVH, 16> &DeadInsts) {
1580 // Check a pre-condition.
1581 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1582 "Indvars did not preserve LCSSA!");
1583
1584 SmallVector<BasicBlock*, 8> ExitBlocks;
1585 L->getUniqueExitBlocks(ExitBlocks);
1586
1587 SmallVector<RewritePhi, 8> RewritePhiSet;
1588 // Find all values that are computed inside the loop, but used outside of it.
1589 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1590 // the exit blocks of the loop to find them.
1591 for (BasicBlock *ExitBB : ExitBlocks) {
1592 // If there are no PHI nodes in this exit block, then no values defined
1593 // inside the loop are used on this path, skip it.
1594 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1595 if (!PN) continue;
1596
1597 unsigned NumPreds = PN->getNumIncomingValues();
1598
1599 // Iterate over all of the PHI nodes.
1600 BasicBlock::iterator BBI = ExitBB->begin();
1601 while ((PN = dyn_cast<PHINode>(BBI++))) {
1602 if (PN->use_empty())
1603 continue; // dead use, don't replace it
1604
1605 if (!SE->isSCEVable(PN->getType()))
1606 continue;
1607
1608 // Iterate over all of the values in all the PHI nodes.
1609 for (unsigned i = 0; i != NumPreds; ++i) {
1610 // If the value being merged in is not integer or is not defined
1611 // in the loop, skip it.
1612 Value *InVal = PN->getIncomingValue(i);
1613 if (!isa<Instruction>(InVal))
1614 continue;
1615
1616 // If this pred is for a subloop, not L itself, skip it.
1617 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
1618 continue; // The Block is in a subloop, skip it.
1619
1620 // Check that InVal is defined in the loop.
1621 Instruction *Inst = cast<Instruction>(InVal);
1622 if (!L->contains(Inst))
1623 continue;
1624
1625 // Find exit values which are induction variables in the loop, and are
1626 // unused in the loop, with the only use being the exit block PhiNode,
1627 // and the induction variable update binary operator.
1628 // The exit value can be replaced with the final value when it is cheap
1629 // to do so.
1632 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1633 if (IndPhi) {
1634 if (!checkIsIndPhi(IndPhi, L, SE, ID))
1635 continue;
1636 // This is an induction PHI. Check that the only users are PHI
1637 // nodes, and induction variable update binary operators.
1638 if (llvm::any_of(Inst->users(), [&](User *U) {
1639 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1640 return true;
1641 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1642 if (B && B != ID.getInductionBinOp())
1643 return true;
1644 return false;
1645 }))
1646 continue;
1647 } else {
1648 // If it is not an induction phi, it must be an induction update
1649 // binary operator with an induction phi user.
1650 BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1651 if (!B)
1652 continue;
1653 if (llvm::any_of(Inst->users(), [&](User *U) {
1654 PHINode *Phi = dyn_cast<PHINode>(U);
1655 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1656 return true;
1657 return false;
1658 }))
1659 continue;
1660 if (B != ID.getInductionBinOp())
1661 continue;
1662 }
1663 }
1664
1665 // Okay, this instruction has a user outside of the current loop
1666 // and varies predictably *inside* the loop. Evaluate the value it
1667 // contains when the loop exits, if possible. We prefer to start with
1668 // expressions which are true for all exits (so as to maximize
1669 // expression reuse by the SCEVExpander), but resort to per-exit
1670 // evaluation if that fails.
1671 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
1672 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1673 !SE->isLoopInvariant(ExitValue, L) ||
1674 !Rewriter.isSafeToExpand(ExitValue)) {
1675 // TODO: This should probably be sunk into SCEV in some way; maybe a
1676 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
1677 // most SCEV expressions and other recurrence types (e.g. shift
1678 // recurrences). Is there existing code we can reuse?
1679 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
1680 if (isa<SCEVCouldNotCompute>(ExitCount))
1681 continue;
1682 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
1683 if (AddRec->getLoop() == L)
1684 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1685 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1686 !SE->isLoopInvariant(ExitValue, L) ||
1687 !Rewriter.isSafeToExpand(ExitValue))
1688 continue;
1689 }
1690
1691 // Computing the value outside of the loop brings no benefit if it is
1692 // definitely used inside the loop in a way which can not be optimized
1693 // away. Avoid doing so unless we know we have a value which computes
1694 // the ExitValue already. TODO: This should be merged into SCEV
1695 // expander to leverage its knowledge of existing expressions.
1696 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
1697 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
1698 continue;
1699
1700 // Check if expansions of this SCEV would count as being high cost.
1701 bool HighCost = Rewriter.isHighCostExpansion(
1702 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1703
1704 // Note that we must not perform expansions until after
1705 // we query *all* the costs, because if we perform a temporary expansion
1706 // in between, one that we might not intend to keep, said expansion
1707 // *may* affect the cost calculation of the next SCEVs we'll query,
1708 // and the next SCEV may erroneously get a smaller cost.
1709
1710 // Collect all the candidate PHINodes to be rewritten.
1711 Instruction *InsertPt =
1712 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1713 &*Inst->getParent()->getFirstInsertionPt() : Inst;
1714 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1715 }
1716 }
1717 }
1718
1719 // TODO: evaluate whether it is beneficial to change how we calculate
1720 // high-cost: if we have SCEV 'A' which we know we will expand, should we
1721 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1722 // potentially giving a cost bonus to those other SCEVs?
1723
1724 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1725 int NumReplaced = 0;
1726
1727 // Transformation.
1728 for (const RewritePhi &Phi : RewritePhiSet) {
1729 PHINode *PN = Phi.PN;
1730
1731 // Only do the rewrite when the ExitValue can be expanded cheaply.
1732 // If LoopCanBeDel is true, rewrite exit value aggressively.
1733 if ((ReplaceExitValue == OnlyCheapRepl ||
1734 ReplaceExitValue == UnusedIndVarInLoop) &&
1735 !LoopCanBeDel && Phi.HighCost)
1736 continue;
1737
1738 Value *ExitVal = Rewriter.expandCodeFor(
1739 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1740
1741 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1742 << '\n'
1743 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1744
1745#ifndef NDEBUG
1746 // If we reuse an instruction from a loop which is neither L nor one of
1747 // its containing loops, we end up breaking LCSSA form for this loop by
1748 // creating a new use of its instruction.
1749 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1750 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1751 if (EVL != L)
1752 assert(EVL->contains(L) && "LCSSA breach detected!");
1753#endif
1754
1755 NumReplaced++;
1756 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
1757 PN->setIncomingValue(Phi.Ith, ExitVal);
1758 // It's necessary to tell ScalarEvolution about this explicitly so that
1759 // it can walk the def-use list and forget all SCEVs, as it may not be
1760 // watching the PHI itself. Once the new exit value is in place, there
1761 // may not be a def-use connection between the loop and every instruction
1762 // which got a SCEVAddRecExpr for that loop.
1763 SE->forgetValue(PN);
1764
1765 // If this instruction is dead now, delete it. Don't do it now to avoid
1766 // invalidating iterators.
1767 if (isInstructionTriviallyDead(Inst, TLI))
1768 DeadInsts.push_back(Inst);
1769
1770 // Replace PN with ExitVal if that is legal and does not break LCSSA.
1771 if (PN->getNumIncomingValues() == 1 &&
1772 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
1773 PN->replaceAllUsesWith(ExitVal);
1774 PN->eraseFromParent();
1775 }
1776 }
1777
1778 // The insertion point instruction may have been deleted; clear it out
1779 // so that the rewriter doesn't trip over it later.
1780 Rewriter.clearInsertPoint();
1781 return NumReplaced;
1782}
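
As a rough, source-level illustration of the rewrite performed above (a hypothetical loop, not taken from this file): a PHI that only carries the induction variable out of the loop can be replaced by its loop-invariant final value, after which the loop itself may become dead.

#include <cstdio>

// Illustrative sketch only: the effect of exit-value rewriting shown in C++
// rather than IR. 'before' returns the exit value of the induction variable;
// 'after' returns the equivalent loop-invariant final value.
static unsigned before(unsigned n) {
  unsigned i = 0;
  while (i != n)
    ++i;
  return i; // value carried out of the loop through the exit PHI
}

static unsigned after(unsigned n) {
  return n; // final value computed outside the loop; the loop is now dead
}

int main() { std::printf("%u %u\n", before(7), after(7)); }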
1783
1784/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1785/// \p OrigLoop.
1786void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1787 Loop *RemainderLoop, uint64_t UF) {
1788 assert(UF > 0 && "Zero unrolled factor is not supported");
1789 assert(UnrolledLoop != RemainderLoop &&
1790 "Unrolled and Remainder loops are expected to distinct");
1791
1792 // Get number of iterations in the original scalar loop.
1793 unsigned OrigLoopInvocationWeight = 0;
1794 std::optional<unsigned> OrigAverageTripCount =
1795 getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1796 if (!OrigAverageTripCount)
1797 return;
1798
1799 // Calculate number of iterations in unrolled loop.
1800 unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1801 // Calculate number of iterations for remainder loop.
1802 unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1803
1804 setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1805 OrigLoopInvocationWeight);
1806 setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1807 OrigLoopInvocationWeight);
1808}
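
A small worked example of the split performed above, using hypothetical numbers (an estimated trip count of 10 and an unroll factor of 4):

#include <cstdio>

// Illustrative sketch only: mirrors the / UF and % UF split performed by
// setProfileInfoAfterUnrolling, with made-up values.
int main() {
  unsigned OrigAverageTripCount = 10, UF = 4;                     // hypothetical inputs
  unsigned UnrolledAverageTripCount = OrigAverageTripCount / UF;  // 2
  unsigned RemainderAverageTripCount = OrigAverageTripCount % UF; // 2
  std::printf("unrolled=%u remainder=%u\n", UnrolledAverageTripCount,
              RemainderAverageTripCount);
}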
1809
1810/// Utility that implements appending of loops onto a worklist.
1811/// Loops are added in preorder (analogous to reverse postorder for trees),
1812/// and the worklist is processed LIFO.
1813template <typename RangeT>
1814static void appendReversedLoopsToWorklist(
1815 RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
1816 // We use an internal worklist to build up the preorder traversal without
1817 // recursion.
1818 SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1819
1820 // We walk the initial sequence of loops in reverse because we generally want
1821 // to visit defs before uses and the worklist is LIFO.
1822 for (Loop *RootL : Loops) {
1823 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1824 assert(PreOrderWorklist.empty() &&
1825 "Must start with an empty preorder walk worklist.");
1826 PreOrderWorklist.push_back(RootL);
1827 do {
1828 Loop *L = PreOrderWorklist.pop_back_val();
1829 PreOrderWorklist.append(L->begin(), L->end());
1830 PreOrderLoops.push_back(L);
1831 } while (!PreOrderWorklist.empty());
1832
1833 Worklist.insert(std::move(PreOrderLoops));
1834 PreOrderLoops.clear();
1835 }
1836}
1837
1838template <typename RangeT>
1839void llvm::appendLoopsToWorklist(RangeT &&Loops,
1840 SmallPriorityWorklist<Loop *, 4> &Worklist) {
1841 appendReversedLoopsToWorklist(reverse(Loops), Worklist);
1842}
1843
1844template LLVM_EXPORT_TEMPLATE void
1845llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1846 ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);
1847
1848template LLVM_EXPORT_TEMPLATE void
1849llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1850 SmallPriorityWorklist<Loop *, 4> &Worklist);
1851
1852void llvm::appendLoopsToWorklist(LoopInfo &LI,
1853 SmallPriorityWorklist<Loop *, 4> &Worklist) {
1854 appendReversedLoopsToWorklist(LI, Worklist);
1855}
1856
1857Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
1858 LoopInfo *LI, LPPassManager *LPM) {
1859 Loop &New = *LI->AllocateLoop();
1860 if (PL)
1861 PL->addChildLoop(&New);
1862 else
1863 LI->addTopLevelLoop(&New);
1864
1865 if (LPM)
1866 LPM->addLoop(New);
1867
1868 // Add all of the blocks in L to the new loop.
1869 for (BasicBlock *BB : L->blocks())
1870 if (LI->getLoopFor(BB) == L)
1871 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1872
1873 // Add all of the subloops to the new loop.
1874 for (Loop *I : *L)
1875 cloneLoop(I, &New, VM, LI, LPM);
1876
1877 return &New;
1878}
1879
1880/// IR Values for the lower and upper bounds of a pointer evolution. We
1881/// need to use value-handles because SCEV expansion can invalidate previously
1882/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1883/// a previous one.
1884struct PointerBounds {
1885 TrackingVH<Value> Start;
1886 TrackingVH<Value> End;
1887 Value *StrideToCheck;
1888};
1889
1890/// Expand code for the lower and upper bound of the pointer group \p CG
1891/// in \p TheLoop. \return the values for the bounds.
1892static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
1893 Loop *TheLoop, Instruction *Loc,
1894 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1895 LLVMContext &Ctx = Loc->getContext();
1896 Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);
1897
1898 Value *Start = nullptr, *End = nullptr;
1899 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1900 const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;
1901
1902 // If the Low and High values are themselves loop-variant, then we may want
1903 // to expand the range to include those covered by the outer loop as well.
1904 // There is a trade-off here with the advantage being that creating checks
1905 // using the expanded range permits the runtime memory checks to be hoisted
1906 // out of the outer loop. This reduces the cost of entering the inner loop,
1907 // which can be significant for low trip counts. The disadvantage is that
1908 // there is a chance we may now never enter the vectorized inner loop,
1909 // whereas using a restricted range check could have allowed us to enter at
1910 // least once. This is why the behaviour is not currently the default and is
1911 // controlled by the parameter 'HoistRuntimeChecks'.
1912 if (HoistRuntimeChecks && TheLoop->getParentLoop() &&
1913 isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {
1914 auto *HighAR = cast<SCEVAddRecExpr>(High);
1915 auto *LowAR = cast<SCEVAddRecExpr>(Low);
1916 const Loop *OuterLoop = TheLoop->getParentLoop();
1917 ScalarEvolution &SE = *Exp.getSE();
1918 const SCEV *Recur = LowAR->getStepRecurrence(SE);
1919 if (Recur == HighAR->getStepRecurrence(SE) &&
1920 HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {
1921 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1922 const SCEV *OuterExitCount = SE.getExitCount(OuterLoop, OuterLoopLatch);
1923 if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&
1924 OuterExitCount->getType()->isIntegerTy()) {
1925 const SCEV *NewHigh =
1926 cast<SCEVAddRecExpr>(High)->evaluateAtIteration(OuterExitCount, SE);
1927 if (!isa<SCEVCouldNotCompute>(NewHigh)) {
1928 LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "
1929 "outer loop in order to permit hoisting\n");
1930 High = NewHigh;
1931 Low = cast<SCEVAddRecExpr>(Low)->getStart();
1932 // If there is a possibility that the stride is negative then we have
1933 // to generate extra checks to ensure the stride is positive.
1934 if (!SE.isKnownNonNegative(
1935 SE.applyLoopGuards(Recur, HighAR->getLoop()))) {
1936 Stride = Recur;
1937 LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "
1938 "positive: "
1939 << *Stride << '\n');
1940 }
1941 }
1942 }
1943 }
1944 }
1945
1946 Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);
1947 End = Exp.expandCodeFor(High, PtrArithTy, Loc);
1948 if (CG->NeedsFreeze) {
1949 IRBuilder<> Builder(Loc);
1950 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
1951 End = Builder.CreateFreeze(End, End->getName() + ".fr");
1952 }
1953 Value *StrideVal =
1954 Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;
1955 LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");
1956 return {Start, End, StrideVal};
1957}
1958
1959/// Turns a collection of checks into a collection of expanded upper and
1960/// lower bounds for both pointers in the check.
1961static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
1962expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
1963 Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {
1964 SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
1965
1966 // Here we're relying on the SCEV Expander's cache to only emit code for the
1967 // same bounds once.
1968 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1969 [&](const RuntimePointerCheck &Check) {
1970 PointerBounds First = expandBounds(Check.first, L, Loc, Exp,
1971 HoistRuntimeChecks),
1972 Second = expandBounds(Check.second, L, Loc, Exp,
1973 HoistRuntimeChecks);
1974 return std::make_pair(First, Second);
1975 });
1976
1977 return ChecksWithBounds;
1978}
1979
1980Value *llvm::addRuntimeChecks(
1981 Instruction *Loc, Loop *TheLoop,
1982 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1983 SCEVExpander &Exp, bool HoistRuntimeChecks) {
1984 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1985 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1986 auto ExpandedChecks =
1987 expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);
1988
1989 LLVMContext &Ctx = Loc->getContext();
1990 IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
1991 ChkBuilder.SetInsertPoint(Loc);
1992 // Our instructions might fold to a constant.
1993 Value *MemoryRuntimeCheck = nullptr;
1994
1995 for (const auto &[A, B] : ExpandedChecks) {
1996 // Check if two pointers (A and B) conflict where conflict is computed as:
1997 // start(A) <= end(B) && start(B) <= end(A)
1998
1999 assert((A.Start->getType()->getPointerAddressSpace() ==
2000 B.End->getType()->getPointerAddressSpace()) &&
2001 (B.Start->getType()->getPointerAddressSpace() ==
2002 A.End->getType()->getPointerAddressSpace()) &&
2003 "Trying to bounds check pointers with different address spaces");
2004
2005 // [A|B].Start points to the first accessed byte under base [A|B].
2006 // [A|B].End points to the last accessed byte, plus one.
2007 // There is no conflict when the intervals are disjoint:
2008 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2009 //
2010 // bound0 = (B.Start < A.End)
2011 // bound1 = (A.Start < B.End)
2012 // IsConflict = bound0 & bound1
2013 Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");
2014 Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");
2015 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2016 if (A.StrideToCheck) {
2017 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
2018 A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),
2019 "stride.check");
2020 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
2021 }
2022 if (B.StrideToCheck) {
2023 Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(
2024 B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),
2025 "stride.check");
2026 IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
2027 }
2028 if (MemoryRuntimeCheck) {
2029 IsConflict =
2030 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2031 }
2032 MemoryRuntimeCheck = IsConflict;
2033 }
2034
2035 return MemoryRuntimeCheck;
2036}
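
The conflict predicate built above reduces to a simple interval-overlap test; a minimal scalar sketch with hypothetical byte ranges standing in for the expanded SCEV bounds:

#include <cstdio>

// Illustrative sketch only: the scalar form of the check emitted above.
// AStart/AEnd and BStart/BEnd stand in for the expanded lower/upper bounds.
static bool conflicts(unsigned AStart, unsigned AEnd, unsigned BStart,
                      unsigned BEnd) {
  bool Bound0 = AStart < BEnd; // "bound0"
  bool Bound1 = BStart < AEnd; // "bound1"
  return Bound0 && Bound1;     // "found.conflict"
}

int main() {
  std::printf("%d\n", conflicts(0, 8, 8, 16)); // 0: disjoint ranges
  std::printf("%d\n", conflicts(0, 8, 4, 12)); // 1: overlapping ranges
}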
2037
2038Value *llvm::addDiffRuntimeChecks(
2039 Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
2040 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
2041
2042 LLVMContext &Ctx = Loc->getContext();
2043 IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
2044 ChkBuilder.SetInsertPoint(Loc);
2045 // Our instructions might fold to a constant.
2046 Value *MemoryRuntimeCheck = nullptr;
2047
2048 auto &SE = *Expander.getSE();
2049 // Map to keep track of created compares. The key is the pair of operands for
2050 // the compare, to allow detecting and re-using redundant compares.
2051 DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
2052 for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
2053 Type *Ty = SinkStart->getType();
2054 // Compute VF * IC * AccessSize.
2055 auto *VFTimesICTimesSize =
2056 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
2057 ConstantInt::get(Ty, IC * AccessSize));
2058 Value *Diff =
2059 Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
2060
2061 // Check if the same compare has already been created earlier. In that case,
2062 // there is no need to check it again.
2063 Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
2064 if (IsConflict)
2065 continue;
2066
2067 IsConflict =
2068 ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
2069 SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
2070 if (NeedsFreeze)
2071 IsConflict =
2072 ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
2073 if (MemoryRuntimeCheck) {
2074 IsConflict =
2075 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2076 }
2077 MemoryRuntimeCheck = IsConflict;
2078 }
2079
2080 return MemoryRuntimeCheck;
2081}
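
The diff-based check above compares the distance between sink and source against the number of bytes accessed per vectorized iteration; a minimal sketch with hypothetical values:

#include <cstdio>

// Illustrative sketch only: the scalar form of the "diff.check" built above,
// with made-up values for VF, IC, AccessSize and the sink/source distance.
int main() {
  unsigned VF = 4, IC = 2, AccessSize = 4;       // hypothetical parameters
  unsigned Diff = 16;                            // hypothetical SinkStart - SrcStart in bytes
  bool IsConflict = Diff < VF * IC * AccessSize; // 16 < 32 -> conflict
  std::printf("conflict=%d\n", IsConflict);
}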
2082
2083std::optional<IVConditionInfo>
2084llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
2085 const MemorySSA &MSSA, AAResults &AA) {
2086 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
2087 if (!TI || !TI->isConditional())
2088 return {};
2089
2090 auto *CondI = dyn_cast<Instruction>(TI->getCondition());
2091 // The case with the condition outside the loop should already be handled
2092 // earlier.
2093 // Allow CmpInst and TruncInst as they may be users of load instructions
2094 // and have potential for partial unswitching.
2095 if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI))
2096 return {};
2097
2098 SmallVector<Instruction *> InstToDuplicate;
2099 InstToDuplicate.push_back(CondI);
2100
2101 SmallVector<Value *, 4> WorkList;
2102 WorkList.append(CondI->op_begin(), CondI->op_end());
2103
2104 SmallVector<MemoryAccess *, 4> AccessesToCheck;
2105 SmallVector<MemoryLocation, 4> AccessedLocs;
2106 while (!WorkList.empty()) {
2107 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
2108 if (!I || !L.contains(I))
2109 continue;
2110
2111 // TODO: support additional instructions.
2112 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
2113 return {};
2114
2115 // Do not duplicate volatile and atomic loads.
2116 if (auto *LI = dyn_cast<LoadInst>(I))
2117 if (LI->isVolatile() || LI->isAtomic())
2118 return {};
2119
2120 InstToDuplicate.push_back(I);
2121 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
2122 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
2123 // Queue the defining access to check for alias checks.
2124 AccessesToCheck.push_back(MemUse->getDefiningAccess());
2125 AccessedLocs.push_back(MemoryLocation::get(I));
2126 } else {
2127 // MemoryDefs may clobber the location or may be atomic memory
2128 // operations. Bail out.
2129 return {};
2130 }
2131 }
2132 WorkList.append(I->op_begin(), I->op_end());
2133 }
2134
2135 if (InstToDuplicate.empty())
2136 return {};
2137
2138 SmallVector<BasicBlock *, 4> ExitingBlocks;
2139 L.getExitingBlocks(ExitingBlocks);
2140 auto HasNoClobbersOnPath =
2141 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
2142 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
2143 SmallVector<MemoryAccess *, 4> AccessesToCheck)
2144 -> std::optional<IVConditionInfo> {
2145 IVConditionInfo Info;
2146 // First, collect all blocks in the loop that are on a path from Succ
2147 // to the header.
2148 SmallVector<BasicBlock *, 4> WorkList;
2149 WorkList.push_back(Succ);
2150 WorkList.push_back(Header);
2151 SmallPtrSet<BasicBlock *, 4> Seen;
2152 Seen.insert(Header);
2153 Info.PathIsNoop &=
2154 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
2155
2156 while (!WorkList.empty()) {
2157 BasicBlock *Current = WorkList.pop_back_val();
2158 if (!L.contains(Current))
2159 continue;
2160 const auto &SeenIns = Seen.insert(Current);
2161 if (!SeenIns.second)
2162 continue;
2163
2164 Info.PathIsNoop &= all_of(
2165 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
2166 WorkList.append(succ_begin(Current), succ_end(Current));
2167 }
2168
2169 // Require at least 2 blocks on a path through the loop. This skips
2170 // paths that directly exit the loop.
2171 if (Seen.size() < 2)
2172 return {};
2173
2174 // Next, check if there are any MemoryDefs that are on the path through
2175 // the loop (in the Seen set) and may alias any of the locations in
2176 // AccessedLocs. If that is the case, they may modify the condition and
2177 // partial unswitching is not possible.
2178 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
2179 while (!AccessesToCheck.empty()) {
2180 MemoryAccess *Current = AccessesToCheck.pop_back_val();
2181 auto SeenI = SeenAccesses.insert(Current);
2182 if (!SeenI.second || !Seen.contains(Current->getBlock()))
2183 continue;
2184
2185 // Bail out if exceeded the threshold.
2186 if (SeenAccesses.size() >= MSSAThreshold)
2187 return {};
2188
2189 // MemoryUse are read-only accesses.
2190 if (isa<MemoryUse>(Current))
2191 continue;
2192
2193 // For a MemoryDef, check if it aliases any of the locations feeding
2194 // the original condition.
2195 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
2196 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
2197 return isModSet(
2198 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
2199 }))
2200 return {};
2201 }
2202
2203 for (Use &U : Current->uses())
2204 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
2205 }
2206
2207 // We could also allow loops with known trip counts without mustprogress,
2208 // but ScalarEvolution may not be available.
2209 Info.PathIsNoop &= isMustProgress(&L);
2210
2211 // If the path is considered a no-op so far, check if it reaches a
2212 // single exit block without any phis. This ensures no values from the
2213 // loop are used outside of the loop.
2214 if (Info.PathIsNoop) {
2215 for (auto *Exiting : ExitingBlocks) {
2216 if (!Seen.contains(Exiting))
2217 continue;
2218 for (auto *Succ : successors(Exiting)) {
2219 if (L.contains(Succ))
2220 continue;
2221
2222 Info.PathIsNoop &= Succ->phis().empty() &&
2223 (!Info.ExitForPath || Info.ExitForPath == Succ);
2224 if (!Info.PathIsNoop)
2225 break;
2226 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
2227 "cannot have multiple exit blocks");
2228 Info.ExitForPath = Succ;
2229 }
2230 }
2231 }
2232 if (!Info.ExitForPath)
2233 Info.PathIsNoop = false;
2234
2235 Info.InstToDuplicate = InstToDuplicate;
2236 return Info;
2237 };
2238
2239 // If we branch to the same successor, partial unswitching will not be
2240 // beneficial.
2241 if (TI->getSuccessor(0) == TI->getSuccessor(1))
2242 return {};
2243
2244 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
2245 AccessesToCheck)) {
2246 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
2247 return Info;
2248 }
2249 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
2250 AccessesToCheck)) {
2251 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
2252 return Info;
2253 }
2254
2255 return {};
2256}
@ Poison
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This is the interface for LLVM's primary stateless and local alias analysis.
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_EXPORT_TEMPLATE
Definition: Compiler.h:215
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseSet and SmallDenseSet classes.
@ Enable
Definition: DwarfDebug.cpp:86
std::string Name
bool End
Definition: ELF_riscv.cpp:480
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
Definition: LoopUtils.cpp:1468
static const char * LLVMLoopDisableLICM
Definition: LoopUtils.cpp:56
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Definition: LoopUtils.cpp:1892
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
Definition: LoopUtils.cpp:1509
static const char * LLVMLoopDisableNonforced
Definition: LoopUtils.cpp:55
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
Definition: LoopUtils.cpp:204
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
Definition: LoopUtils.cpp:791
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
Definition: LoopUtils.cpp:1563
static std::optional< unsigned > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
Definition: LoopUtils.cpp:809
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t High
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Virtual Register Rewriter
Definition: VirtRegMap.cpp:269
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A private abstract base class describing the concept of an individual alias analysis implementation.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1138
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:284
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:684
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:682
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:535
static LLVM_ABI Constant * getIntrinsicIdentity(Intrinsic::ID, Type *Ty)
Definition: Constants.cpp:2739
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2694
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1105
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
Definition: Constants.cpp:1037
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:169
This class represents an Operation in the Expression.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
iterator_range< iterator > children()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:318
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:22
bool noSignedZeros() const
Definition: FMF.h:67
bool noNaNs() const
Definition: FMF.h:65
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
LLVM_ABI CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:356
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2345
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2559
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1339
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1005
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:378
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:522
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2463
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1197
LLVM_ABI CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Definition: IRBuilder.cpp:815
FastMathFlags getFastMathFlags() const
Get the flags to be applied to created floating point ops.
Definition: IRBuilder.h:334
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2593
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
LLVM_ABI CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:386
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:507
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1191
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2361
LLVM_ABI CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:392
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
LLVM_ABI CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:361
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
A struct for saving information about induction variables.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
void addLoop(Loop &L)
Definition: LoopPass.cpp:77
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
std::vector< Loop * >::const_iterator iterator
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:597
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:442
LLVM_ABI void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:899
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:538
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:514
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
Metadata node.
Definition: Metadata.h:1077
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:1078
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1445
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1443
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1565
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1451
LLVMContext & getContext() const
Definition: Metadata.h:1241
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:899
A single uniqued string.
Definition: Metadata.h:720
LLVM_ABI StringRef getString() const
Definition: Metadata.cpp:617
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:607
Tuple of metadata.
Definition: Metadata.h:1493
BasicBlock * getBlock() const
Definition: MemorySSA.h:162
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:993
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
Root of the metadata hierarchy.
Definition: Metadata.h:63
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
static bool isSignedRecurrenceKind(RecurKind Kind)
Returns true if recurrece kind is a signed redux kind.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
ScalarEvolution * getSE()
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
LLVM_ABI const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
LLVM_ABI bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:283
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition: SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:332
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI const fltSemantics & getFltSemantics() const
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
static LLVM_ABI Intrinsic::ID getForIntrinsic(Intrinsic::ID Id)
The llvm.vp.
static LLVM_ABI bool isVPReduction(Intrinsic::ID ID)
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1313
LLVM_ABI std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
Definition: LoopUtils.cpp:251
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander, bool HoistRuntimeChecks=false)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
Definition: LoopUtils.cpp:1980
LLVM_ABI Value * createFindLastIVReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::FindLastIV.
Definition: LoopUtils.cpp:1247
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1764
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:841
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1023
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1121
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
LLVM_ABI bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-positive in loop L.
Definition: LoopUtils.cpp:1434
LLVM_ABI std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
Definition: LoopInfo.cpp:1103
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1814
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:190
constexpr from_range_t from_range
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:449
LLVM_ABI Value * getReductionIdentity(Intrinsic::ID RdxID, Type *Ty, FastMathFlags FMF)
Given information about an @llvm.vector.reduce.
Definition: LoopUtils.cpp:1263
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:264
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition: LoopUtils.cpp:975
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
LLVM_ABI char & LCSSAID
Definition: LCSSA.cpp:526
LLVM_ABI char & LoopSimplifyID
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1116
LLVM_ABI SmallVector< BasicBlock *, 16 > collectChildrenInLoop(DominatorTree *DT, DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:215
LLVM_ABI bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1452
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:463
LLVM_ABI TransformationMode hasVectorizeTransformation(const Loop *L)
Definition: LoopUtils.cpp:392
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1987
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:402
LLVM_ABI SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:124
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI constexpr Intrinsic::ID getReductionIntrinsicID(RecurKind RK)
Returns the llvm.vector.reduce intrinsic that corresponds to the recurrence kind.
Definition: LoopUtils.cpp:913
LLVM_ABI bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1174
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:49
LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:374
LLVM_ABI void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
Definition: LoopUtils.cpp:485
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:345
LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
LLVM_ABI Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:1159
LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:353
LLVM_ABI TransformationMode hasDistributeTransformation(const Loop *L)
Definition: LoopUtils.cpp:428
LLVM_ABI void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:711
LLVM_ABI void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:1393
LLVM_ABI bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always positive in loop L.
Definition: LoopUtils.cpp:1427
LLVM_ABI unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2513
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1125
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
LLVM_ABI CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1094
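A minimal sketch, assuming Builder and values A and B: expand one step of a signed-max recurrence with the predicate this helper returns:
  CmpInst::Predicate Pred = getMinMaxReductionPredicate(RecurKind::SMax);
  Value *Cmp = Builder.CreateICmp(Pred, A, B);
  Value *Max = Builder.CreateSelect(Cmp, A, B);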
LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L)
Definition: LoopUtils.cpp:438
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
TransformationMode
The mode sets how eagerly a transformation should be applied (a usage sketch follows the list of modes below).
Definition: LoopUtils.h:285
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition: LoopUtils.h:288
@ TM_SuppressedByUser
The transformation must not be applied.
Definition: LoopUtils.h:308
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:302
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:294
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:291
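An illustrative sketch of how a transform might consult these modes; the query is the real hasUnrollTransformation, while the surrounding decision logic is only an assumption:
  TransformationMode TM = hasUnrollTransformation(L);
  if (TM == TM_Disable || TM == TM_SuppressedByUser)
    return false;                      // honor an explicit "do not unroll"
  bool SkipCostModel = (TM == TM_Enable || TM == TM_ForcedByUser);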
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:349
template LLVM_TEMPLATE_ABI void appendLoopsToWorklist< Loop & >(Loop &L, SmallPriorityWorklist< Loop *, 4 > &Worklist)
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
Definition: LoopUtils.cpp:1005
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:859
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector.reduce.* intrinsic used to generate it.
Definition: LoopUtils.cpp:1305
LLVM_ABI void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
Definition: LoopUtils.cpp:1786
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:58
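A hedged usage sketch, assuming DominatorTree DT and LoopInfo LI are available for the current function:
  bool Changed = formDedicatedExitBlocks(L, &DT, &LI, /*MSSAU=*/nullptr,
                                         /*PreserveLCSSA=*/true);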
DWARFExpression::Operation Op
LLVM_ABI bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1413
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
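Illustrative sketch, assuming BI is a conditional BranchInst: pull the taken/not-taken weights out of its !prof metadata if it has any:
  SmallVector<uint32_t, 2> Weights;
  if (extractBranchWeights(BI->getMetadata(LLVMContext::MD_prof), Weights)) {
    // Weights[0] belongs to the first successor, Weights[1] to the second.
  }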
LLVM_ABI bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check that the inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
Definition: LoopUtils.cpp:891
LLVM_ABI bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond)
Return true if the induction variable IV in a Loop whose latch is LatchBlock would become dead if the exit test Cond were removed.
Definition: LoopUtils.cpp:471
auto predecessors(const MachineBasicBlock *BB)
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed, substitute the exit values from the loop into any instructions outside of the loop that use the final values of the current expressions.
Definition: LoopUtils.cpp:1574
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
Definition: LoopUtils.cpp:1366
LLVM_ABI Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
Definition: LoopUtils.cpp:2038
LLVM_ABI RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1075
ReplaceExitVal
Definition: LoopUtils.h:491
@ UnusedIndVarInLoop
Definition: LoopUtils.h:495
@ OnlyCheapRepl
Definition: LoopUtils.h:493
@ AlwaysRepl
Definition: LoopUtils.h:496
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:2084
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, Value *InitVal, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of kind RecurKind::AnyOf.
Definition: LoopUtils.cpp:1217
LLVM_ABI bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and is never equal to signed/unsigned min.
Definition: LoopUtils.cpp:1441
LLVM_ABI bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1420
LLVM_ABI Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:1134
LLVM_ABI MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Definition: LoopInfo.cpp:1053
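Illustrative sketch (LoopID assumed to be the loop's !llvm.loop metadata node): test whether a particular attribute is attached:
  if (MDNode *MD = findOptionMDForLoopID(LoopID, "llvm.loop.unroll.disable")) {
    // The attribute node is present; MD also carries any operands it has.
  }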
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
Definition: LoopUtils.cpp:959
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified map.
Definition: LoopUtils.cpp:1857
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1884
TrackingVH< Value > Start
Definition: LoopUtils.cpp:1885
TrackingVH< Value > End
Definition: LoopUtils.cpp:1886
Value * StrideToCheck
Definition: LoopUtils.cpp:1887
unsigned Ith
Definition: LoopUtils.cpp:1495
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
Definition: LoopUtils.cpp:1500
const SCEV * ExpansionSCEV
Definition: LoopUtils.cpp:1496
PHINode * PN
Definition: LoopUtils.cpp:1494
Instruction * ExpansionPoint
Definition: LoopUtils.cpp:1497
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:580
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.