LLVM 22.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
70STATISTIC(NumChanged, "Number of insts reassociated");
71STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72STATISTIC(NumFactor , "Number of multiplies factored");
73
74static cl::opt<bool>
75 UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76 cl::desc("Only reorder expressions within a basic block "
77 "when exposing CSE opportunities"),
78 cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (const ValueEntry &Op : Ops) {
87 dbgs() << "[ ";
88 Op.Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Op.Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
127 Instruction *I = dyn_cast<Instruction>(V);
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
176 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
177 return BO;
178 return nullptr;
179}
180
181void ReassociatePass::BuildRankMap(Function &F,
183 unsigned Rank = 2;
184
185 // Assign distinct ranks to function arguments.
186 for (auto &Arg : F.args()) {
187 ValueRankMap[&Arg] = ++Rank;
188 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189 << "\n");
190 }
191
192 // Traverse basic blocks in ReversePostOrder.
193 for (BasicBlock *BB : RPOT) {
194 unsigned BBRank = RankMap[BB] = ++Rank << 16;
195
196 // Walk the basic block, adding precomputed ranks for any instructions that
197 // we cannot move. This ensures that the ranks for these instructions are
198 // all different in the block.
199 for (Instruction &I : *BB)
201 ValueRankMap[&I] = ++BBRank;
202 }
203}
204
205unsigned ReassociatePass::getRank(Value *V) {
206 Instruction *I = dyn_cast<Instruction>(V);
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS)) {
245 cast<BinaryOperator>(I)->swapOperands();
246 MadeChange = true;
247 }
248}
249
251 BasicBlock::iterator InsertBefore,
252 Value *FlagsOp) {
253 if (S1->getType()->isIntOrIntVectorTy())
254 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
255 else {
256 BinaryOperator *Res =
257 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
258 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
259 return Res;
260 }
261}
262
264 BasicBlock::iterator InsertBefore,
265 Value *FlagsOp) {
266 if (S1->getType()->isIntOrIntVectorTy())
267 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
268 else {
269 BinaryOperator *Res =
270 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
271 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
272 return Res;
273 }
274}
275
277 BasicBlock::iterator InsertBefore,
278 Value *FlagsOp) {
279 if (S1->getType()->isIntOrIntVectorTy())
280 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
281
282 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
283 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
284
285 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
286}
287
288/// Replace 0-X with X*-1.
290 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
291 "Expected a Negate!");
292 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
293 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
294 Type *Ty = Neg->getType();
295 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
296 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
297
298 BinaryOperator *Res =
299 CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
300 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
301 Res->takeName(Neg);
302 Neg->replaceAllUsesWith(Res);
303 Res->setDebugLoc(Neg->getDebugLoc());
304 return Res;
305}
306
307using RepeatedValue = std::pair<Value *, uint64_t>;
308
309/// Given an associative binary expression, return the leaf
310/// nodes in Ops along with their weights (how many times the leaf occurs). The
311/// original expression is the same as
312/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
313/// op
314/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
315/// op
316/// ...
317/// op
318/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
319///
320/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
321///
322/// This routine may modify the function, in which case it returns 'true'. The
323/// changes it makes may well be destructive, changing the value computed by 'I'
324/// to something completely different. Thus if the routine returns 'true' then
325/// you MUST either replace I with a new expression computed from the Ops array,
326/// or use RewriteExprTree to put the values back in.
327///
328/// A leaf node is either not a binary operation of the same kind as the root
329/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
330/// opcode), or is the same kind of binary operator but has a use which either
331/// does not belong to the expression, or does belong to the expression but is
332/// a leaf node. Every leaf node has at least one use that is a non-leaf node
333/// of the expression, while for non-leaf nodes (except for the root 'I') every
334/// use is a non-leaf node of the expression.
335///
336/// For example:
337/// expression graph node names
338///
339/// + | I
340/// / \ |
341/// + + | A, B
342/// / \ / \ |
343/// * + * | C, D, E
344/// / \ / \ / \ |
345/// + * | F, G
346///
347/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
348/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
349///
350/// The expression is maximal: if some instruction is a binary operator of the
351/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
352/// then the instruction also belongs to the expression, is not a leaf node of
353/// it, and its operands also belong to the expression (but may be leaf nodes).
354///
355/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
356/// order to ensure that every non-root node in the expression has *exactly one*
357/// use by a non-leaf node of the expression. This destruction means that the
358/// caller MUST either replace 'I' with a new expression or use something like
359/// RewriteExprTree to put the values back in if the routine indicates that it
360/// made a change by returning 'true'.
361///
362/// In the above example either the right operand of A or the left operand of B
363/// will be replaced by undef. If it is B's operand then this gives:
364///
365/// + | I
366/// / \ |
367/// + + | A, B - operand of B replaced with undef
368/// / \ \ |
369/// * + * | C, D, E
370/// / \ / \ / \ |
371/// + * | F, G
372///
373/// Note that such undef operands can only be reached by passing through 'I'.
374/// For example, if you visit operands recursively starting from a leaf node
375/// then you will never see such an undef operand unless you get back to 'I',
376/// which requires passing through a phi node.
377///
378/// Note that this routine may also mutate binary operators of the wrong type
379/// that have all uses inside the expression (i.e. only used by non-leaf nodes
380/// of the expression) if it can turn them into binary operators of the right
381/// type and thus make the expression bigger.
385 OverflowTracking &Flags) {
386 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
387 "Expected a UnaryOperator or BinaryOperator!");
388 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
389 unsigned Opcode = I->getOpcode();
390 assert(I->isAssociative() && I->isCommutative() &&
391 "Expected an associative and commutative operation!");
392
393 // Visit all operands of the expression, keeping track of their weight (the
394 // number of paths from the expression root to the operand, or if you like
395 // the number of times that operand occurs in the linearized expression).
396 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
397 // while A has weight two.
398
399 // Worklist of non-leaf nodes (their operands are in the expression too) along
400 // with their weights, representing a certain number of paths to the operator.
401 // If an operator occurs in the worklist multiple times then we found multiple
402 // ways to get to it.
403 SmallVector<std::pair<Instruction *, uint64_t>, 8> Worklist; // (Op, Weight)
404 Worklist.push_back(std::make_pair(I, 1));
405 bool Changed = false;
406
407 // Leaves of the expression are values that either aren't the right kind of
408 // operation (eg: a constant, or a multiply in an add tree), or are, but have
409 // some uses that are not inside the expression. For example, in I = X + X,
410 // X = A + B, the value X has two uses (by I) that are in the expression. If
411 // X has any other uses, for example in a return instruction, then we consider
412 // X to be a leaf, and won't analyze it further. When we first visit a value,
413 // if it has more than one use then at first we conservatively consider it to
414 // be a leaf. Later, as the expression is explored, we may discover some more
415 // uses of the value from inside the expression. If all uses turn out to be
416 // from within the expression (and the value is a binary operator of the right
417 // kind) then the value is no longer considered to be a leaf, and its operands
418 // are explored.
419
420 // Leaves - Keeps track of the set of putative leaves as well as the number of
421 // paths to each leaf seen so far.
422 using LeafMap = DenseMap<Value *, uint64_t>;
423 LeafMap Leaves; // Leaf -> Total weight so far.
424 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
425 const DataLayout &DL = I->getDataLayout();
426
427#ifndef NDEBUG
428 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
429#endif
430 while (!Worklist.empty()) {
431 // We examine the operands of this binary operator.
432 auto [I, Weight] = Worklist.pop_back_val();
433
434 Flags.mergeFlags(*I);
435
436 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
437 Value *Op = I->getOperand(OpIdx);
438 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
439 assert((!Op->hasUseList() || !Op->use_empty()) &&
440 "No uses, so how did we get to it?!");
441
442 // If this is a binary operation of the right kind with only one use then
443 // add its operands to the expression.
444 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
445 assert(Visited.insert(Op).second && "Not first visit!");
446 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
447 Worklist.push_back(std::make_pair(BO, Weight));
448 continue;
449 }
450
451 // Appears to be a leaf. Is the operand already in the set of leaves?
452 LeafMap::iterator It = Leaves.find(Op);
453 if (It == Leaves.end()) {
454 // Not in the leaf map. Must be the first time we saw this operand.
455 assert(Visited.insert(Op).second && "Not first visit!");
456 if (!Op->hasOneUse()) {
457 // This value has uses not accounted for by the expression, so it is
458 // not safe to modify. Mark it as being a leaf.
460 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
461 LeafOrder.push_back(Op);
462 Leaves[Op] = Weight;
463 continue;
464 }
465 // No uses outside the expression, try morphing it.
466 } else {
467 // Already in the leaf map.
468 assert(It != Leaves.end() && Visited.count(Op) &&
469 "In leaf map but not visited!");
470
471 // Update the number of paths to the leaf.
472 It->second += Weight;
473 assert(It->second >= Weight && "Weight overflows");
474
475 // If we still have uses that are not accounted for by the expression
476 // then it is not safe to modify the value.
477 if (!Op->hasOneUse())
478 continue;
479
480 // No uses outside the expression, try morphing it.
481 Weight = It->second;
482 Leaves.erase(It); // Since the value may be morphed below.
483 }
484
485 // At this point we have a value which, first of all, is not a binary
486 // expression of the right kind, and secondly, is only used inside the
487 // expression. This means that it can safely be modified. See if we
488 // can usefully morph it into an expression of the right kind.
489 assert((!isa<Instruction>(Op) ||
490 cast<Instruction>(Op)->getOpcode() != Opcode
491 || (isa<FPMathOperator>(Op) &&
492 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
493 "Should have been handled above!");
494 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
495
496 // If this is a multiply expression, turn any internal negations into
497 // multiplies by -1 so they can be reassociated. Add any users of the
498 // newly created multiplication by -1 to the redo list, so any
499 // reassociation opportunities that are exposed will be reassociated
500 // further.
501 Instruction *Neg;
502 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
503 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
504 match(Op, m_Instruction(Neg))) {
506 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
508 LLVM_DEBUG(dbgs() << *Mul << '\n');
509 Worklist.push_back(std::make_pair(Mul, Weight));
510 for (User *U : Mul->users()) {
511 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
512 ToRedo.insert(UserBO);
513 }
514 ToRedo.insert(Neg);
515 Changed = true;
516 continue;
517 }
518
519 // Failed to morph into an expression of the right type. This really is
520 // a leaf.
521 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
522 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
523 LeafOrder.push_back(Op);
524 Leaves[Op] = Weight;
525 }
526 }
527
528 // The leaves, repeated according to their weights, represent the linearized
529 // form of the expression.
530 for (Value *V : LeafOrder) {
531 LeafMap::iterator It = Leaves.find(V);
532 if (It == Leaves.end())
533 // Node initially thought to be a leaf wasn't.
534 continue;
535 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
536 uint64_t Weight = It->second;
537 // Ensure the leaf is only output once.
538 It->second = 0;
539 Ops.push_back(std::make_pair(V, Weight));
540 if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
541 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
542 else if (Opcode == Instruction::Mul) {
543 // To preserve NUW we need all inputs non-zero.
544 // To preserve NSW we need all inputs strictly positive.
545 if (Flags.AllKnownNonZero &&
546 (Flags.HasNUW || (Flags.HasNSW && Flags.AllKnownNonNegative))) {
547 Flags.AllKnownNonZero &= isKnownNonZero(V, SimplifyQuery(DL));
548 if (Flags.HasNSW && Flags.AllKnownNonNegative)
549 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
550 }
551 }
552 }
553
554 // For nilpotent operations or addition there may be no operands, for example
555 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
556 // in both cases the weight reduces to 0 causing the value to be skipped.
557 if (Ops.empty()) {
558 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
559 assert(Identity && "Associative operation without identity!");
560 Ops.emplace_back(Identity, 1);
561 }
562
563 return Changed;
564}
565
566/// Now that the operands for this expression tree are
567/// linearized and optimized, emit them in-order.
568void ReassociatePass::RewriteExprTree(BinaryOperator *I,
570 OverflowTracking Flags) {
571 assert(Ops.size() > 1 && "Single values should be used directly!");
572
573 // Since our optimizations should never increase the number of operations, the
574 // new expression can usually be written reusing the existing binary operators
575 // from the original expression tree, without creating any new instructions,
576 // though the rewritten expression may have a completely different topology.
577 // We take care to not change anything if the new expression will be the same
578 // as the original. If more than trivial changes (like commuting operands)
579 // were made then we are obliged to clear out any optional subclass data like
580 // nsw flags.
581
582 /// NodesToRewrite - Nodes from the original expression available for writing
583 /// the new expression into.
584 SmallVector<BinaryOperator*, 8> NodesToRewrite;
585 unsigned Opcode = I->getOpcode();
587
588 /// NotRewritable - The operands being written will be the leaves of the new
589 /// expression and must not be used as inner nodes (via NodesToRewrite) by
590 /// mistake. Inner nodes are always reassociable, and usually leaves are not
591 /// (if they were they would have been incorporated into the expression and so
592 /// would not be leaves), so most of the time there is no danger of this. But
593 /// in rare cases a leaf may become reassociable if an optimization kills uses
594 /// of it, or it may momentarily become reassociable during rewriting (below)
595 /// due it being removed as an operand of one of its uses. Ensure that misuse
596 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
597 /// leaves and refusing to reuse any of them as inner nodes.
598 SmallPtrSet<Value*, 8> NotRewritable;
599 for (const ValueEntry &Op : Ops)
600 NotRewritable.insert(Op.Op);
601
602 // ExpressionChangedStart - Non-null if the rewritten expression differs from
603 // the original in some non-trivial way, requiring the clearing of optional
604 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
605 // ExpressionChangedEnd inclusive.
606 BinaryOperator *ExpressionChangedStart = nullptr,
607 *ExpressionChangedEnd = nullptr;
608 for (unsigned i = 0; ; ++i) {
609 // The last operation (which comes earliest in the IR) is special as both
610 // operands will come from Ops, rather than just one with the other being
611 // a subexpression.
612 if (i+2 == Ops.size()) {
613 Value *NewLHS = Ops[i].Op;
614 Value *NewRHS = Ops[i+1].Op;
615 Value *OldLHS = Op->getOperand(0);
616 Value *OldRHS = Op->getOperand(1);
617
618 if (NewLHS == OldLHS && NewRHS == OldRHS)
619 // Nothing changed, leave it alone.
620 break;
621
622 if (NewLHS == OldRHS && NewRHS == OldLHS) {
623 // The order of the operands was reversed. Swap them.
624 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
625 Op->swapOperands();
626 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
627 MadeChange = true;
628 ++NumChanged;
629 break;
630 }
631
632 // The new operation differs non-trivially from the original. Overwrite
633 // the old operands with the new ones.
634 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
635 if (NewLHS != OldLHS) {
636 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
637 if (BO && !NotRewritable.count(BO))
638 NodesToRewrite.push_back(BO);
639 Op->setOperand(0, NewLHS);
640 }
641 if (NewRHS != OldRHS) {
642 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
643 if (BO && !NotRewritable.count(BO))
644 NodesToRewrite.push_back(BO);
645 Op->setOperand(1, NewRHS);
646 }
647 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
648
649 ExpressionChangedStart = Op;
650 if (!ExpressionChangedEnd)
651 ExpressionChangedEnd = Op;
652 MadeChange = true;
653 ++NumChanged;
654
655 break;
656 }
657
658 // Not the last operation. The left-hand side will be a sub-expression
659 // while the right-hand side will be the current element of Ops.
660 Value *NewRHS = Ops[i].Op;
661 if (NewRHS != Op->getOperand(1)) {
662 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
663 if (NewRHS == Op->getOperand(0)) {
664 // The new right-hand side was already present as the left operand. If
665 // we are lucky then swapping the operands will sort out both of them.
666 Op->swapOperands();
667 } else {
668 // Overwrite with the new right-hand side.
669 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
670 if (BO && !NotRewritable.count(BO))
671 NodesToRewrite.push_back(BO);
672 Op->setOperand(1, NewRHS);
673 ExpressionChangedStart = Op;
674 if (!ExpressionChangedEnd)
675 ExpressionChangedEnd = Op;
676 }
677 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
678 MadeChange = true;
679 ++NumChanged;
680 }
681
682 // Now deal with the left-hand side. If this is already an operation node
683 // from the original expression then just rewrite the rest of the expression
684 // into it.
685 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
686 if (BO && !NotRewritable.count(BO)) {
687 Op = BO;
688 continue;
689 }
690
691 // Otherwise, grab a spare node from the original expression and use that as
692 // the left-hand side. If there are no nodes left then the optimizers made
693 // an expression with more nodes than the original! This usually means that
694 // they did something stupid but it might mean that the problem was just too
695 // hard (finding the mimimal number of multiplications needed to realize a
696 // multiplication expression is NP-complete). Whatever the reason, smart or
697 // stupid, create a new node if there are none left.
698 BinaryOperator *NewOp;
699 if (NodesToRewrite.empty()) {
700 Constant *Poison = PoisonValue::get(I->getType());
702 Poison, "", I->getIterator());
703 if (isa<FPMathOperator>(NewOp))
704 NewOp->setFastMathFlags(I->getFastMathFlags());
705 } else {
706 NewOp = NodesToRewrite.pop_back_val();
707 }
708
709 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
710 Op->setOperand(0, NewOp);
711 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
712 ExpressionChangedStart = Op;
713 if (!ExpressionChangedEnd)
714 ExpressionChangedEnd = Op;
715 MadeChange = true;
716 ++NumChanged;
717 Op = NewOp;
718 }
719
720 // If the expression changed non-trivially then clear out all subclass data
721 // starting from the operator specified in ExpressionChanged, and compactify
722 // the operators to just before the expression root to guarantee that the
723 // expression tree is dominated by all of Ops.
724 if (ExpressionChangedStart) {
725 bool ClearFlags = true;
726 do {
727 // Preserve flags.
728 if (ClearFlags) {
729 if (isa<FPMathOperator>(I)) {
730 FastMathFlags Flags = I->getFastMathFlags();
731 ExpressionChangedStart->clearSubclassOptionalData();
732 ExpressionChangedStart->setFastMathFlags(Flags);
733 } else {
734 Flags.applyFlags(*ExpressionChangedStart);
735 }
736 }
737
738 if (ExpressionChangedStart == ExpressionChangedEnd)
739 ClearFlags = false;
740 if (ExpressionChangedStart == I)
741 break;
742
743 // Discard any debug info related to the expressions that has changed (we
744 // can leave debug info related to the root and any operation that didn't
745 // change, since the result of the expression tree should be the same
746 // even after reassociation).
747 if (ClearFlags)
748 replaceDbgUsesWithUndef(ExpressionChangedStart);
749
750 ExpressionChangedStart->moveBefore(I->getIterator());
751 ExpressionChangedStart =
752 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
753 } while (true);
754 }
755
756 // Throw away any left over nodes from the original expression.
757 RedoInsts.insert_range(NodesToRewrite);
758}
759
760/// Insert instructions before the instruction pointed to by BI,
761/// that computes the negative version of the value specified. The negative
762/// version of the value is returned, and BI is left pointing at the instruction
763/// that should be processed next by the reassociation pass.
764/// Also add intermediate instructions to the redo list that are modified while
765/// pushing the negates through adds. These will be revisited to see if
766/// additional opportunities have been exposed.
769 if (auto *C = dyn_cast<Constant>(V)) {
770 const DataLayout &DL = BI->getDataLayout();
771 Constant *Res = C->getType()->isFPOrFPVectorTy()
772 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
774 if (Res)
775 return Res;
776 }
777
778 // We are trying to expose opportunity for reassociation. One of the things
779 // that we want to do to achieve this is to push a negation as deep into an
780 // expression chain as possible, to expose the add instructions. In practice,
781 // this means that we turn this:
782 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
783 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
784 // the constants. We assume that instcombine will clean up the mess later if
785 // we introduce tons of unnecessary negation instructions.
786 //
787 if (BinaryOperator *I =
788 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
789 // Push the negates through the add.
790 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
791 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
792 if (I->getOpcode() == Instruction::Add) {
793 I->setHasNoUnsignedWrap(false);
794 I->setHasNoSignedWrap(false);
795 }
796
797 // We must move the add instruction here, because the neg instructions do
798 // not dominate the old add instruction in general. By moving it, we are
799 // assured that the neg instructions we just inserted dominate the
800 // instruction we are about to insert after them.
801 //
802 I->moveBefore(BI->getIterator());
803 I->setName(I->getName()+".neg");
804
805 // Add the intermediate negates to the redo list as processing them later
806 // could expose more reassociating opportunities.
807 ToRedo.insert(I);
808 return I;
809 }
810
811 // Okay, we need to materialize a negated version of V with an instruction.
812 // Scan the use lists of V to see if we have one already.
813 for (User *U : V->users()) {
814 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
815 continue;
816
817 // We found one! Now we have to make sure that the definition dominates
818 // this use. We do this by moving it to the entry block (if it is a
819 // non-instruction value) or right after the definition. These negates will
820 // be zapped by reassociate later, so we don't need much finesse here.
821 Instruction *TheNeg = dyn_cast<Instruction>(U);
822
823 // We can't safely propagate a vector zero constant with poison/undef lanes.
824 Constant *C;
825 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
826 C->containsUndefOrPoisonElement())
827 continue;
828
829 // Verify that the negate is in this function, V might be a constant expr.
830 if (!TheNeg ||
831 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
832 continue;
833
834 BasicBlock::iterator InsertPt;
835 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
836 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
837 if (!InsertPtOpt)
838 continue;
839 InsertPt = *InsertPtOpt;
840 } else {
841 InsertPt = TheNeg->getFunction()
842 ->getEntryBlock()
844 ->getIterator();
845 }
846
847 // Check that if TheNeg is moved out of its parent block, we drop its
848 // debug location to avoid extra coverage.
849 // See test dropping_debugloc_the_neg.ll for a detailed example.
850 if (TheNeg->getParent() != InsertPt->getParent())
851 TheNeg->dropLocation();
852 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
853
854 if (TheNeg->getOpcode() == Instruction::Sub) {
855 TheNeg->setHasNoUnsignedWrap(false);
856 TheNeg->setHasNoSignedWrap(false);
857 } else {
858 TheNeg->andIRFlags(BI);
859 }
860 ToRedo.insert(TheNeg);
861 return TheNeg;
862 }
863
864 // Insert a 'neg' instruction that subtracts the value from zero to get the
865 // negation.
866 Instruction *NewNeg =
867 CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI);
868 // NewNeg is generated to potentially replace BI, so use its DebugLoc.
869 NewNeg->setDebugLoc(BI->getDebugLoc());
870 ToRedo.insert(NewNeg);
871 return NewNeg;
872}
873
874// See if this `or` looks like an load widening reduction, i.e. that it
875// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
876// ensure that the pattern is *really* a load widening reduction,
877// we do not ensure that it can really be replaced with a widened load,
878// only that it mostly looks like one.
882
883 auto Enqueue = [&](Value *V) {
884 auto *I = dyn_cast<Instruction>(V);
885 // Each node of an `or` reduction must be an instruction,
886 if (!I)
887 return false; // Node is certainly not part of an `or` load reduction.
888 // Only process instructions we have never processed before.
889 if (Visited.insert(I).second)
890 Worklist.emplace_back(I);
891 return true; // Will need to look at parent nodes.
892 };
893
894 if (!Enqueue(Or))
895 return false; // Not an `or` reduction pattern.
896
897 while (!Worklist.empty()) {
898 auto *I = Worklist.pop_back_val();
899
900 // Okay, which instruction is this node?
901 switch (I->getOpcode()) {
902 case Instruction::Or:
903 // Got an `or` node. That's fine, just recurse into it's operands.
904 for (Value *Op : I->operands())
905 if (!Enqueue(Op))
906 return false; // Not an `or` reduction pattern.
907 continue;
908
909 case Instruction::Shl:
910 case Instruction::ZExt:
911 // `shl`/`zext` nodes are fine, just recurse into their base operand.
912 if (!Enqueue(I->getOperand(0)))
913 return false; // Not an `or` reduction pattern.
914 continue;
915
916 case Instruction::Load:
917 // Perfect, `load` node means we've reached an edge of the graph.
918 continue;
919
920 default: // Unknown node.
921 return false; // Not an `or` reduction pattern.
922 }
923 }
924
925 return true;
926}
927
928/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
930 // Don't bother to convert this up unless either the LHS is an associable add
931 // or subtract or mul or if this is only used by one of the above.
932 // This is only a compile-time improvement, it is not needed for correctness!
933 auto isInteresting = [](Value *V) {
934 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
935 Instruction::Shl})
936 if (isReassociableOp(V, Op))
937 return true;
938 return false;
939 };
940
941 if (any_of(Or->operands(), isInteresting))
942 return true;
943
944 Value *VB = Or->user_back();
945 if (Or->hasOneUse() && isInteresting(VB))
946 return true;
947
948 return false;
949}
950
951/// If we have (X|Y), and iff X and Y have no common bits set,
952/// transform this into (X+Y) to allow arithmetics reassociation.
954 // Convert an or into an add.
955 BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
956 Or->getIterator(), Or);
957 New->setHasNoSignedWrap();
958 New->setHasNoUnsignedWrap();
959 New->takeName(Or);
960
961 // Everyone now refers to the add instruction.
962 Or->replaceAllUsesWith(New);
963 New->setDebugLoc(Or->getDebugLoc());
964
965 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
966 return New;
967}
968
969/// Return true if we should break up this subtract of X-Y into (X + -Y).
971 // If this is a negation, we can't split it up!
972 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
973 return false;
974
975 // Don't breakup X - undef.
976 if (isa<UndefValue>(Sub->getOperand(1)))
977 return false;
978
979 // Don't bother to break this up unless either the LHS is an associable add or
980 // subtract or if this is only used by one.
981 Value *V0 = Sub->getOperand(0);
982 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
983 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
984 return true;
985 Value *V1 = Sub->getOperand(1);
986 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
987 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
988 return true;
989 Value *VB = Sub->user_back();
990 if (Sub->hasOneUse() &&
991 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
992 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
993 return true;
994
995 return false;
996}
997
998/// If we have (X-Y), and if either X is an add, or if this is only used by an
999/// add, transform this into (X+(0-Y)) to promote better reassociation.
1002 // Convert a subtract into an add and a neg instruction. This allows sub
1003 // instructions to be commuted with other add instructions.
1004 //
1005 // Calculate the negative value of Operand 1 of the sub instruction,
1006 // and set it as the RHS of the add instruction we just made.
1007 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1008 BinaryOperator *New =
1009 CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
1010 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1011 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1012 New->takeName(Sub);
1013
1014 // Everyone now refers to the add instruction.
1015 Sub->replaceAllUsesWith(New);
1016 New->setDebugLoc(Sub->getDebugLoc());
1017
1018 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1019 return New;
1020}
1021
1022/// If this is a shift of a reassociable multiply or is used by one, change
1023/// this into a multiply by a constant to assist with further reassociation.
1025 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1026 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1027 MulCst = ConstantFoldBinaryInstruction(Instruction::Shl, MulCst, SA);
1028 assert(MulCst && "Constant folding of immediate constants failed");
1029
1030 BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
1031 "", Shl->getIterator());
1032 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1033 Mul->takeName(Shl);
1034
1035 // Everyone now refers to the mul instruction.
1036 Shl->replaceAllUsesWith(Mul);
1037 Mul->setDebugLoc(Shl->getDebugLoc());
1038
1039 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1040 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1041 // handling. It can be preserved as long as we're not left shifting by
1042 // bitwidth - 1.
1043 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1044 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1045 unsigned BitWidth = Shl->getType()->getScalarSizeInBits();
1046 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1047 Mul->setHasNoSignedWrap(true);
1048 Mul->setHasNoUnsignedWrap(NUW);
1049 return Mul;
1050}
1051
1052/// Scan backwards and forwards among values with the same rank as element i
1053/// to see if X exists. If X does not exist, return i. This is useful when
1054/// scanning for 'x' when we see '-x' because they both get the same rank.
1056 unsigned i, Value *X) {
1057 unsigned XRank = Ops[i].Rank;
1058 unsigned e = Ops.size();
1059 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1060 if (Ops[j].Op == X)
1061 return j;
1062 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1063 if (Instruction *I2 = dyn_cast<Instruction>(X))
1064 if (I1->isIdenticalTo(I2))
1065 return j;
1066 }
1067 // Scan backwards.
1068 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1069 if (Ops[j].Op == X)
1070 return j;
1071 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1072 if (Instruction *I2 = dyn_cast<Instruction>(X))
1073 if (I1->isIdenticalTo(I2))
1074 return j;
1075 }
1076 return i;
1077}
1078
1079/// Emit a tree of add instructions, summing Ops together
1080/// and returning the result. Insert the tree before I.
1083 if (Ops.size() == 1) return Ops.back();
1084
1085 Value *V1 = Ops.pop_back_val();
1086 Value *V2 = EmitAddTreeOfValues(I, Ops);
1087 auto *NewAdd = CreateAdd(V2, V1, "reass.add", I->getIterator(), I);
1088 NewAdd->setDebugLoc(I->getDebugLoc());
1089 return NewAdd;
1090}
1091
1092/// If V is an expression tree that is a multiplication sequence,
1093/// and if this sequence contains a multiply by Factor,
1094/// remove Factor from the tree and return the new tree.
1095/// If new instructions are inserted to generate this tree, DL should be used
1096/// as the DebugLoc for these instructions.
1097Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor,
1098 DebugLoc DL) {
1099 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1100 if (!BO)
1101 return nullptr;
1102
1105 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags);
1107 Factors.reserve(Tree.size());
1108 for (const RepeatedValue &E : Tree)
1109 Factors.append(E.second, ValueEntry(getRank(E.first), E.first));
1110
1111 bool FoundFactor = false;
1112 bool NeedsNegate = false;
1113 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1114 if (Factors[i].Op == Factor) {
1115 FoundFactor = true;
1116 Factors.erase(Factors.begin()+i);
1117 break;
1118 }
1119
1120 // If this is a negative version of this factor, remove it.
1121 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1122 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1123 if (FC1->getValue() == -FC2->getValue()) {
1124 FoundFactor = NeedsNegate = true;
1125 Factors.erase(Factors.begin()+i);
1126 break;
1127 }
1128 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1129 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1130 const APFloat &F1 = FC1->getValueAPF();
1131 APFloat F2(FC2->getValueAPF());
1132 F2.changeSign();
1133 if (F1 == F2) {
1134 FoundFactor = NeedsNegate = true;
1135 Factors.erase(Factors.begin() + i);
1136 break;
1137 }
1138 }
1139 }
1140 }
1141
1142 if (!FoundFactor) {
1143 // Make sure to restore the operands to the expression tree.
1144 RewriteExprTree(BO, Factors, Flags);
1145 return nullptr;
1146 }
1147
1148 BasicBlock::iterator InsertPt = ++BO->getIterator();
1149
1150 // If this was just a single multiply, remove the multiply and return the only
1151 // remaining operand.
1152 if (Factors.size() == 1) {
1153 RedoInsts.insert(BO);
1154 V = Factors[0].Op;
1155 } else {
1156 RewriteExprTree(BO, Factors, Flags);
1157 V = BO;
1158 }
1159
1160 if (NeedsNegate) {
1161 V = CreateNeg(V, "neg", InsertPt, BO);
1162 cast<Instruction>(V)->setDebugLoc(DL);
1163 }
1164
1165 return V;
1166}
1167
1168/// If V is a single-use multiply, recursively add its operands as factors,
1169/// otherwise add V to the list of factors.
1170///
1171/// Ops is the top-level list of add operands we're trying to factor.
1173 SmallVectorImpl<Value*> &Factors) {
1174 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1175 if (!BO) {
1176 Factors.push_back(V);
1177 return;
1178 }
1179
1180 // Otherwise, add the LHS and RHS to the list of factors.
1183}
1184
1185/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1186/// This optimizes based on identities. If it can be reduced to a single Value,
1187/// it is returned, otherwise the Ops list is mutated as necessary.
1188static Value *OptimizeAndOrXor(unsigned Opcode,
1190 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1191 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1192 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1193 // First, check for X and ~X in the operand list.
1194 assert(i < Ops.size());
1195 Value *X;
1196 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1197 unsigned FoundX = FindInOperandList(Ops, i, X);
1198 if (FoundX != i) {
1199 if (Opcode == Instruction::And) // ...&X&~X = 0
1200 return Constant::getNullValue(X->getType());
1201
1202 if (Opcode == Instruction::Or) // ...|X|~X = -1
1203 return Constant::getAllOnesValue(X->getType());
1204 }
1205 }
1206
1207 // Next, check for duplicate pairs of values, which we assume are next to
1208 // each other, due to our sorting criteria.
1209 assert(i < Ops.size());
1210 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1211 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1212 // Drop duplicate values for And and Or.
1213 Ops.erase(Ops.begin()+i);
1214 --i; --e;
1215 ++NumAnnihil;
1216 continue;
1217 }
1218
1219 // Drop pairs of values for Xor.
1220 assert(Opcode == Instruction::Xor);
1221 if (e == 2)
1222 return Constant::getNullValue(Ops[0].Op->getType());
1223
1224 // Y ^ X^X -> Y
1225 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1226 i -= 1; e -= 2;
1227 ++NumAnnihil;
1228 }
1229 }
1230 return nullptr;
1231}
1232
1233/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1234/// instruction with the given two operands, and return the resulting
1235/// instruction. There are two special cases: 1) if the constant operand is 0,
1236/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1237/// be returned.
1239 const APInt &ConstOpnd) {
1240 if (ConstOpnd.isZero())
1241 return nullptr;
1242
1243 if (ConstOpnd.isAllOnes())
1244 return Opnd;
1245
1246 Instruction *I = BinaryOperator::CreateAnd(
1247 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1248 InsertBefore);
1249 I->setDebugLoc(InsertBefore->getDebugLoc());
1250 return I;
1251}
1252
1253// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1254// into "R ^ C", where C would be 0, and R is a symbolic value.
1255//
1256// If it was successful, true is returned, and the "R" and "C" is returned
1257// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1258// and both "Res" and "ConstOpnd" remain unchanged.
1259bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1260 APInt &ConstOpnd, Value *&Res) {
1261 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1262 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1263 // = (x & ~c1) ^ (c1 ^ c2)
1264 // It is useful only when c1 == c2.
1265 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1266 return false;
1267
1268 if (!Opnd1->getValue()->hasOneUse())
1269 return false;
1270
1271 const APInt &C1 = Opnd1->getConstPart();
1272 if (C1 != ConstOpnd)
1273 return false;
1274
1275 Value *X = Opnd1->getSymbolicPart();
1276 Res = createAndInstr(It, X, ~C1);
1277 // ConstOpnd was C2, now C1 ^ C2.
1278 ConstOpnd ^= C1;
1279
1280 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1281 RedoInsts.insert(T);
1282 return true;
1283}
1284
1285// Helper function of OptimizeXor(). It tries to simplify
1286// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1287// symbolic value.
1288//
1289// If it was successful, true is returned, and the "R" and "C" is returned
1290// via "Res" and "ConstOpnd", respectively (If the entire expression is
1291// evaluated to a constant, the Res is set to NULL); otherwise, false is
1292// returned, and both "Res" and "ConstOpnd" remain unchanged.
1293bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1294 XorOpnd *Opnd2, APInt &ConstOpnd,
1295 Value *&Res) {
1296 Value *X = Opnd1->getSymbolicPart();
1297 if (X != Opnd2->getSymbolicPart())
1298 return false;
1299
1300 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1301 int DeadInstNum = 1;
1302 if (Opnd1->getValue()->hasOneUse())
1303 DeadInstNum++;
1304 if (Opnd2->getValue()->hasOneUse())
1305 DeadInstNum++;
1306
1307 // Xor-Rule 2:
1308 // (x | c1) ^ (x & c2)
1309 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1310 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1311 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1312 //
1313 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1314 if (Opnd2->isOrExpr())
1315 std::swap(Opnd1, Opnd2);
1316
1317 const APInt &C1 = Opnd1->getConstPart();
1318 const APInt &C2 = Opnd2->getConstPart();
1319 APInt C3((~C1) ^ C2);
1320
1321 // Do not increase code size!
1322 if (!C3.isZero() && !C3.isAllOnes()) {
1323 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1324 if (NewInstNum > DeadInstNum)
1325 return false;
1326 }
1327
1328 Res = createAndInstr(It, X, C3);
1329 ConstOpnd ^= C1;
1330 } else if (Opnd1->isOrExpr()) {
1331 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1332 //
1333 const APInt &C1 = Opnd1->getConstPart();
1334 const APInt &C2 = Opnd2->getConstPart();
1335 APInt C3 = C1 ^ C2;
1336
1337 // Do not increase code size
1338 if (!C3.isZero() && !C3.isAllOnes()) {
1339 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1340 if (NewInstNum > DeadInstNum)
1341 return false;
1342 }
1343
1344 Res = createAndInstr(It, X, C3);
1345 ConstOpnd ^= C3;
1346 } else {
1347 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1348 //
1349 const APInt &C1 = Opnd1->getConstPart();
1350 const APInt &C2 = Opnd2->getConstPart();
1351 APInt C3 = C1 ^ C2;
1352 Res = createAndInstr(It, X, C3);
1353 }
1354
1355 // Put the original operands in the Redo list; hope they will be deleted
1356 // as dead code.
1357 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1358 RedoInsts.insert(T);
1359 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1360 RedoInsts.insert(T);
1361
1362 return true;
1363}
1364
1365/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1366/// to a single Value, it is returned, otherwise the Ops list is mutated as
1367/// necessary.
1368Value *ReassociatePass::OptimizeXor(Instruction *I,
1370 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1371 return V;
1372
1373 if (Ops.size() == 1)
1374 return nullptr;
1375
1377 SmallVector<XorOpnd*, 8> OpndPtrs;
1378 Type *Ty = Ops[0].Op->getType();
1379 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1380
1381 // Step 1: Convert ValueEntry to XorOpnd
1382 for (const ValueEntry &Op : Ops) {
1383 Value *V = Op.Op;
1384 const APInt *C;
1385 // TODO: Support non-splat vectors.
1386 if (match(V, m_APInt(C))) {
1387 ConstOpnd ^= *C;
1388 } else {
1389 XorOpnd O(V);
1390 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1391 Opnds.push_back(O);
1392 }
1393 }
1394
1395 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1396 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1397 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1398 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1399 // when new elements are added to the vector.
1400 for (XorOpnd &Op : Opnds)
1401 OpndPtrs.push_back(&Op);
1402
1403 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1404 // the same symbolic value cluster together. For instance, the input operand
1405 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1406 // ("x | 123", "x & 789", "y & 456").
1407 //
1408 // The purpose is twofold:
1409 // 1) Cluster together the operands sharing the same symbolic-value.
1410 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1411 // could potentially shorten crital path, and expose more loop-invariants.
1412 // Note that values' rank are basically defined in RPO order (FIXME).
1413 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1414 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1415 // "z" in the order of X-Y-Z is better than any other orders.
1416 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1417 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1418 });
1419
1420 // Step 3: Combine adjacent operands
1421 XorOpnd *PrevOpnd = nullptr;
1422 bool Changed = false;
1423 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1424 XorOpnd *CurrOpnd = OpndPtrs[i];
1425 // The combined value
1426 Value *CV;
1427
1428 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1429 if (!ConstOpnd.isZero() &&
1430 CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
1431 Changed = true;
1432 if (CV)
1433 *CurrOpnd = XorOpnd(CV);
1434 else {
1435 CurrOpnd->Invalidate();
1436 continue;
1437 }
1438 }
1439
1440 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1441 PrevOpnd = CurrOpnd;
1442 continue;
1443 }
1444
1445 // step 3.2: When previous and current operands share the same symbolic
1446 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1447 if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1448 // Remove previous operand
1449 PrevOpnd->Invalidate();
1450 if (CV) {
1451 *CurrOpnd = XorOpnd(CV);
1452 PrevOpnd = CurrOpnd;
1453 } else {
1454 CurrOpnd->Invalidate();
1455 PrevOpnd = nullptr;
1456 }
1457 Changed = true;
1458 }
1459 }
1460
1461 // Step 4: Reassemble the Ops
1462 if (Changed) {
1463 Ops.clear();
1464 for (const XorOpnd &O : Opnds) {
1465 if (O.isInvalid())
1466 continue;
1467 ValueEntry VE(getRank(O.getValue()), O.getValue());
1468 Ops.push_back(VE);
1469 }
1470 if (!ConstOpnd.isZero()) {
1471 Value *C = ConstantInt::get(Ty, ConstOpnd);
1472 ValueEntry VE(getRank(C), C);
1473 Ops.push_back(VE);
1474 }
1475 unsigned Sz = Ops.size();
1476 if (Sz == 1)
1477 return Ops.back().Op;
1478 if (Sz == 0) {
1479 assert(ConstOpnd.isZero());
1480 return ConstantInt::get(Ty, ConstOpnd);
1481 }
1482 }
1483
1484 return nullptr;
1485}
1486
1487/// Optimize a series of operands to an 'add' instruction. This
1488/// optimizes based on identities. If it can be reduced to a single Value, it
1489/// is returned, otherwise the Ops list is mutated as necessary.
1490Value *ReassociatePass::OptimizeAdd(Instruction *I,
1492 // Scan the operand lists looking for X and -X pairs. If we find any, we
1493 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1494 // scan for any
1495 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1496
1497 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1498 Value *TheOp = Ops[i].Op;
1499 // Check to see if we've seen this operand before. If so, we factor all
1500 // instances of the operand together. Due to our sorting criteria, we know
1501 // that these need to be next to each other in the vector.
1502 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1503 // Rescan the list, remove all instances of this operand from the expr.
1504 unsigned NumFound = 0;
1505 do {
1506 Ops.erase(Ops.begin()+i);
1507 ++NumFound;
1508 } while (i != Ops.size() && Ops[i].Op == TheOp);
1509
1510 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1511 << '\n');
1512 ++NumFactor;
1513
1514 // Insert a new multiply.
1515 Type *Ty = TheOp->getType();
1516 Constant *C = Ty->isIntOrIntVectorTy() ?
1517 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1518 Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);
1519 Mul->setDebugLoc(I->getDebugLoc());
1520
1521 // Now that we have inserted a multiply, optimize it. This allows us to
1522 // handle cases that require multiple factoring steps, such as this:
1523 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1524 RedoInsts.insert(Mul);
1525
1526 // If every add operand was a duplicate, return the multiply.
1527 if (Ops.empty())
1528 return Mul;
1529
1530 // Otherwise, we had some input that didn't have the dupe, such as
1531 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1532 // things being added by this operation.
1533 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1534
1535 --i;
1536 e = Ops.size();
1537 continue;
1538 }
1539
1540 // Check for X and -X or X and ~X in the operand list.
1541 Value *X;
1542 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1543 !match(TheOp, m_FNeg(m_Value(X))))
1544 continue;
1545
1546 unsigned FoundX = FindInOperandList(Ops, i, X);
1547 if (FoundX == i)
1548 continue;
1549
1550 // Remove X and -X from the operand list.
1551 if (Ops.size() == 2 &&
1552 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1553 return Constant::getNullValue(X->getType());
1554
1555 // Remove X and ~X from the operand list.
1556 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1557 return Constant::getAllOnesValue(X->getType());
1558
1559 Ops.erase(Ops.begin()+i);
1560 if (i < FoundX)
1561 --FoundX;
1562 else
1563 --i; // Need to back up an extra one.
1564 Ops.erase(Ops.begin()+FoundX);
1565 ++NumAnnihil;
1566 --i; // Revisit element.
1567 e -= 2; // Removed two elements.
1568
1569 // if X and ~X we append -1 to the operand list.
1570 if (match(TheOp, m_Not(m_Value()))) {
1571 Value *V = Constant::getAllOnesValue(X->getType());
1572 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1573 e += 1;
1574 }
1575 }
1576
1577 // Scan the operand list, checking to see if there are any common factors
1578 // between operands. Consider something like A*A+A*B*C+D. We would like to
1579 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1580 // To efficiently find this, we count the number of times a factor occurs
1581 // for any ADD operands that are MULs.
1582 DenseMap<Value*, unsigned> FactorOccurrences;
1583
1584 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1585 // where they are actually the same multiply.
1586 unsigned MaxOcc = 0;
1587 Value *MaxOccVal = nullptr;
1588 for (const ValueEntry &Op : Ops) {
1589 BinaryOperator *BOp =
1590 isReassociableOp(Op.Op, Instruction::Mul, Instruction::FMul);
1591 if (!BOp)
1592 continue;
1593
1594 // Compute all of the factors of this added value.
1595 SmallVector<Value*, 8> Factors;
1596 FindSingleUseMultiplyFactors(BOp, Factors);
1597 assert(Factors.size() > 1 && "Bad linearize!");
1598
1599 // Add one to FactorOccurrences for each unique factor in this op.
1600 SmallPtrSet<Value*, 8> Duplicates;
1601 for (Value *Factor : Factors) {
1602 if (!Duplicates.insert(Factor).second)
1603 continue;
1604
1605 unsigned Occ = ++FactorOccurrences[Factor];
1606 if (Occ > MaxOcc) {
1607 MaxOcc = Occ;
1608 MaxOccVal = Factor;
1609 }
1610
1611 // If Factor is a negative constant, add the negated value as a factor
1612 // because we can percolate the negate out. Watch for minint, which
1613 // cannot be positivified.
1614 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1615 if (CI->isNegative() && !CI->isMinValue(true)) {
1616 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1617 if (!Duplicates.insert(Factor).second)
1618 continue;
1619 unsigned Occ = ++FactorOccurrences[Factor];
1620 if (Occ > MaxOcc) {
1621 MaxOcc = Occ;
1622 MaxOccVal = Factor;
1623 }
1624 }
1625 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1626 if (CF->isNegative()) {
1627 APFloat F(CF->getValueAPF());
1628 F.changeSign();
1629 Factor = ConstantFP::get(CF->getContext(), F);
1630 if (!Duplicates.insert(Factor).second)
1631 continue;
1632 unsigned Occ = ++FactorOccurrences[Factor];
1633 if (Occ > MaxOcc) {
1634 MaxOcc = Occ;
1635 MaxOccVal = Factor;
1636 }
1637 }
1638 }
1639 }
1640 }
1641
1642 // If any factor occurred more than one time, we can pull it out.
1643 if (MaxOcc > 1) {
1644 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1645 << '\n');
1646 ++NumFactor;
1647
1648 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1649 // this, we could otherwise run into situations where removing a factor
1650 // from an expression will drop a use of maxocc, and this can cause
1651 // RemoveFactorFromExpression on successive values to behave differently.
1652 Instruction *DummyInst =
1653 I->getType()->isIntOrIntVectorTy()
1654 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1655 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1656
1658 for (unsigned i = 0; i != Ops.size(); ++i) {
1659 // Only try to remove factors from expressions we're allowed to.
1660 BinaryOperator *BOp =
1661 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1662 if (!BOp)
1663 continue;
1664
1665 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal,
1666 I->getDebugLoc())) {
1667 // The factorized operand may occur several times. Convert them all in
1668 // one fell swoop.
1669 for (unsigned j = Ops.size(); j != i;) {
1670 --j;
1671 if (Ops[j].Op == Ops[i].Op) {
1672 NewMulOps.push_back(V);
1673 Ops.erase(Ops.begin()+j);
1674 }
1675 }
1676 --i;
1677 }
1678 }
1679
1680 // No need for extra uses anymore.
1681 DummyInst->deleteValue();
1682
1683 unsigned NumAddedValues = NewMulOps.size();
1684 Value *V = EmitAddTreeOfValues(I, NewMulOps);
1685
1686 // Now that we have inserted the add tree, optimize it. This allows us to
1687 // handle cases that require multiple factoring steps, such as this:
1688 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1689 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1690 (void)NumAddedValues;
1691 if (Instruction *VI = dyn_cast<Instruction>(V))
1692 RedoInsts.insert(VI);
1693
1694 // Create the multiply.
1695 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);
1696 V2->setDebugLoc(I->getDebugLoc());
1697
1698 // Rerun associate on the multiply in case the inner expression turned into
1699 // a multiply. We want to make sure that we keep things in canonical form.
1700 RedoInsts.insert(V2);
1701
1702 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1703 // entire result expression is just the multiply "A*(B+C)".
1704 if (Ops.empty())
1705 return V2;
1706
1707 // Otherwise, we had some input that didn't have the factor, such as
1708 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1709 // things being added by this operation.
1710 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1711 }
1712
1713 return nullptr;
1714}
1715
1716/// Build up a vector of value/power pairs factoring a product.
1717///
1718/// Given a series of multiplication operands, build a vector of factors and
1719/// the powers each is raised to when forming the final product. Sort them in
1720/// the order of descending power.
1721///
1722/// (x*x) -> [(x, 2)]
1723/// ((x*x)*x) -> [(x, 3)]
1724/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1725///
1726/// \returns Whether any factors have a power greater than one.
1728 SmallVectorImpl<Factor> &Factors) {
1729 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1730 // Compute the sum of powers of simplifiable factors.
1731 unsigned FactorPowerSum = 0;
1732 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1733 Value *Op = Ops[Idx-1].Op;
1734
1735 // Count the number of occurrences of this value.
1736 unsigned Count = 1;
1737 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1738 ++Count;
1739 // Track for simplification all factors which occur 2 or more times.
1740 if (Count > 1)
1741 FactorPowerSum += Count;
1742 }
1743
1744 // We can only simplify factors if the sum of the powers of our simplifiable
1745 // factors is 4 or higher. When that is the case, we will *always* have
1746 // a simplification. This is an important invariant to prevent cyclicly
1747 // trying to simplify already minimal formations.
1748 if (FactorPowerSum < 4)
1749 return false;
1750
1751 // Now gather the simplifiable factors, removing them from Ops.
1752 FactorPowerSum = 0;
1753 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1754 Value *Op = Ops[Idx-1].Op;
1755
1756 // Count the number of occurrences of this value.
1757 unsigned Count = 1;
1758 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1759 ++Count;
1760 if (Count == 1)
1761 continue;
1762 // Move an even number of occurrences to Factors.
1763 Count &= ~1U;
1764 Idx -= Count;
1765 FactorPowerSum += Count;
1766 Factors.push_back(Factor(Op, Count));
1767 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1768 }
1769
1770 // None of the adjustments above should have reduced the sum of factor powers
1771 // below our mininum of '4'.
1772 assert(FactorPowerSum >= 4);
1773
1774 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1775 return LHS.Power > RHS.Power;
1776 });
1777 return true;
1778}
1779
1780/// Build a tree of multiplies, computing the product of Ops.
1783 if (Ops.size() == 1)
1784 return Ops.back();
1785
1786 Value *LHS = Ops.pop_back_val();
1787 do {
1788 if (LHS->getType()->isIntOrIntVectorTy())
1789 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1790 else
1791 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1792 } while (!Ops.empty());
1793
1794 return LHS;
1795}
1796
1797/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1798///
1799/// Given a vector of values raised to various powers, where no two values are
1800/// equal and the powers are sorted in decreasing order, compute the minimal
1801/// DAG of multiplies to compute the final product, and return that product
1802/// value.
1803Value *
1804ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1805 SmallVectorImpl<Factor> &Factors) {
1806 assert(Factors[0].Power);
1807 SmallVector<Value *, 4> OuterProduct;
1808 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1809 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1810 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1811 LastIdx = Idx;
1812 continue;
1813 }
1814
1815 // We want to multiply across all the factors with the same power so that
1816 // we can raise them to that power as a single entity. Build a mini tree
1817 // for that.
1818 SmallVector<Value *, 4> InnerProduct;
1819 InnerProduct.push_back(Factors[LastIdx].Base);
1820 do {
1821 InnerProduct.push_back(Factors[Idx].Base);
1822 ++Idx;
1823 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1824
1825 // Reset the base value of the first factor to the new expression tree.
1826 // We'll remove all the factors with the same power in a second pass.
1827 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1828 if (Instruction *MI = dyn_cast<Instruction>(M))
1829 RedoInsts.insert(MI);
1830
1831 LastIdx = Idx;
1832 }
1833 // Unique factors with equal powers -- we've folded them into the first one's
1834 // base.
1835 Factors.erase(llvm::unique(Factors,
1836 [](const Factor &LHS, const Factor &RHS) {
1837 return LHS.Power == RHS.Power;
1838 }),
1839 Factors.end());
1840
1841 // Iteratively collect the base of each factor with an add power into the
1842 // outer product, and halve each power in preparation for squaring the
1843 // expression.
1844 for (Factor &F : Factors) {
1845 if (F.Power & 1)
1846 OuterProduct.push_back(F.Base);
1847 F.Power >>= 1;
1848 }
1849 if (Factors[0].Power) {
1850 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1851 OuterProduct.push_back(SquareRoot);
1852 OuterProduct.push_back(SquareRoot);
1853 }
1854 if (OuterProduct.size() == 1)
1855 return OuterProduct.front();
1856
1857 Value *V = buildMultiplyTree(Builder, OuterProduct);
1858 return V;
1859}
1860
1861Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1863 // We can only optimize the multiplies when there is a chain of more than
1864 // three, such that a balanced tree might require fewer total multiplies.
1865 if (Ops.size() < 4)
1866 return nullptr;
1867
1868 // Try to turn linear trees of multiplies without other uses of the
1869 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1870 // re-use.
1871 SmallVector<Factor, 4> Factors;
1872 if (!collectMultiplyFactors(Ops, Factors))
1873 return nullptr; // All distinct factors, so nothing left for us to do.
1874
1875 IRBuilder<> Builder(I);
1876 // The reassociate transformation for FP operations is performed only
1877 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1878 // to the newly generated operations.
1879 if (auto FPI = dyn_cast<FPMathOperator>(I))
1880 Builder.setFastMathFlags(FPI->getFastMathFlags());
1881
1882 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1883 if (Ops.empty())
1884 return V;
1885
1886 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1887 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1888 return nullptr;
1889}
1890
1891Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1893 // Now that we have the linearized expression tree, try to optimize it.
1894 // Start by folding any constants that we found.
1895 const DataLayout &DL = I->getDataLayout();
1896 Constant *Cst = nullptr;
1897 unsigned Opcode = I->getOpcode();
1898 while (!Ops.empty()) {
1899 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1900 if (!Cst) {
1901 Ops.pop_back();
1902 Cst = C;
1903 continue;
1904 }
1905 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1906 Ops.pop_back();
1907 Cst = Res;
1908 continue;
1909 }
1910 }
1911 break;
1912 }
1913 // If there was nothing but constants then we are done.
1914 if (Ops.empty())
1915 return Cst;
1916
1917 // Put the combined constant back at the end of the operand list, except if
1918 // there is no point. For example, an add of 0 gets dropped here, while a
1919 // multiplication by zero turns the whole expression into zero.
1920 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1921 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1922 return Cst;
1923 Ops.push_back(ValueEntry(0, Cst));
1924 }
1925
1926 if (Ops.size() == 1) return Ops[0].Op;
1927
1928 // Handle destructive annihilation due to identities between elements in the
1929 // argument list here.
1930 unsigned NumOps = Ops.size();
1931 switch (Opcode) {
1932 default: break;
1933 case Instruction::And:
1934 case Instruction::Or:
1935 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1936 return Result;
1937 break;
1938
1939 case Instruction::Xor:
1940 if (Value *Result = OptimizeXor(I, Ops))
1941 return Result;
1942 break;
1943
1944 case Instruction::Add:
1945 case Instruction::FAdd:
1946 if (Value *Result = OptimizeAdd(I, Ops))
1947 return Result;
1948 break;
1949
1950 case Instruction::Mul:
1951 case Instruction::FMul:
1952 if (Value *Result = OptimizeMul(I, Ops))
1953 return Result;
1954 break;
1955 }
1956
1957 if (Ops.size() != NumOps)
1958 return OptimizeExpression(I, Ops);
1959 return nullptr;
1960}
1961
1962// Remove dead instructions and if any operands are trivially dead add them to
1963// Insts so they will be removed as well.
1964void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
1965 OrderedSet &Insts) {
1966 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1967 SmallVector<Value *, 4> Ops(I->operands());
1968 ValueRankMap.erase(I);
1969 Insts.remove(I);
1970 RedoInsts.remove(I);
1972 I->eraseFromParent();
1973 for (auto *Op : Ops)
1974 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1975 if (OpInst->use_empty())
1976 Insts.insert(OpInst);
1977}
1978
1979/// Zap the given instruction, adding interesting operands to the work list.
1980void ReassociatePass::EraseInst(Instruction *I) {
1981 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1982 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
1983
1984 SmallVector<Value *, 8> Ops(I->operands());
1985 // Erase the dead instruction.
1986 ValueRankMap.erase(I);
1987 RedoInsts.remove(I);
1989 I->eraseFromParent();
1990 // Optimize its operands.
1991 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
1992 for (Value *V : Ops)
1993 if (Instruction *Op = dyn_cast<Instruction>(V)) {
1994 // If this is a node in an expression tree, climb to the expression root
1995 // and add that since that's where optimization actually happens.
1996 unsigned Opcode = Op->getOpcode();
1997 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
1998 Visited.insert(Op).second)
1999 Op = Op->user_back();
2000
2001 // The instruction we're going to push may be coming from a
2002 // dead block, and Reassociate skips the processing of unreachable
2003 // blocks because it's a waste of time and also because it can
2004 // lead to infinite loop due to LLVM's non-standard definition
2005 // of dominance.
2006 if (ValueRankMap.contains(Op))
2007 RedoInsts.insert(Op);
2008 }
2009
2010 MadeChange = true;
2011}
2012
2013/// Recursively analyze an expression to build a list of instructions that have
2014/// negative floating-point constant operands. The caller can then transform
2015/// the list to create positive constants for better reassociation and CSE.
2017 SmallVectorImpl<Instruction *> &Candidates) {
2018 // Handle only one-use instructions. Combining negations does not justify
2019 // replicating instructions.
2020 Instruction *I;
2021 if (!match(V, m_OneUse(m_Instruction(I))))
2022 return;
2023
2024 // Handle expressions of multiplications and divisions.
2025 // TODO: This could look through floating-point casts.
2026 const APFloat *C;
2027 switch (I->getOpcode()) {
2028 case Instruction::FMul:
2029 // Not expecting non-canonical code here. Bail out and wait.
2030 if (match(I->getOperand(0), m_Constant()))
2031 break;
2032
2033 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2034 Candidates.push_back(I);
2035 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2036 }
2037 getNegatibleInsts(I->getOperand(0), Candidates);
2038 getNegatibleInsts(I->getOperand(1), Candidates);
2039 break;
2040 case Instruction::FDiv:
2041 // Not expecting non-canonical code here. Bail out and wait.
2042 if (match(I->getOperand(0), m_Constant()) &&
2043 match(I->getOperand(1), m_Constant()))
2044 break;
2045
2046 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2047 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2048 Candidates.push_back(I);
2049 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2050 }
2051 getNegatibleInsts(I->getOperand(0), Candidates);
2052 getNegatibleInsts(I->getOperand(1), Candidates);
2053 break;
2054 default:
2055 break;
2056 }
2057}
2058
2059/// Given an fadd/fsub with an operand that is a one-use instruction
2060/// (the fadd/fsub), try to change negative floating-point constants into
2061/// positive constants to increase potential for reassociation and CSE.
2062Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2063 Instruction *Op,
2064 Value *OtherOp) {
2065 assert((I->getOpcode() == Instruction::FAdd ||
2066 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2067
2068 // Collect instructions with negative FP constants from the subtree that ends
2069 // in Op.
2071 getNegatibleInsts(Op, Candidates);
2072 if (Candidates.empty())
2073 return nullptr;
2074
2075 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2076 // resulting subtract will be broken up later. This can get us into an
2077 // infinite loop during reassociation.
2078 bool IsFSub = I->getOpcode() == Instruction::FSub;
2079 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2080 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2081 return nullptr;
2082
2083 for (Instruction *Negatible : Candidates) {
2084 const APFloat *C;
2085 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2086 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2087 "Expecting only 1 constant operand");
2088 assert(C->isNegative() && "Expected negative FP constant");
2089 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2090 MadeChange = true;
2091 }
2092 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2093 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2094 "Expecting only 1 constant operand");
2095 assert(C->isNegative() && "Expected negative FP constant");
2096 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2097 MadeChange = true;
2098 }
2099 }
2100 assert(MadeChange == true && "Negative constant candidate was not changed");
2101
2102 // Negations cancelled out.
2103 if (Candidates.size() % 2 == 0)
2104 return I;
2105
2106 // Negate the final operand in the expression by flipping the opcode of this
2107 // fadd/fsub.
2108 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2109 IRBuilder<> Builder(I);
2110 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2111 : Builder.CreateFSubFMF(OtherOp, Op, I);
2112 I->replaceAllUsesWith(NewInst);
2113 RedoInsts.insert(I);
2114 return dyn_cast<Instruction>(NewInst);
2115}
2116
2117/// Canonicalize expressions that contain a negative floating-point constant
2118/// of the following form:
2119/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2120/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2121/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2122///
2123/// The fadd/fsub opcode may be switched to allow folding a negation into the
2124/// input instruction.
2125Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2126 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2127 Value *X;
2128 Instruction *Op;
2130 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2131 I = R;
2133 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2134 I = R;
2136 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2137 I = R;
2138 return I;
2139}
2140
2141/// Inspect and optimize the given instruction. Note that erasing
2142/// instructions is not allowed.
2143void ReassociatePass::OptimizeInst(Instruction *I) {
2144 // Only consider operations that we understand.
2145 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2146 return;
2147
2148 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2149 // If an operand of this shift is a reassociable multiply, or if the shift
2150 // is used by a reassociable multiply or add, turn into a multiply.
2151 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2152 (I->hasOneUse() &&
2153 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2154 isReassociableOp(I->user_back(), Instruction::Add)))) {
2156 RedoInsts.insert(I);
2157 MadeChange = true;
2158 I = NI;
2159 }
2160
2161 // Commute binary operators, to canonicalize the order of their operands.
2162 // This can potentially expose more CSE opportunities, and makes writing other
2163 // transformations simpler.
2164 if (I->isCommutative())
2165 canonicalizeOperands(I);
2166
2167 // Canonicalize negative constants out of expressions.
2168 if (Instruction *Res = canonicalizeNegFPConstants(I))
2169 I = Res;
2170
2171 // Don't optimize floating-point instructions unless they have the
2172 // appropriate FastMathFlags for reassociation enabled.
2173 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2174 return;
2175
2176 // Do not reassociate boolean (i1/vXi1) expressions. We want to preserve the
2177 // original order of evaluation for short-circuited comparisons that
2178 // SimplifyCFG has folded to AND/OR expressions. If the expression
2179 // is not further optimized, it is likely to be transformed back to a
2180 // short-circuited form for code gen, and the source order may have been
2181 // optimized for the most likely conditions. For vector boolean expressions,
2182 // we should be optimizing for ILP and not serializing the logical operations.
2183 if (I->getType()->isIntOrIntVectorTy(1))
2184 return;
2185
2186 // If this is a bitwise or instruction of operands
2187 // with no common bits set, convert it to X+Y.
2188 if (I->getOpcode() == Instruction::Or &&
2190 (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
2191 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2192 SimplifyQuery(I->getDataLayout(),
2193 /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
2195 RedoInsts.insert(I);
2196 MadeChange = true;
2197 I = NI;
2198 }
2199
2200 // If this is a subtract instruction which is not already in negate form,
2201 // see if we can convert it to X+-Y.
2202 if (I->getOpcode() == Instruction::Sub) {
2203 if (ShouldBreakUpSubtract(I)) {
2204 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2205 RedoInsts.insert(I);
2206 MadeChange = true;
2207 I = NI;
2208 } else if (match(I, m_Neg(m_Value()))) {
2209 // Otherwise, this is a negation. See if the operand is a multiply tree
2210 // and if this is not an inner node of a multiply tree.
2211 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2212 (!I->hasOneUse() ||
2213 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2215 // If the negate was simplified, revisit the users to see if we can
2216 // reassociate further.
2217 for (User *U : NI->users()) {
2218 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2219 RedoInsts.insert(Tmp);
2220 }
2221 RedoInsts.insert(I);
2222 MadeChange = true;
2223 I = NI;
2224 }
2225 }
2226 } else if (I->getOpcode() == Instruction::FNeg ||
2227 I->getOpcode() == Instruction::FSub) {
2228 if (ShouldBreakUpSubtract(I)) {
2229 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2230 RedoInsts.insert(I);
2231 MadeChange = true;
2232 I = NI;
2233 } else if (match(I, m_FNeg(m_Value()))) {
2234 // Otherwise, this is a negation. See if the operand is a multiply tree
2235 // and if this is not an inner node of a multiply tree.
2236 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2237 I->getOperand(0);
2238 if (isReassociableOp(Op, Instruction::FMul) &&
2239 (!I->hasOneUse() ||
2240 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2241 // If the negate was simplified, revisit the users to see if we can
2242 // reassociate further.
2244 for (User *U : NI->users()) {
2245 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2246 RedoInsts.insert(Tmp);
2247 }
2248 RedoInsts.insert(I);
2249 MadeChange = true;
2250 I = NI;
2251 }
2252 }
2253 }
2254
2255 // If this instruction is an associative binary operator, process it.
2256 if (!I->isAssociative()) return;
2257 BinaryOperator *BO = cast<BinaryOperator>(I);
2258
2259 // If this is an interior node of a reassociable tree, ignore it until we
2260 // get to the root of the tree, to avoid N^2 analysis.
2261 unsigned Opcode = BO->getOpcode();
2262 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2263 // During the initial run we will get to the root of the tree.
2264 // But if we get here while we are redoing instructions, there is no
2265 // guarantee that the root will be visited. So Redo later
2266 if (BO->user_back() != BO &&
2267 BO->getParent() == BO->user_back()->getParent())
2268 RedoInsts.insert(BO->user_back());
2269 return;
2270 }
2271
2272 // If this is an add tree that is used by a sub instruction, ignore it
2273 // until we process the subtract.
2274 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2275 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2276 return;
2277 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2278 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2279 return;
2280
2281 ReassociateExpression(BO);
2282}
2283
2284void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2285 // First, walk the expression tree, linearizing the tree, collecting the
2286 // operand information.
2289 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags);
2291 Ops.reserve(Tree.size());
2292 for (const RepeatedValue &E : Tree)
2293 Ops.append(E.second, ValueEntry(getRank(E.first), E.first));
2294
2295 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2296
2297 // Now that we have linearized the tree to a list and have gathered all of
2298 // the operands and their ranks, sort the operands by their rank. Use a
2299 // stable_sort so that values with equal ranks will have their relative
2300 // positions maintained (and so the compiler is deterministic). Note that
2301 // this sorts so that the highest ranking values end up at the beginning of
2302 // the vector.
2303 llvm::stable_sort(Ops);
2304
2305 // Now that we have the expression tree in a convenient
2306 // sorted form, optimize it globally if possible.
2307 if (Value *V = OptimizeExpression(I, Ops)) {
2308 if (V == I)
2309 // Self-referential expression in unreachable code.
2310 return;
2311 // This expression tree simplified to something that isn't a tree,
2312 // eliminate it.
2313 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2314 I->replaceAllUsesWith(V);
2315 if (Instruction *VI = dyn_cast<Instruction>(V))
2316 if (I->getDebugLoc())
2317 VI->setDebugLoc(I->getDebugLoc());
2318 RedoInsts.insert(I);
2319 ++NumAnnihil;
2320 return;
2321 }
2322
2323 // We want to sink immediates as deeply as possible except in the case where
2324 // this is a multiply tree used only by an add, and the immediate is a -1.
2325 // In this case we reassociate to put the negation on the outside so that we
2326 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2327 if (I->hasOneUse()) {
2328 if (I->getOpcode() == Instruction::Mul &&
2329 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2330 isa<ConstantInt>(Ops.back().Op) &&
2331 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2332 ValueEntry Tmp = Ops.pop_back_val();
2333 Ops.insert(Ops.begin(), Tmp);
2334 } else if (I->getOpcode() == Instruction::FMul &&
2335 cast<Instruction>(I->user_back())->getOpcode() ==
2336 Instruction::FAdd &&
2337 isa<ConstantFP>(Ops.back().Op) &&
2338 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2339 ValueEntry Tmp = Ops.pop_back_val();
2340 Ops.insert(Ops.begin(), Tmp);
2341 }
2342 }
2343
2344 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2345
2346 if (Ops.size() == 1) {
2347 if (Ops[0].Op == I)
2348 // Self-referential expression in unreachable code.
2349 return;
2350
2351 // This expression tree simplified to something that isn't a tree,
2352 // eliminate it.
2353 I->replaceAllUsesWith(Ops[0].Op);
2354 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2355 OI->setDebugLoc(I->getDebugLoc());
2356 RedoInsts.insert(I);
2357 return;
2358 }
2359
2360 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2361 // Find the pair with the highest count in the pairmap and move it to the
2362 // back of the list so that it can later be CSE'd.
2363 // example:
2364 // a*b*c*d*e
2365 // if c*e is the most "popular" pair, we can express this as
2366 // (((c*e)*d)*b)*a
2367 unsigned Max = 1;
2368 unsigned BestRank = 0;
2369 std::pair<unsigned, unsigned> BestPair;
2370 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2371 unsigned LimitIdx = 0;
2372 // With the CSE-driven heuristic, we are about to slap two values at the
2373 // beginning of the expression whereas they could live very late in the CFG.
2374 // When using the CSE-local heuristic we avoid creating dependences from
2375 // completely unrelated part of the CFG by limiting the expression
2376 // reordering on the values that live in the first seen basic block.
2377 // The main idea is that we want to avoid forming expressions that would
2378 // become loop dependent.
2379 if (UseCSELocalOpt) {
2380 const BasicBlock *FirstSeenBB = nullptr;
2381 int StartIdx = Ops.size() - 1;
2382 // Skip the first value of the expression since we need at least two
2383 // values to materialize an expression. I.e., even if this value is
2384 // anchored in a different basic block, the actual first sub expression
2385 // will be anchored on the second value.
2386 for (int i = StartIdx - 1; i != -1; --i) {
2387 const Value *Val = Ops[i].Op;
2388 const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2389 const BasicBlock *SeenBB = nullptr;
2390 if (!CurrLeafInstr) {
2391 // The value is free of any CFG dependencies.
2392 // Do as if it lives in the entry block.
2393 //
2394 // We do this to make sure all the values falling on this path are
2395 // seen through the same anchor point. The rationale is these values
2396 // can be combined together to from a sub expression free of any CFG
2397 // dependencies so we want them to stay together.
2398 // We could be cleverer and postpone the anchor down to the first
2399 // anchored value, but that's likely complicated to get right.
2400 // E.g., we wouldn't want to do that if that means being stuck in a
2401 // loop.
2402 //
2403 // For instance, we wouldn't want to change:
2404 // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2405 // into
2406 // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2407 // Because all the sub expressions with arg2..N would be stuck between
2408 // two loop dependent values.
2409 SeenBB = &I->getParent()->getParent()->getEntryBlock();
2410 } else {
2411 SeenBB = CurrLeafInstr->getParent();
2412 }
2413
2414 if (!FirstSeenBB) {
2415 FirstSeenBB = SeenBB;
2416 continue;
2417 }
2418 if (FirstSeenBB != SeenBB) {
2419 // ith value is in a different basic block.
2420 // Rewind the index once to point to the last value on the same basic
2421 // block.
2422 LimitIdx = i + 1;
2423 LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2424 << LimitIdx << ", " << StartIdx << "]\n");
2425 break;
2426 }
2427 }
2428 }
2429 for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
2430 // We must use int type to go below zero when LimitIdx is 0.
2431 for (int j = i - 1; j >= (int)LimitIdx; --j) {
2432 unsigned Score = 0;
2433 Value *Op0 = Ops[i].Op;
2434 Value *Op1 = Ops[j].Op;
2435 if (std::less<Value *>()(Op1, Op0))
2436 std::swap(Op0, Op1);
2437 auto it = PairMap[Idx].find({Op0, Op1});
2438 if (it != PairMap[Idx].end()) {
2439 // Functions like BreakUpSubtract() can erase the Values we're using
2440 // as keys and create new Values after we built the PairMap. There's a
2441 // small chance that the new nodes can have the same address as
2442 // something already in the table. We shouldn't accumulate the stored
2443 // score in that case as it refers to the wrong Value.
2444 if (it->second.isValid())
2445 Score += it->second.Score;
2446 }
2447
2448 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2449
2450 // By construction, the operands are sorted in reverse order of their
2451 // topological order.
2452 // So we tend to form (sub) expressions with values that are close to
2453 // each other.
2454 //
2455 // Now to expose more CSE opportunities we want to expose the pair of
2456 // operands that occur the most (as statically computed in
2457 // BuildPairMap.) as the first sub-expression.
2458 //
2459 // If two pairs occur as many times, we pick the one with the
2460 // lowest rank, meaning the one with both operands appearing first in
2461 // the topological order.
2462 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2463 BestPair = {j, i};
2464 Max = Score;
2465 BestRank = MaxRank;
2466 }
2467 }
2468 }
2469 if (Max > 1) {
2470 auto Op0 = Ops[BestPair.first];
2471 auto Op1 = Ops[BestPair.second];
2472 Ops.erase(&Ops[BestPair.second]);
2473 Ops.erase(&Ops[BestPair.first]);
2474 Ops.push_back(Op0);
2475 Ops.push_back(Op1);
2476 }
2477 }
2478 LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2479 dbgs() << '\n');
2480 // Now that we ordered and optimized the expressions, splat them back into
2481 // the expression tree, removing any unneeded nodes.
2482 RewriteExprTree(I, Ops, Flags);
2483}
2484
2485void
2486ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2487 // Make a "pairmap" of how often each operand pair occurs.
2488 for (BasicBlock *BI : RPOT) {
2489 for (Instruction &I : *BI) {
2490 if (!I.isAssociative() || !I.isBinaryOp())
2491 continue;
2492
2493 // Ignore nodes that aren't at the root of trees.
2494 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2495 continue;
2496
2497 // Collect all operands in a single reassociable expression.
2498 // Since Reassociate has already been run once, we can assume things
2499 // are already canonical according to Reassociation's regime.
2500 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2502 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2503 Value *Op = Worklist.pop_back_val();
2504 Instruction *OpI = dyn_cast<Instruction>(Op);
2505 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2506 Ops.push_back(Op);
2507 continue;
2508 }
2509 // Be paranoid about self-referencing expressions in unreachable code.
2510 if (OpI->getOperand(0) != OpI)
2511 Worklist.push_back(OpI->getOperand(0));
2512 if (OpI->getOperand(1) != OpI)
2513 Worklist.push_back(OpI->getOperand(1));
2514 }
2515 // Skip extremely long expressions.
2516 if (Ops.size() > GlobalReassociateLimit)
2517 continue;
2518
2519 // Add all pairwise combinations of operands to the pair map.
2520 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2522 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2523 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2524 // Canonicalize operand orderings.
2525 Value *Op0 = Ops[i];
2526 Value *Op1 = Ops[j];
2527 if (std::less<Value *>()(Op1, Op0))
2528 std::swap(Op0, Op1);
2529 if (!Visited.insert({Op0, Op1}).second)
2530 continue;
2531 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2532 if (!res.second) {
2533 // If either key value has been erased then we've got the same
2534 // address by coincidence. That can't happen here because nothing is
2535 // erasing values but it can happen by the time we're querying the
2536 // map.
2537 assert(res.first->second.isValid() && "WeakVH invalidated");
2538 ++res.first->second.Score;
2539 }
2540 }
2541 }
2542 }
2543 }
2544}
2545
2547 // Get the functions basic blocks in Reverse Post Order. This order is used by
2548 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2549 // blocks (it has been seen that the analysis in this pass could hang when
2550 // analysing dead basic blocks).
2552
2553 // Calculate the rank map for F.
2554 BuildRankMap(F, RPOT);
2555
2556 // Build the pair map before running reassociate.
2557 // Technically this would be more accurate if we did it after one round
2558 // of reassociation, but in practice it doesn't seem to help much on
2559 // real-world code, so don't waste the compile time running reassociate
2560 // twice.
2561 // If a user wants, they could expicitly run reassociate twice in their
2562 // pass pipeline for further potential gains.
2563 // It might also be possible to update the pair map during runtime, but the
2564 // overhead of that may be large if there's many reassociable chains.
2565 BuildPairMap(RPOT);
2566
2567 MadeChange = false;
2568
2569 // Traverse the same blocks that were analysed by BuildRankMap.
2570 for (BasicBlock *BI : RPOT) {
2571 assert(RankMap.count(&*BI) && "BB should be ranked.");
2572 // Optimize every instruction in the basic block.
2573 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2575 EraseInst(&*II++);
2576 } else {
2577 OptimizeInst(&*II);
2578 assert(II->getParent() == &*BI && "Moved to a different block!");
2579 ++II;
2580 }
2581
2582 // Make a copy of all the instructions to be redone so we can remove dead
2583 // instructions.
2584 OrderedSet ToRedo(RedoInsts);
2585 // Iterate over all instructions to be reevaluated and remove trivially dead
2586 // instructions. If any operand of the trivially dead instruction becomes
2587 // dead mark it for deletion as well. Continue this process until all
2588 // trivially dead instructions have been removed.
2589 while (!ToRedo.empty()) {
2590 Instruction *I = ToRedo.pop_back_val();
2592 RecursivelyEraseDeadInsts(I, ToRedo);
2593 MadeChange = true;
2594 }
2595 }
2596
2597 // Now that we have removed dead instructions, we can reoptimize the
2598 // remaining instructions.
2599 while (!RedoInsts.empty()) {
2600 Instruction *I = RedoInsts.front();
2601 RedoInsts.erase(RedoInsts.begin());
2603 EraseInst(I);
2604 else
2605 OptimizeInst(I);
2606 }
2607 }
2608
2609 // We are done with the rank map and pair map.
2610 RankMap.clear();
2611 ValueRankMap.clear();
2612 for (auto &Entry : PairMap)
2613 Entry.clear();
2614
2615 if (MadeChange) {
2618 return PA;
2619 }
2620
2621 return PreservedAnalyses::all();
2622}
2623
2624namespace {
2625
2626 class ReassociateLegacyPass : public FunctionPass {
2627 ReassociatePass Impl;
2628
2629 public:
2630 static char ID; // Pass identification, replacement for typeid
2631
2632 ReassociateLegacyPass() : FunctionPass(ID) {
2634 }
2635
2636 bool runOnFunction(Function &F) override {
2637 if (skipFunction(F))
2638 return false;
2639
2640 FunctionAnalysisManager DummyFAM;
2641 auto PA = Impl.run(F, DummyFAM);
2642 return !PA.areAllPreserved();
2643 }
2644
2645 void getAnalysisUsage(AnalysisUsage &AU) const override {
2646 AU.setPreservesCFG();
2650 }
2651 };
2652
2653} // end anonymous namespace
2654
2655char ReassociateLegacyPass::ID = 0;
2656
2657INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2658 "Reassociate expressions", false, false)
2659
2660// Public interface to the Reassociate pass
2662 return new ReassociateLegacyPass();
2663}
@ Poison
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
static bool isReassociableOp(Instruction *I, unsigned IntOpcode, unsigned FPOpcode)
Definition: LICM.cpp:2749
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
nary reassociate
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo, OverflowTracking &Flags)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:82
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
std::pair< Value *, uint64_t > RepeatedValue
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static Instruction * CreateNeg(Value *S1, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
#define DEBUG_TYPE
Definition: Reassociate.cpp:68
static bool isLoadCombineCandidate(Instruction *Or)
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:247
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:354
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
static LLVM_ABI Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty, bool AllowLHSConstant=false)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2765
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2694
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2635
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:124
This provides a helper for copying FMF from an instruction or setting specified flags.
Definition: IRBuilder.h:93
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:22
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateFSubFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1637
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:345
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1618
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1651
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
Definition: DebugInfo.cpp:932
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
const char * getOpcodeName() const
Definition: Instruction.h:314
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: Analysis.h:292
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:151
Reassociate commutative expressions.
Definition: Reassociate.h:74
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
value_type pop_back_val()
Definition: SetVector.h:296
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:147
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
user_iterator user_begin()
Definition: Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:555
LLVM_ABI void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:111
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:862
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:316
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1723
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1563
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2095
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:402
LLVM_ABI Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void initializeReassociateLegacyPassPass(PassRegistry &)
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
LLVM_ABI FunctionPass * createReassociatePass()
LLVM_ABI bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:610
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2013
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:62