1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
163 CombineLevel Level = BeforeLegalizeTypes;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
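// (Index semantics: a negative CombinerWorklistIndex means the node is not
// currently in the worklist, with -2 specifically marking a node that has
// already been combined; values >= 0 give the node's position in Worklist.
// See AddToWorklist and getNextWorklistEntry below.)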
181 SmallVector<SDNode *, 64> Worklist;
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. As we do not add duplicate nodes
184 /// in the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as store merging
192 /// candidate again.
194 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// stores candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the work lists because they might get more simplified now.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its users to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist, making sure its instance is at the back (next to
275 /// be processed).
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
356 ? APInt::getAllOnes(VT.getVectorNumElements())
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns a nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it,
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
550 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
551 SDValue visitVECTOR_SHUFFLE(SDNode *N);
552 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
553 SDValue visitINSERT_SUBVECTOR(SDNode *N);
554 SDValue visitVECTOR_COMPRESS(SDNode *N);
555 SDValue visitMLOAD(SDNode *N);
556 SDValue visitMSTORE(SDNode *N);
557 SDValue visitMGATHER(SDNode *N);
558 SDValue visitMSCATTER(SDNode *N);
559 SDValue visitMHISTOGRAM(SDNode *N);
560 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
561 SDValue visitVPGATHER(SDNode *N);
562 SDValue visitVPSCATTER(SDNode *N);
563 SDValue visitVP_STRIDED_LOAD(SDNode *N);
564 SDValue visitVP_STRIDED_STORE(SDNode *N);
565 SDValue visitFP_TO_FP16(SDNode *N);
566 SDValue visitFP16_TO_FP(SDNode *N);
567 SDValue visitFP_TO_BF16(SDNode *N);
568 SDValue visitBF16_TO_FP(SDNode *N);
569 SDValue visitVECREDUCE(SDNode *N);
570 SDValue visitVPOp(SDNode *N);
571 SDValue visitGET_FPENV_MEM(SDNode *N);
572 SDValue visitSET_FPENV_MEM(SDNode *N);
573
574 template <class MatchContextClass>
575 SDValue visitFADDForFMACombine(SDNode *N);
576 template <class MatchContextClass>
577 SDValue visitFSUBForFMACombine(SDNode *N);
578 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
579
580 SDValue XformToShuffleWithZero(SDNode *N);
581 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
582 const SDLoc &DL,
583 SDNode *N,
584 SDValue N0,
585 SDValue N1);
586 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
591 EVT VT, SDValue N0, SDValue N1,
592 SDNodeFlags Flags = SDNodeFlags());
593
594 SDValue visitShiftByConstant(SDNode *N);
595
596 SDValue foldSelectOfConstants(SDNode *N);
597 SDValue foldVSelectOfConstants(SDNode *N);
598 SDValue foldBinOpIntoSelect(SDNode *BO);
599 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
600 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
601 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
602 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
603 SDValue N2, SDValue N3, ISD::CondCode CC,
604 bool NotExtCompare = false);
605 SDValue convertSelectOfFPConstantsToLoadOffset(
606 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
607 ISD::CondCode CC);
608 SDValue foldSignChangeInBitcast(SDNode *N);
609 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
610 SDValue N2, SDValue N3, ISD::CondCode CC);
611 SDValue foldSelectOfBinops(SDNode *N);
612 SDValue foldSextSetcc(SDNode *N);
613 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
614 const SDLoc &DL);
615 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
616 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
617 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
618 SDValue False, ISD::CondCode CC, const SDLoc &DL);
619 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue unfoldMaskedMerge(SDNode *N);
622 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
623 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
624 const SDLoc &DL, bool foldBooleans);
625 SDValue rebuildSetCC(SDValue N);
626
627 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
628 SDValue &CC, bool MatchStrict = false) const;
629 bool isOneUseSetCC(SDValue N) const;
630
631 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
632 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
633
634 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
635 unsigned HiOp);
636 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
637 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
638 const TargetLowering &TLI);
639 SDValue foldPartialReduceMLAMulOp(SDNode *N);
640 SDValue foldPartialReduceAdd(SDNode *N);
641
642 SDValue CombineExtLoad(SDNode *N);
643 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
644 SDValue combineRepeatedFPDivisors(SDNode *N);
645 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
646 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
647 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
648 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
650 SDValue BuildSDIV(SDNode *N);
651 SDValue BuildSDIVPow2(SDNode *N);
652 SDValue BuildUDIV(SDNode *N);
653 SDValue BuildSREMPow2(SDNode *N);
654 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
655 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
656 bool KnownNeverZero = false,
657 bool InexpensiveOnly = false,
658 std::optional<EVT> OutVT = std::nullopt);
659 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
660 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
661 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
662 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
663 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
664 SDNodeFlags Flags, bool Reciprocal);
665 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
666 SDNodeFlags Flags, bool Reciprocal);
667 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
668 bool DemandHighBits = true);
669 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
670 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
671 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
672 bool HasPos, unsigned PosOpcode,
673 unsigned NegOpcode, const SDLoc &DL);
674 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
675 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
676 bool HasPos, unsigned PosOpcode,
677 unsigned NegOpcode, const SDLoc &DL);
678 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
679 bool FromAdd);
680 SDValue MatchLoadCombine(SDNode *N);
681 SDValue mergeTruncStores(StoreSDNode *N);
682 SDValue reduceLoadWidth(SDNode *N);
683 SDValue ReduceLoadOpStoreWidth(SDNode *N);
684 SDValue splitMergedValStore(StoreSDNode *ST);
685 SDValue TransformFPLoadStorePair(SDNode *N);
686 SDValue convertBuildVecZextToZext(SDNode *N);
687 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
688 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
689 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
690 SDValue reduceBuildVecToShuffle(SDNode *N);
691 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
692 ArrayRef<int> VectorMask, SDValue VecIn1,
693 SDValue VecIn2, unsigned LeftIdx,
694 bool DidSplitVec);
695 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
696
697 /// Walk up chain skipping non-aliasing memory nodes,
698 /// looking for aliasing nodes and adding them to the Aliases vector.
699 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
700 SmallVectorImpl<SDValue> &Aliases);
701
702 /// Return true if there is any possibility that the two addresses overlap.
703 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
704
705 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
706 /// chain (aliasing node.)
707 SDValue FindBetterChain(SDNode *N, SDValue Chain);
708
709 /// Try to replace a store and any possibly adjacent stores on
710 /// consecutive chains with better chains. Return true only if St is
711 /// replaced.
712 ///
713 /// Notice that other chains may still be replaced even if the function
714 /// returns false.
715 bool findBetterNeighborChains(StoreSDNode *St);
716
717 // Helper for findBetterNeighborChains. Walk up store chain add additional
718 // chained stores that do not overlap and can be parallelized.
719 bool parallelizeChainedStores(StoreSDNode *St);
720
721 /// Holds a pointer to an LSBaseSDNode as well as information on where it
722 /// is located in a sequence of memory operations connected by a chain.
723 struct MemOpLink {
724 // Ptr to the mem node.
725 LSBaseSDNode *MemNode;
726
727 // Offset from the base ptr.
728 int64_t OffsetFromBase;
729
730 MemOpLink(LSBaseSDNode *N, int64_t Offset)
731 : MemNode(N), OffsetFromBase(Offset) {}
732 };
733
734 // Classify the origin of a stored value.
735 enum class StoreSource { Unknown, Constant, Extract, Load };
736 StoreSource getStoreSource(SDValue StoreVal) {
737 switch (StoreVal.getOpcode()) {
738 case ISD::Constant:
739 case ISD::ConstantFP:
740 return StoreSource::Constant;
741 case ISD::BUILD_VECTOR:
742 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
743 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
744 return StoreSource::Constant;
745 return StoreSource::Unknown;
746 case ISD::EXTRACT_VECTOR_ELT:
747 case ISD::EXTRACT_SUBVECTOR:
748 return StoreSource::Extract;
749 case ISD::LOAD:
750 return StoreSource::Load;
751 default:
752 return StoreSource::Unknown;
753 }
754 }
755
756 /// This is a helper function for visitMUL to check the profitability
757 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
758 /// MulNode is the original multiply, AddNode is (add x, c1),
759 /// and ConstNode is c2.
760 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
761 SDValue ConstNode);
762
763 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
764 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
765 /// the type of the loaded value to be extended.
766 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
767 EVT LoadResultTy, EVT &ExtVT);
768
769 /// Helper function to calculate whether the given Load/Store can have its
770 /// width reduced to ExtVT.
771 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
772 EVT &MemVT, unsigned ShAmt = 0);
773
774 /// Used by BackwardsPropagateMask to find suitable loads.
775 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
776 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
777 ConstantSDNode *Mask, SDNode *&NodeToMask);
778 /// Attempt to propagate a given AND node back to load leaves so that they
779 /// can be combined into narrow loads.
780 bool BackwardsPropagateMask(SDNode *N);
781
782 /// Helper function for mergeConsecutiveStores which merges the component
783 /// store chains.
784 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
785 unsigned NumStores);
786
787 /// Helper function for mergeConsecutiveStores which checks if all the store
788 /// nodes have the same underlying object. We can still reuse the first
789 /// store's pointer info if all the stores are from the same object.
790 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
791
792 /// This is a helper function for mergeConsecutiveStores. When the source
793 /// elements of the consecutive stores are all constants or all extracted
794 /// vector elements, try to merge them into one larger store introducing
795 /// bitcasts if necessary. \return True if a merged store was created.
796 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
797 EVT MemVT, unsigned NumStores,
798 bool IsConstantSrc, bool UseVector,
799 bool UseTrunc);
800
801 /// This is a helper function for mergeConsecutiveStores. Stores that
802 /// potentially may be merged with St are placed in StoreNodes. On success,
803 /// returns a chain predecessor to all store candidates.
804 SDNode *getStoreMergeCandidates(StoreSDNode *St,
805 SmallVectorImpl<MemOpLink> &StoreNodes);
806
807 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
808 /// have indirect dependency through their operands. RootNode is the
809 /// predecessor to all stores calculated by getStoreMergeCandidates and is
810 /// used to prune the dependency check. \return True if safe to merge.
811 bool checkMergeStoreCandidatesForDependencies(
812 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
813 SDNode *RootNode);
814
815 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
816 /// chain has a call in it. \return True if a call is found.
817 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
818
819 /// This is a helper function for mergeConsecutiveStores. Given a list of
820 /// store candidates, find the first N that are consecutive in memory.
821 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
822 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
823 int64_t ElementSizeBytes) const;
824
825 /// This is a helper function for mergeConsecutiveStores. It is used for
826 /// store chains that are composed entirely of constant values.
827 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
828 unsigned NumConsecutiveStores,
829 EVT MemVT, SDNode *Root, bool AllowVectors);
830
831 /// This is a helper function for mergeConsecutiveStores. It is used for
832 /// store chains that are composed entirely of extracted vector elements.
833 /// When extracting multiple vector elements, try to store them in one
834 /// vector store rather than a sequence of scalar stores.
835 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
836 unsigned NumConsecutiveStores, EVT MemVT,
837 SDNode *Root);
838
839 /// This is a helper function for mergeConsecutiveStores. It is used for
840 /// store chains that are composed entirely of loaded values.
841 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
842 unsigned NumConsecutiveStores, EVT MemVT,
843 SDNode *Root, bool AllowVectors,
844 bool IsNonTemporalStore, bool IsNonTemporalLoad);
845
846 /// Merge consecutive store operations into a wide store.
847 /// This optimization uses wide integers or vectors when possible.
848 /// \return true if stores were merged.
849 bool mergeConsecutiveStores(StoreSDNode *St);
850
851 /// Try to transform a truncation where C is a constant:
852 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
853 ///
854 /// \p N needs to be a truncation and its first operand an AND. Other
855 /// requirements are checked by the function (e.g. that trunc is
856 /// single-use); if they are not met, an empty SDValue is returned.
857 SDValue distributeTruncateThroughAnd(SDNode *N);
858
859 /// Helper function to determine whether the target supports operation
860 /// given by \p Opcode for type \p VT, that is, whether the operation
861 /// is legal or custom before legalizing operations, and whether it is
862 /// legal (but not custom) after legalization.
863 bool hasOperation(unsigned Opcode, EVT VT) {
864 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
865 }
866
867 bool hasUMin(EVT VT) const {
868 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
869 return (LK.first == TargetLoweringBase::TypeLegal ||
870 LK.first == TargetLoweringBase::TypePromoteInteger) &&
871 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
872 }
873
874 public:
875 /// Runs the dag combiner on all nodes in the work list
876 void Run(CombineLevel AtLevel);
877
878 SelectionDAG &getDAG() const { return DAG; }
879
880 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
881 EVT getShiftAmountTy(EVT LHSTy) {
882 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
883 }
884
885 /// This method returns true if we are running before type legalization or
886 /// if the specified VT is legal.
887 bool isTypeLegal(const EVT &VT) {
888 if (!LegalTypes) return true;
889 return TLI.isTypeLegal(VT);
890 }
891
892 /// Convenience wrapper around TargetLowering::getSetCCResultType
893 EVT getSetCCResultType(EVT VT) const {
894 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
895 }
896
897 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
898 SDValue OrigLoad, SDValue ExtLoad,
899 ISD::NodeType ExtType);
900 };
901
902/// This class is a DAGUpdateListener that removes any deleted
903/// nodes from the worklist.
904class WorklistRemover : public SelectionDAG::DAGUpdateListener {
905 DAGCombiner &DC;
906
907public:
908 explicit WorklistRemover(DAGCombiner &dc)
909 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
910
911 void NodeDeleted(SDNode *N, SDNode *E) override {
912 DC.removeFromWorklist(N);
913 }
914};
915
916class WorklistInserter : public SelectionDAG::DAGUpdateListener {
917 DAGCombiner &DC;
918
919public:
920 explicit WorklistInserter(DAGCombiner &dc)
921 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
922
923 // FIXME: Ideally we could add N to the worklist, but this causes exponential
924 // compile time costs in large DAGs, e.g. Halide.
925 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
926};
927
928} // end anonymous namespace
929
930//===----------------------------------------------------------------------===//
931// TargetLowering::DAGCombinerInfo implementation
932//===----------------------------------------------------------------------===//
933
934void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
935 ((DAGCombiner*)DC)->AddToWorklist(N);
936}
937
938SDValue TargetLowering::DAGCombinerInfo::
939 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
940 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
941}
942
943SDValue TargetLowering::DAGCombinerInfo::
944 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
945 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
946}
947
948SDValue TargetLowering::DAGCombinerInfo::
949 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
950 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
951}
952
953bool TargetLowering::DAGCombinerInfo::
954 recursivelyDeleteUnusedNodes(SDNode *N) {
955 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
956}
957
958void TargetLowering::DAGCombinerInfo::
959 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
960 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
961}
962
963//===----------------------------------------------------------------------===//
964// Helper Functions
965//===----------------------------------------------------------------------===//
966
967void DAGCombiner::deleteAndRecombine(SDNode *N) {
968 removeFromWorklist(N);
969
970 // If the operands of this node are only used by the node, they will now be
971 // dead. Make sure to re-visit them and recursively delete dead nodes.
972 for (const SDValue &Op : N->ops())
973 // For an operand generating multiple values, one of the values may
974 // become dead allowing further simplification (e.g. split index
975 // arithmetic from an indexed load).
976 if (Op->hasOneUse() || Op->getNumValues() > 1)
977 AddToWorklist(Op.getNode());
978
979 DAG.DeleteNode(N);
980}
981
982// APInts must be the same size for most operations, this helper
983// function zero extends the shorter of the pair so that they match.
984// We provide an Offset so that we can create bitwidths that won't overflow.
985static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
986 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
987 LHS = LHS.zext(Bits);
988 RHS = RHS.zext(Bits);
989}
990
991// Return true if this node is a setcc, or is a select_cc
992// that selects between the target values used for true and false, making it
993// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
994// the appropriate nodes based on the type of node we are checking. This
995// simplifies life a bit for the callers.
996bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
997 SDValue &CC, bool MatchStrict) const {
998 if (N.getOpcode() == ISD::SETCC) {
999 LHS = N.getOperand(0);
1000 RHS = N.getOperand(1);
1001 CC = N.getOperand(2);
1002 return true;
1003 }
1004
1005 if (MatchStrict &&
1006 (N.getOpcode() == ISD::STRICT_FSETCC ||
1007 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1008 LHS = N.getOperand(1);
1009 RHS = N.getOperand(2);
1010 CC = N.getOperand(3);
1011 return true;
1012 }
1013
1014 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1015 !TLI.isConstFalseVal(N.getOperand(3)))
1016 return false;
1017
1018 if (TLI.getBooleanContents(N.getValueType()) ==
1019 TargetLowering::UndefinedBooleanContent)
1020 return false;
1021
1022 LHS = N.getOperand(0);
1023 RHS = N.getOperand(1);
1024 CC = N.getOperand(4);
1025 return true;
1026}
1027
1028/// Return true if this is a SetCC-equivalent operation with only one use.
1029/// If this is true, it allows the users to invert the operation for free when
1030/// it is profitable to do so.
1031bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1032 SDValue N0, N1, N2;
1033 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1034 return true;
1035 return false;
1036}
1037
1038static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1039 if (!ScalarTy.isSimple())
1040 return false;
1041
1042 uint64_t MaskForTy = 0ULL;
1043 switch (ScalarTy.getSimpleVT().SimpleTy) {
1044 case MVT::i8:
1045 MaskForTy = 0xFFULL;
1046 break;
1047 case MVT::i16:
1048 MaskForTy = 0xFFFFULL;
1049 break;
1050 case MVT::i32:
1051 MaskForTy = 0xFFFFFFFFULL;
1052 break;
1053 default:
1054 return false;
1055 break;
1056 }
1057
1058 APInt Val;
1059 if (ISD::isConstantSplatVector(N, Val))
1060 return Val.getLimitedValue() == MaskForTy;
1061
1062 return false;
1063}
1064
1065// Determines if it is a constant integer or a splat/build vector of constant
1066// integers (and undefs).
1067// Do not permit build vector implicit truncation.
1068 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1070 return !(Const->isOpaque() && NoOpaques);
1071 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1072 return false;
1073 unsigned BitWidth = N.getScalarValueSizeInBits();
1074 for (const SDValue &Op : N->op_values()) {
1075 if (Op.isUndef())
1076 continue;
1077 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1078 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1079 (Const->isOpaque() && NoOpaques))
1080 return false;
1081 }
1082 return true;
1083}
1084
1085// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1086// undef's.
1087static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1088 if (V.getOpcode() != ISD::BUILD_VECTOR)
1089 return false;
1090 return isConstantOrConstantVector(V, NoOpaques) ||
1091 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1092 }
1093
1094// Determine if this an indexed load with an opaque target constant index.
1095static bool canSplitIdx(LoadSDNode *LD) {
1096 return MaySplitLoadIndex &&
1097 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1098 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1099}
1100
1101bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1102 const SDLoc &DL,
1103 SDNode *N,
1104 SDValue N0,
1105 SDValue N1) {
1106 // Currently this only tries to ensure we don't undo the GEP splits done by
1107 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1108 // we check if the following transformation would be problematic:
1109 // (load/store (add, (add, x, offset1), offset2)) ->
1110 // (load/store (add, x, offset1+offset2)).
1111
1112 // (load/store (add, (add, x, y), offset2)) ->
1113 // (load/store (add, (add, x, offset2), y)).
1114
1115 if (!N0.isAnyAdd())
1116 return false;
1117
1118 // Check for vscale addressing modes.
1119 // (load/store (add/sub (add x, y), vscale))
1120 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1121 // (load/store (add/sub (add x, y), (mul vscale, C)))
1122 if ((N1.getOpcode() == ISD::VSCALE ||
1123 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1124 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1125 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1126 N1.getValueType().getFixedSizeInBits() <= 64) {
1127 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1128 ? N1.getConstantOperandVal(0)
1129 : (N1.getOperand(0).getConstantOperandVal(0) *
1130 (N1.getOpcode() == ISD::SHL
1131 ? (1LL << N1.getConstantOperandVal(1))
1132 : N1.getConstantOperandVal(1)));
1133 if (Opc == ISD::SUB)
1134 ScalableOffset = -ScalableOffset;
1135 if (all_of(N->users(), [&](SDNode *Node) {
1136 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1137 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.ScalableOffset = ScalableOffset;
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1145 AS);
1146 }
1147 return false;
1148 }))
1149 return true;
1150 }
1151
1152 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1153 return false;
1154
1155 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1156 if (!C2)
1157 return false;
1158
1159 const APInt &C2APIntVal = C2->getAPIntValue();
1160 if (C2APIntVal.getSignificantBits() > 64)
1161 return false;
1162
1163 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1164 if (N0.hasOneUse())
1165 return false;
1166
1167 const APInt &C1APIntVal = C1->getAPIntValue();
1168 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1169 if (CombinedValueIntVal.getSignificantBits() > 64)
1170 return false;
1171 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1172
1173 for (SDNode *Node : N->users()) {
1174 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1175 // Is x[offset2] already not a legal addressing mode? If so then
1176 // reassociating the constants breaks nothing (we test offset2 because
1177 // that's the one we hope to fold into the load or store).
1178 TargetLoweringBase::AddrMode AM;
1179 AM.HasBaseReg = true;
1180 AM.BaseOffs = C2APIntVal.getSExtValue();
1181 EVT VT = LoadStore->getMemoryVT();
1182 unsigned AS = LoadStore->getAddressSpace();
1183 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1184 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1185 continue;
1186
1187 // Would x[offset1+offset2] still be a legal addressing mode?
1188 AM.BaseOffs = CombinedValue;
1189 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1190 return true;
1191 }
1192 }
1193 } else {
1194 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1195 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1196 return false;
1197
1198 for (SDNode *Node : N->users()) {
1199 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1200 if (!LoadStore)
1201 return false;
1202
1203 // Is x[offset2] a legal addressing mode? If so, then
1204 // reassociating the constants would break the addressing pattern.
1205 TargetLoweringBase::AddrMode AM;
1206 AM.HasBaseReg = true;
1207 AM.BaseOffs = C2APIntVal.getSExtValue();
1208 EVT VT = LoadStore->getMemoryVT();
1209 unsigned AS = LoadStore->getAddressSpace();
1210 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1211 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1212 return false;
1213 }
1214 return true;
1215 }
1216
1217 return false;
1218}
1219
1220/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1221/// \p N0 is the same kind of operation as \p Opc.
1222SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1223 SDValue N0, SDValue N1,
1224 SDNodeFlags Flags) {
1225 EVT VT = N0.getValueType();
1226
1227 if (N0.getOpcode() != Opc)
1228 return SDValue();
1229
1230 SDValue N00 = N0.getOperand(0);
1231 SDValue N01 = N0.getOperand(1);
1232
1234 SDNodeFlags NewFlags;
1235 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1236 Flags.hasNoUnsignedWrap())
1237 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1238
1239 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1240 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1241 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1242 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1243 N0->getFlags().hasDisjoint());
1244 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1245 }
1246 return SDValue();
1247 }
1248 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1249 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1250 // iff (op x, c1) has one use
1251 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1252 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1253 }
1254 }
1255
1256 // Check for repeated operand logic simplifications.
1257 if (Opc == ISD::AND || Opc == ISD::OR) {
1258 // (N00 & N01) & N00 --> N00 & N01
1259 // (N00 & N01) & N01 --> N00 & N01
1260 // (N00 | N01) | N00 --> N00 | N01
1261 // (N00 | N01) | N01 --> N00 | N01
1262 if (N1 == N00 || N1 == N01)
1263 return N0;
1264 }
1265 if (Opc == ISD::XOR) {
1266 // (N00 ^ N01) ^ N00 --> N01
1267 if (N1 == N00)
1268 return N01;
1269 // (N00 ^ N01) ^ N01 --> N00
1270 if (N1 == N01)
1271 return N00;
1272 }
1273
1274 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1275 if (N1 != N01) {
1276 // Reassociate if (op N00, N1) already exists
1277 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1278 // If (Op (Op N00, N1), N01) already exists, we need to stop
1279 // reassociating to avoid an infinite loop.
1280 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1281 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1282 }
1283 }
1284
1285 if (N1 != N00) {
1286 // Reassociate if (op N01, N1) already exists
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1288 // If (Op (Op N01, N1), N00) already exists, we need to stop
1289 // reassociating to avoid an infinite loop.
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1292 }
1293 }
1294
1295 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1296 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1297 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1298 // comparisons with the same predicate. This enables optimizations as the
1299 // following one:
1300 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1301 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1302 if (Opc == ISD::AND || Opc == ISD::OR) {
1303 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1304 N01->getOpcode() == ISD::SETCC) {
1305 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1306 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1307 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1308 if (CC1 == CC00 && CC1 != CC01) {
1309 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1310 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1311 }
1312 if (CC1 == CC01 && CC1 != CC00) {
1313 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1314 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1315 }
1316 }
1317 }
1318 }
1319
1320 return SDValue();
1321}
1322
1323/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1324/// same kind of operation as \p Opc.
1325SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1326 SDValue N1, SDNodeFlags Flags) {
1327 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1328
1329 // Floating-point reassociation is not allowed without loose FP math.
1330 if (N0.getValueType().isFloatingPoint() ||
1331 N1.getValueType().isFloatingPoint())
1332 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1333 return SDValue();
1334
1335 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1336 return Combined;
1337 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1338 return Combined;
1339 return SDValue();
1340}
1341
1342// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1343// Note that we only expect Flags to be passed from FP operations. For integer
1344// operations they need to be dropped.
1345SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1346 const SDLoc &DL, EVT VT, SDValue N0,
1347 SDValue N1, SDNodeFlags Flags) {
1348 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1349 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1350 N0->hasOneUse() && N1->hasOneUse() &&
1351 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1352 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1353 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1354 return DAG.getNode(RedOpc, DL, VT,
1355 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1356 N0.getOperand(0), N1.getOperand(0)));
1357 }
1358
1359 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1360 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1361 // single node.
1362 SDValue A, B, C, D, RedA, RedB;
1363 if (sd_match(N0, m_OneUse(m_c_BinOp(
1364 Opc,
1365 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1366 m_Value(RedA)),
1367 m_Value(B)))) &&
1368 sd_match(N1, m_OneUse(m_c_BinOp(
1369 Opc,
1370 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1371 m_Value(RedB)),
1372 m_Value(D)))) &&
1373 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1374 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1375 A.getValueType() == C.getValueType() &&
1376 hasOperation(Opc, A.getValueType()) &&
1377 TLI.shouldReassociateReduction(RedOpc, VT)) {
1378 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1379 (!N0->getFlags().hasAllowReassociation() ||
1380 !N1->getFlags().hasAllowReassociation() ||
1381 !RedA->getFlags().hasAllowReassociation() ||
1382 !RedB->getFlags().hasAllowReassociation()))
1383 return SDValue();
1384 SelectionDAG::FlagInserter FlagsInserter(
1385 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1386 RedB->getFlags());
1387 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1388 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1389 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1390 return DAG.getNode(Opc, DL, VT, Red, Op2);
1391 }
1392 return SDValue();
1393}
1394
1395SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1396 bool AddTo) {
1397 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1398 ++NodesCombined;
1399 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1400 To[0].dump(&DAG);
1401 dbgs() << " and " << NumTo - 1 << " other values\n");
1402 for (unsigned i = 0, e = NumTo; i != e; ++i)
1403 assert((!To[i].getNode() ||
1404 N->getValueType(i) == To[i].getValueType()) &&
1405 "Cannot combine value to value of different type!");
1406
1407 WorklistRemover DeadNodes(*this);
1408 DAG.ReplaceAllUsesWith(N, To);
1409 if (AddTo) {
1410 // Push the new nodes and any users onto the worklist
1411 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1412 if (To[i].getNode())
1413 AddToWorklistWithUsers(To[i].getNode());
1414 }
1415 }
1416
1417 // Finally, if the node is now dead, remove it from the graph. The node
1418 // may not be dead if the replacement process recursively simplified to
1419 // something else needing this node.
1420 if (N->use_empty())
1421 deleteAndRecombine(N);
1422 return SDValue(N, 0);
1423}
1424
1425void DAGCombiner::
1426CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1427 // Replace the old value with the new one.
1428 ++NodesCombined;
1429 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1430 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1431
1432 // Replace all uses.
1433 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1434
1435 // Push the new node and any (possibly new) users onto the worklist.
1436 AddToWorklistWithUsers(TLO.New.getNode());
1437
1438 // Finally, if the node is now dead, remove it from the graph.
1439 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1440}
1441
1442/// Check the specified integer node value to see if it can be simplified or if
1443/// things it uses can be simplified by bit propagation. If so, return true.
1444bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1445 const APInt &DemandedElts,
1446 bool AssumeSingleUse) {
1447 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1448 KnownBits Known;
1449 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1450 AssumeSingleUse))
1451 return false;
1452
1453 // Revisit the node.
1454 AddToWorklist(Op.getNode());
1455
1456 CommitTargetLoweringOpt(TLO);
1457 return true;
1458}
1459
1460/// Check the specified vector node value to see if it can be simplified or
1461/// if things it uses can be simplified as it only uses some of the elements.
1462/// If so, return true.
1463bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1464 const APInt &DemandedElts,
1465 bool AssumeSingleUse) {
1466 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1467 APInt KnownUndef, KnownZero;
1468 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1469 TLO, 0, AssumeSingleUse))
1470 return false;
1471
1472 // Revisit the node.
1473 AddToWorklist(Op.getNode());
1474
1475 CommitTargetLoweringOpt(TLO);
1476 return true;
1477}
1478
1479void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1480 SDLoc DL(Load);
1481 EVT VT = Load->getValueType(0);
1482 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1483
1484 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1485 Trunc.dump(&DAG); dbgs() << '\n');
1486
1487 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1489
1490 AddToWorklist(Trunc.getNode());
1491 recursivelyDeleteUnusedNodes(Load);
1492}
1493
1494SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1495 Replace = false;
1496 SDLoc DL(Op);
1497 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1498 LoadSDNode *LD = cast<LoadSDNode>(Op);
1499 EVT MemVT = LD->getMemoryVT();
1500 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1501 : LD->getExtensionType();
1502 Replace = true;
1503 return DAG.getExtLoad(ExtType, DL, PVT,
1504 LD->getChain(), LD->getBasePtr(),
1505 MemVT, LD->getMemOperand());
1506 }
1507
1508 unsigned Opc = Op.getOpcode();
1509 switch (Opc) {
1510 default: break;
1511 case ISD::AssertSext:
1512 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1513 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1514 break;
1515 case ISD::AssertZext:
1516 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1517 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1518 break;
1519 case ISD::Constant: {
1520 unsigned ExtOpc =
1521 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1522 return DAG.getNode(ExtOpc, DL, PVT, Op);
1523 }
1524 }
1525
1526 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1527 return SDValue();
1528 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1529}
1530
1531SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1532 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1533 return SDValue();
1534 EVT OldVT = Op.getValueType();
1535 SDLoc DL(Op);
1536 bool Replace = false;
1537 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1538 if (!NewOp.getNode())
1539 return SDValue();
1540 AddToWorklist(NewOp.getNode());
1541
1542 if (Replace)
1543 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1544 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1545 DAG.getValueType(OldVT));
1546}
1547
1548SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1549 EVT OldVT = Op.getValueType();
1550 SDLoc DL(Op);
1551 bool Replace = false;
1552 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1553 if (!NewOp.getNode())
1554 return SDValue();
1555 AddToWorklist(NewOp.getNode());
1556
1557 if (Replace)
1558 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1559 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1560}
1561
1562/// Promote the specified integer binary operation if the target indicates it is
1563/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1564/// i32 since i16 instructions are longer.
1565SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1566 if (!LegalOperations)
1567 return SDValue();
1568
1569 EVT VT = Op.getValueType();
1570 if (VT.isVector() || !VT.isInteger())
1571 return SDValue();
1572
1573 // If operation type is 'undesirable', e.g. i16 on x86, consider
1574 // promoting it.
1575 unsigned Opc = Op.getOpcode();
1576 if (TLI.isTypeDesirableForOp(Opc, VT))
1577 return SDValue();
1578
1579 EVT PVT = VT;
1580 // Consult target whether it is a good idea to promote this operation and
1581 // what's the right type to promote it to.
1582 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1583 assert(PVT != VT && "Don't know what type to promote to!");
1584
1585 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1586
1587 bool Replace0 = false;
1588 SDValue N0 = Op.getOperand(0);
1589 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1590
1591 bool Replace1 = false;
1592 SDValue N1 = Op.getOperand(1);
1593 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1594 SDLoc DL(Op);
1595
1596 SDValue RV =
1597 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1598
1599 // We are always replacing N0/N1's use in N and only need additional
1600 // replacements if there are additional uses.
1601 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1602 // (SDValue) here because the node may reference multiple values
1603 // (for example, the chain value of a load node).
1604 Replace0 &= !N0->hasOneUse();
1605 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1606
1607 // Combine Op here so it is preserved past replacements.
1608 CombineTo(Op.getNode(), RV);
1609
1610 // If operands have a use ordering, make sure we deal with
1611 // predecessor first.
1612 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1613 std::swap(N0, N1);
1614 std::swap(NN0, NN1);
1615 }
1616
1617 if (Replace0) {
1618 AddToWorklist(NN0.getNode());
1619 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1620 }
1621 if (Replace1) {
1622 AddToWorklist(NN1.getNode());
1623 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1624 }
1625 return Op;
1626 }
1627 return SDValue();
1628}
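
// Illustrative sketch, not part of the combiner: the scalar identity that
// makes the promotion above value-preserving. The low bits of an integer
// add/sub/mul/and/or/xor depend only on the low bits of the operands, so
// truncating the promoted (wider) operation recovers the narrow result. The
// helper name is hypothetical and only demonstrates the identity with plain
// integers (relies on <cstdint>, which this file already includes).
[[maybe_unused]] static bool checkPromotedBinOpIdentity(uint16_t A, uint16_t B) {
  uint16_t Narrow = static_cast<uint16_t>(A + B);   // i16 add
  uint32_t Wide = static_cast<uint32_t>(A) + B;     // promoted i32 add
  return Narrow == static_cast<uint16_t>(Wide);     // trunc of the promoted op
}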
1629
1630/// Promote the specified integer shift operation if the target indicates it is
1631/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1632/// i32 since i16 instructions are longer.
1633SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1634 if (!LegalOperations)
1635 return SDValue();
1636
1637 EVT VT = Op.getValueType();
1638 if (VT.isVector() || !VT.isInteger())
1639 return SDValue();
1640
1641 // If operation type is 'undesirable', e.g. i16 on x86, consider
1642 // promoting it.
1643 unsigned Opc = Op.getOpcode();
1644 if (TLI.isTypeDesirableForOp(Opc, VT))
1645 return SDValue();
1646
1647 EVT PVT = VT;
1648 // Consult target whether it is a good idea to promote this operation and
1649 // what's the right type to promote it to.
1650 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1651 assert(PVT != VT && "Don't know what type to promote to!");
1652
1653 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1654
1655 bool Replace = false;
1656 SDValue N0 = Op.getOperand(0);
1657 if (Opc == ISD::SRA)
1658 N0 = SExtPromoteOperand(N0, PVT);
1659 else if (Opc == ISD::SRL)
1660 N0 = ZExtPromoteOperand(N0, PVT);
1661 else
1662 N0 = PromoteOperand(N0, PVT, Replace);
1663
1664 if (!N0.getNode())
1665 return SDValue();
1666
1667 SDLoc DL(Op);
1668 SDValue N1 = Op.getOperand(1);
1669 SDValue RV =
1670 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1671
1672 if (Replace)
1673 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1674
1675 // Deal with Op being deleted.
1676 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1677 return RV;
1678 }
1679 return SDValue();
1680}
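
// Illustrative sketch, not part of the combiner: why the promotion above
// sign-extends the shifted operand for SRA and zero-extends it for SRL. The
// bits shifted down from the widened high half must match what the narrow
// shift would have produced, so they must be zeros (logical shift) or sign
// copies (arithmetic shift). Hypothetical helper with plain integers; assumes
// a shift amount below 16 and an arithmetic right shift for signed values, as
// on LLVM's supported hosts.
[[maybe_unused]] static bool checkPromotedShiftIdentity(uint16_t X, unsigned Amt) {
  uint16_t NarrowSrl = static_cast<uint16_t>(X >> Amt);
  uint16_t WideSrl =
      static_cast<uint16_t>(static_cast<uint32_t>(X) >> Amt);  // zext for srl
  int16_t NarrowSra = static_cast<int16_t>(static_cast<int16_t>(X) >> Amt);
  int16_t WideSra = static_cast<int16_t>(
      static_cast<int32_t>(static_cast<int16_t>(X)) >> Amt);   // sext for sra
  return NarrowSrl == WideSrl && NarrowSra == WideSra;
}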
1681
1682SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1683 if (!LegalOperations)
1684 return SDValue();
1685
1686 EVT VT = Op.getValueType();
1687 if (VT.isVector() || !VT.isInteger())
1688 return SDValue();
1689
1690 // If operation type is 'undesirable', e.g. i16 on x86, consider
1691 // promoting it.
1692 unsigned Opc = Op.getOpcode();
1693 if (TLI.isTypeDesirableForOp(Opc, VT))
1694 return SDValue();
1695
1696 EVT PVT = VT;
1697 // Consult target whether it is a good idea to promote this operation and
1698 // what's the right type to promote it to.
1699 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1700 assert(PVT != VT && "Don't know what type to promote to!");
1701 // fold (aext (aext x)) -> (aext x)
1702 // fold (aext (zext x)) -> (zext x)
1703 // fold (aext (sext x)) -> (sext x)
1704 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1705 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1706 }
1707 return SDValue();
1708}
1709
1710bool DAGCombiner::PromoteLoad(SDValue Op) {
1711 if (!LegalOperations)
1712 return false;
1713
1714 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1715 return false;
1716
1717 EVT VT = Op.getValueType();
1718 if (VT.isVector() || !VT.isInteger())
1719 return false;
1720
1721 // If operation type is 'undesirable', e.g. i16 on x86, consider
1722 // promoting it.
1723 unsigned Opc = Op.getOpcode();
1724 if (TLI.isTypeDesirableForOp(Opc, VT))
1725 return false;
1726
1727 EVT PVT = VT;
1728 // Consult target whether it is a good idea to promote this operation and
1729 // what's the right type to promote it to.
1730 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1731 assert(PVT != VT && "Don't know what type to promote to!");
1732
1733 SDLoc DL(Op);
1734 SDNode *N = Op.getNode();
1735 LoadSDNode *LD = cast<LoadSDNode>(N);
1736 EVT MemVT = LD->getMemoryVT();
1737 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1738 : LD->getExtensionType();
1739 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1740 LD->getChain(), LD->getBasePtr(),
1741 MemVT, LD->getMemOperand());
1742 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1743
1744 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1745 Result.dump(&DAG); dbgs() << '\n');
1746
1747 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1748 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1749
1750 AddToWorklist(Result.getNode());
1751 recursivelyDeleteUnusedNodes(N);
1752 return true;
1753 }
1754
1755 return false;
1756}
1757
1758/// Recursively delete a node which has no uses and any operands for
1759/// which it is the only use.
1760///
1761/// Note that this both deletes the nodes and removes them from the worklist.
1762 /// It also adds any nodes that have had a user deleted to the worklist, as they
1763 /// may now have only one use and be subject to other combines.
1764bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1765 if (!N->use_empty())
1766 return false;
1767
1768 SmallSetVector<SDNode *, 16> Nodes;
1769 Nodes.insert(N);
1770 do {
1771 N = Nodes.pop_back_val();
1772 if (!N)
1773 continue;
1774
1775 if (N->use_empty()) {
1776 for (const SDValue &ChildN : N->op_values())
1777 Nodes.insert(ChildN.getNode());
1778
1779 removeFromWorklist(N);
1780 DAG.DeleteNode(N);
1781 } else {
1782 AddToWorklist(N);
1783 }
1784 } while (!Nodes.empty());
1785 return true;
1786}
1787
1788//===----------------------------------------------------------------------===//
1789// Main DAG Combiner implementation
1790//===----------------------------------------------------------------------===//
1791
1792void DAGCombiner::Run(CombineLevel AtLevel) {
1793 // set the instance variables, so that the various visit routines may use it.
1794 Level = AtLevel;
1795 LegalDAG = Level >= AfterLegalizeDAG;
1796 LegalOperations = Level >= AfterLegalizeVectorOps;
1797 LegalTypes = Level >= AfterLegalizeTypes;
1798
1799 WorklistInserter AddNodes(*this);
1800
1801 // Add all the dag nodes to the worklist.
1802 //
1803 // Note: Not all nodes are added to PruningList here. The only nodes which
1804 // can be deleted are those which have no uses, and all other nodes which
1805 // would otherwise be added to the worklist by the first call to
1806 // getNextWorklistEntry are already present in it.
1807 for (SDNode &Node : DAG.allnodes())
1808 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1809
1810 // Create a dummy node (which is not added to allnodes), that adds a reference
1811 // to the root node, preventing it from being deleted, and tracking any
1812 // changes of the root.
1813 HandleSDNode Dummy(DAG.getRoot());
1814
1815 // While we have a valid worklist entry node, try to combine it.
1816 while (SDNode *N = getNextWorklistEntry()) {
1817 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1818 // N is deleted from the DAG, since they too may now be dead or may have a
1819 // reduced number of uses, allowing other xforms.
1820 if (recursivelyDeleteUnusedNodes(N))
1821 continue;
1822
1823 WorklistRemover DeadNodes(*this);
1824
1825 // If this combine is running after legalizing the DAG, re-legalize any
1826 // nodes pulled off the worklist.
1827 if (LegalDAG) {
1828 SmallSetVector<SDNode *, 16> UpdatedNodes;
1829 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1830
1831 for (SDNode *LN : UpdatedNodes)
1832 AddToWorklistWithUsers(LN);
1833
1834 if (!NIsValid)
1835 continue;
1836 }
1837
1838 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1839
1840 // Add any operands of the new node which have not yet been combined to the
1841 // worklist as well. getNextWorklistEntry flags nodes that have been
1842 // combined before. Because the worklist uniques things already, this won't
1843 // repeatedly process the same operand.
1844 for (const SDValue &ChildN : N->op_values())
1845 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1846 /*SkipIfCombinedBefore=*/true);
1847
1848 SDValue RV = combine(N);
1849
1850 if (!RV.getNode())
1851 continue;
1852
1853 ++NodesCombined;
1854
1855 // Invalidate cached info.
1856 ChainsWithoutMergeableStores.clear();
1857
1858 // If we get back the same node we passed in, rather than a new node or
1859 // zero, we know that the node must have defined multiple values and
1860 // CombineTo was used. Since CombineTo takes care of the worklist
1861 // mechanics for us, we have no work to do in this case.
1862 if (RV.getNode() == N)
1863 continue;
1864
1865 assert(N->getOpcode() != ISD::DELETED_NODE &&
1866 RV.getOpcode() != ISD::DELETED_NODE &&
1867 "Node was deleted but visit returned new node!");
1868
1869 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1870
1871 if (N->getNumValues() == RV->getNumValues())
1872 DAG.ReplaceAllUsesWith(N, RV.getNode());
1873 else {
1874 assert(N->getValueType(0) == RV.getValueType() &&
1875 N->getNumValues() == 1 && "Type mismatch");
1876 DAG.ReplaceAllUsesWith(N, &RV);
1877 }
1878
1879 // Push the new node and any users onto the worklist. Omit this if the
1880 // new node is the EntryToken (e.g. if a store managed to get optimized
1881 // out), because re-visiting the EntryToken and its users will not uncover
1882 // any additional opportunities, but there may be a large number of such
1883 // users, potentially causing compile time explosion.
1884 if (RV.getOpcode() != ISD::EntryToken)
1885 AddToWorklistWithUsers(RV.getNode());
1886
1887 // Finally, if the node is now dead, remove it from the graph. The node
1888 // may not be dead if the replacement process recursively simplified to
1889 // something else needing this node. This will also take care of adding any
1890 // operands which have lost a user to the worklist.
1891 recursivelyDeleteUnusedNodes(N);
1892 }
1893
1894 // If the root changed (e.g. it was a dead load), update the root.
1895 DAG.setRoot(Dummy.getValue());
1896 DAG.RemoveDeadNodes();
1897}
1898
1899SDValue DAGCombiner::visit(SDNode *N) {
1900 // clang-format off
1901 switch (N->getOpcode()) {
1902 default: break;
1903 case ISD::TokenFactor: return visitTokenFactor(N);
1904 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1905 case ISD::ADD: return visitADD(N);
1906 case ISD::PTRADD: return visitPTRADD(N);
1907 case ISD::SUB: return visitSUB(N);
1908 case ISD::SADDSAT:
1909 case ISD::UADDSAT: return visitADDSAT(N);
1910 case ISD::SSUBSAT:
1911 case ISD::USUBSAT: return visitSUBSAT(N);
1912 case ISD::ADDC: return visitADDC(N);
1913 case ISD::SADDO:
1914 case ISD::UADDO: return visitADDO(N);
1915 case ISD::SUBC: return visitSUBC(N);
1916 case ISD::SSUBO:
1917 case ISD::USUBO: return visitSUBO(N);
1918 case ISD::ADDE: return visitADDE(N);
1919 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1920 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1921 case ISD::SUBE: return visitSUBE(N);
1922 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1923 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1924 case ISD::SMULFIX:
1925 case ISD::SMULFIXSAT:
1926 case ISD::UMULFIX:
1927 case ISD::UMULFIXSAT: return visitMULFIX(N);
1928 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1929 case ISD::SDIV: return visitSDIV(N);
1930 case ISD::UDIV: return visitUDIV(N);
1931 case ISD::SREM:
1932 case ISD::UREM: return visitREM(N);
1933 case ISD::MULHU: return visitMULHU(N);
1934 case ISD::MULHS: return visitMULHS(N);
1935 case ISD::AVGFLOORS:
1936 case ISD::AVGFLOORU:
1937 case ISD::AVGCEILS:
1938 case ISD::AVGCEILU: return visitAVG(N);
1939 case ISD::ABDS:
1940 case ISD::ABDU: return visitABD(N);
1941 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1942 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1943 case ISD::SMULO:
1944 case ISD::UMULO: return visitMULO(N);
1945 case ISD::SMIN:
1946 case ISD::SMAX:
1947 case ISD::UMIN:
1948 case ISD::UMAX: return visitIMINMAX(N);
1949 case ISD::AND: return visitAND(N);
1950 case ISD::OR: return visitOR(N);
1951 case ISD::XOR: return visitXOR(N);
1952 case ISD::SHL: return visitSHL(N);
1953 case ISD::SRA: return visitSRA(N);
1954 case ISD::SRL: return visitSRL(N);
1955 case ISD::ROTR:
1956 case ISD::ROTL: return visitRotate(N);
1957 case ISD::FSHL:
1958 case ISD::FSHR: return visitFunnelShift(N);
1959 case ISD::SSHLSAT:
1960 case ISD::USHLSAT: return visitSHLSAT(N);
1961 case ISD::ABS: return visitABS(N);
1962 case ISD::BSWAP: return visitBSWAP(N);
1963 case ISD::BITREVERSE: return visitBITREVERSE(N);
1964 case ISD::CTLZ: return visitCTLZ(N);
1965 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1966 case ISD::CTTZ: return visitCTTZ(N);
1967 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1968 case ISD::CTPOP: return visitCTPOP(N);
1969 case ISD::SELECT: return visitSELECT(N);
1970 case ISD::VSELECT: return visitVSELECT(N);
1971 case ISD::SELECT_CC: return visitSELECT_CC(N);
1972 case ISD::SETCC: return visitSETCC(N);
1973 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1974 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1975 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1976 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1977 case ISD::AssertSext:
1978 case ISD::AssertZext: return visitAssertExt(N);
1979 case ISD::AssertAlign: return visitAssertAlign(N);
1980 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1981 case ISD::SIGN_EXTEND_VECTOR_INREG:
1982 case ISD::ZERO_EXTEND_VECTOR_INREG:
1983 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1984 case ISD::TRUNCATE: return visitTRUNCATE(N);
1985 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1986 case ISD::BITCAST: return visitBITCAST(N);
1987 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1988 case ISD::FADD: return visitFADD(N);
1989 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1990 case ISD::FSUB: return visitFSUB(N);
1991 case ISD::FMUL: return visitFMUL(N);
1992 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1993 case ISD::FMAD: return visitFMAD(N);
1994 case ISD::FDIV: return visitFDIV(N);
1995 case ISD::FREM: return visitFREM(N);
1996 case ISD::FSQRT: return visitFSQRT(N);
1997 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1998 case ISD::FPOW: return visitFPOW(N);
1999 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2000 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2001 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2002 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2003 case ISD::LROUND:
2004 case ISD::LLROUND:
2005 case ISD::LRINT:
2006 case ISD::LLRINT: return visitXROUND(N);
2007 case ISD::FP_ROUND: return visitFP_ROUND(N);
2008 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2009 case ISD::FNEG: return visitFNEG(N);
2010 case ISD::FABS: return visitFABS(N);
2011 case ISD::FFLOOR: return visitFFLOOR(N);
2012 case ISD::FMINNUM:
2013 case ISD::FMAXNUM:
2014 case ISD::FMINIMUM:
2015 case ISD::FMAXIMUM:
2016 case ISD::FMINIMUMNUM:
2017 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2018 case ISD::FCEIL: return visitFCEIL(N);
2019 case ISD::FTRUNC: return visitFTRUNC(N);
2020 case ISD::FFREXP: return visitFFREXP(N);
2021 case ISD::BRCOND: return visitBRCOND(N);
2022 case ISD::BR_CC: return visitBR_CC(N);
2023 case ISD::LOAD: return visitLOAD(N);
2024 case ISD::STORE: return visitSTORE(N);
2025 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2026 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2027 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2028 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2029 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2030 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2031 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2032 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2033 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2034 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2035 case ISD::MGATHER: return visitMGATHER(N);
2036 case ISD::MLOAD: return visitMLOAD(N);
2037 case ISD::MSCATTER: return visitMSCATTER(N);
2038 case ISD::MSTORE: return visitMSTORE(N);
2039 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2040 case ISD::PARTIAL_REDUCE_SMLA:
2041 case ISD::PARTIAL_REDUCE_UMLA:
2042 case ISD::PARTIAL_REDUCE_SUMLA:
2043 return visitPARTIAL_REDUCE_MLA(N);
2044 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2045 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2046 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2047 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2048 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2049 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2050 case ISD::FREEZE: return visitFREEZE(N);
2051 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2052 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2053 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2054 case ISD::VECREDUCE_FADD:
2055 case ISD::VECREDUCE_FMUL:
2056 case ISD::VECREDUCE_ADD:
2057 case ISD::VECREDUCE_MUL:
2058 case ISD::VECREDUCE_AND:
2059 case ISD::VECREDUCE_OR:
2060 case ISD::VECREDUCE_XOR:
2061 case ISD::VECREDUCE_SMAX:
2062 case ISD::VECREDUCE_SMIN:
2063 case ISD::VECREDUCE_UMAX:
2064 case ISD::VECREDUCE_UMIN:
2065 case ISD::VECREDUCE_FMAX:
2066 case ISD::VECREDUCE_FMIN:
2067 case ISD::VECREDUCE_FMAXIMUM:
2068 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2069#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2070#include "llvm/IR/VPIntrinsics.def"
2071 return visitVPOp(N);
2072 }
2073 // clang-format on
2074 return SDValue();
2075}
2076
2077SDValue DAGCombiner::combine(SDNode *N) {
2078 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2079 return SDValue();
2080
2081 SDValue RV;
2082 if (!DisableGenericCombines)
2083 RV = visit(N);
2084
2085 // If nothing happened, try a target-specific DAG combine.
2086 if (!RV.getNode()) {
2087 assert(N->getOpcode() != ISD::DELETED_NODE &&
2088 "Node was deleted but visit returned NULL!");
2089
2090 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2091 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2092
2093 // Expose the DAG combiner to the target combiner impls.
2094 TargetLowering::DAGCombinerInfo
2095 DagCombineInfo(DAG, Level, false, this);
2096
2097 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2098 }
2099 }
2100
2101 // If nothing happened still, try promoting the operation.
2102 if (!RV.getNode()) {
2103 switch (N->getOpcode()) {
2104 default: break;
2105 case ISD::ADD:
2106 case ISD::SUB:
2107 case ISD::MUL:
2108 case ISD::AND:
2109 case ISD::OR:
2110 case ISD::XOR:
2111 RV = PromoteIntBinOp(SDValue(N, 0));
2112 break;
2113 case ISD::SHL:
2114 case ISD::SRA:
2115 case ISD::SRL:
2116 RV = PromoteIntShiftOp(SDValue(N, 0));
2117 break;
2118 case ISD::SIGN_EXTEND:
2119 case ISD::ZERO_EXTEND:
2120 case ISD::ANY_EXTEND:
2121 RV = PromoteExtend(SDValue(N, 0));
2122 break;
2123 case ISD::LOAD:
2124 if (PromoteLoad(SDValue(N, 0)))
2125 RV = SDValue(N, 0);
2126 break;
2127 }
2128 }
2129
2130 // If N is a commutative binary node, try to eliminate it if the commuted
2131 // version is already present in the DAG.
2132 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2133 SDValue N0 = N->getOperand(0);
2134 SDValue N1 = N->getOperand(1);
2135
2136 // Constant operands are canonicalized to RHS.
2137 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2138 SDValue Ops[] = {N1, N0};
2139 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2140 N->getFlags());
2141 if (CSENode)
2142 return SDValue(CSENode, 0);
2143 }
2144 }
2145
2146 return RV;
2147}
2148
2149/// Given a node, return its input chain if it has one, otherwise return a null
2150 /// SDValue operand.
2151static SDValue getInputChainForNode(SDNode *N) {
2152 if (unsigned NumOps = N->getNumOperands()) {
2153 if (N->getOperand(0).getValueType() == MVT::Other)
2154 return N->getOperand(0);
2155 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2156 return N->getOperand(NumOps-1);
2157 for (unsigned i = 1; i < NumOps-1; ++i)
2158 if (N->getOperand(i).getValueType() == MVT::Other)
2159 return N->getOperand(i);
2160 }
2161 return SDValue();
2162}
2163
2164SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2165 SDValue Operand = N->getOperand(0);
2166 EVT VT = Operand.getValueType();
2167 SDLoc dl(N);
2168
2169 // Canonicalize undef to quiet NaN.
2170 if (Operand.isUndef()) {
2171 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2172 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2173 }
2174 return SDValue();
2175}
2176
2177SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2178 // If N has two operands, where one has an input chain equal to the other,
2179 // the 'other' chain is redundant.
2180 if (N->getNumOperands() == 2) {
2181 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2182 return N->getOperand(0);
2183 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2184 return N->getOperand(1);
2185 }
2186
2187 // Don't simplify token factors if optnone.
2188 if (OptLevel == CodeGenOptLevel::None)
2189 return SDValue();
2190
2191 // Don't simplify the token factor if the node itself has too many operands.
2192 if (N->getNumOperands() > TokenFactorInlineLimit)
2193 return SDValue();
2194
2195 // If the sole user is a token factor, we should make sure we have a
2196 // chance to merge them together. This prevents TF chains from inhibiting
2197 // optimizations.
2198 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2199 AddToWorklist(*(N->user_begin()));
2200
2201 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2202 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2203 SmallPtrSet<SDNode*, 16> SeenOps;
2204 bool Changed = false; // If we should replace this token factor.
2205
2206 // Start out with this token factor.
2207 TFs.push_back(N);
2208
2209 // Iterate through token factors. The TFs list grows when new token factors
2210 // are encountered.
2211 for (unsigned i = 0; i < TFs.size(); ++i) {
2212 // Limit number of nodes to inline, to avoid quadratic compile times.
2213 // We have to add the outstanding Token Factors to Ops, otherwise we might
2214 // drop Ops from the resulting Token Factors.
2215 if (Ops.size() > TokenFactorInlineLimit) {
2216 for (unsigned j = i; j < TFs.size(); j++)
2217 Ops.emplace_back(TFs[j], 0);
2218 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2219 // combiner worklist later.
2220 TFs.resize(i);
2221 break;
2222 }
2223
2224 SDNode *TF = TFs[i];
2225 // Check each of the operands.
2226 for (const SDValue &Op : TF->op_values()) {
2227 switch (Op.getOpcode()) {
2228 case ISD::EntryToken:
2229 // Entry tokens don't need to be added to the list. They are
2230 // redundant.
2231 Changed = true;
2232 break;
2233
2234 case ISD::TokenFactor:
2235 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2236 // Queue up for processing.
2237 TFs.push_back(Op.getNode());
2238 Changed = true;
2239 break;
2240 }
2241 [[fallthrough]];
2242
2243 default:
2244 // Only add if it isn't already in the list.
2245 if (SeenOps.insert(Op.getNode()).second)
2246 Ops.push_back(Op);
2247 else
2248 Changed = true;
2249 break;
2250 }
2251 }
2252 }
2253
2254 // Re-visit inlined Token Factors, to clean them up in case they have been
2255 // removed. Skip the first Token Factor, as this is the current node.
2256 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2257 AddToWorklist(TFs[i]);
2258
2259 // Remove nodes that are chained to another node in the list. Do so
2260 // by walking up chains breadth-first, stopping when we've seen
2261 // another operand. In general we must climb to the EntryNode, but we can exit
2262 // early if we find all remaining work is associated with just one operand, as
2263 // no further pruning is possible.
2264
2265 // List of nodes to search through and original Ops from which they originate.
2266 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2267 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2268 SmallPtrSet<SDNode *, 16> SeenChains;
2269 bool DidPruneOps = false;
2270
2271 unsigned NumLeftToConsider = 0;
2272 for (const SDValue &Op : Ops) {
2273 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2274 OpWorkCount.push_back(1);
2275 }
2276
2277 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2278 // If this is an Op, we can remove the op from the list. Re-mark any
2279 // search associated with it as coming from the current OpNumber.
2280 if (SeenOps.contains(Op)) {
2281 Changed = true;
2282 DidPruneOps = true;
2283 unsigned OrigOpNumber = 0;
2284 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2285 OrigOpNumber++;
2286 assert((OrigOpNumber != Ops.size()) &&
2287 "expected to find TokenFactor Operand");
2288 // Re-mark worklist from OrigOpNumber to OpNumber
2289 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2290 if (Worklist[i].second == OrigOpNumber) {
2291 Worklist[i].second = OpNumber;
2292 }
2293 }
2294 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2295 OpWorkCount[OrigOpNumber] = 0;
2296 NumLeftToConsider--;
2297 }
2298 // Add if it's a new chain
2299 if (SeenChains.insert(Op).second) {
2300 OpWorkCount[OpNumber]++;
2301 Worklist.push_back(std::make_pair(Op, OpNumber));
2302 }
2303 };
2304
2305 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2306 // We need to consider at least 2 Ops to prune.
2307 if (NumLeftToConsider <= 1)
2308 break;
2309 auto CurNode = Worklist[i].first;
2310 auto CurOpNumber = Worklist[i].second;
2311 assert((OpWorkCount[CurOpNumber] > 0) &&
2312 "Node should not appear in worklist");
2313 switch (CurNode->getOpcode()) {
2314 case ISD::EntryToken:
2315 // Hitting EntryToken is the only way for the search to terminate without
2316 // hitting another operand's search.
2317 // Prevent us from marking this operand considered.
2318 //
2319 NumLeftToConsider++;
2320 break;
2321 case ISD::TokenFactor:
2322 for (const SDValue &Op : CurNode->op_values())
2323 AddToWorklist(i, Op.getNode(), CurOpNumber);
2324 break;
2325 case ISD::LIFETIME_START:
2326 case ISD::LIFETIME_END:
2327 case ISD::CopyFromReg:
2328 case ISD::CopyToReg:
2329 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2330 break;
2331 default:
2332 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2333 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2334 break;
2335 }
2336 OpWorkCount[CurOpNumber]--;
2337 if (OpWorkCount[CurOpNumber] == 0)
2338 NumLeftToConsider--;
2339 }
2340
2341 // If we've changed things around then replace token factor.
2342 if (Changed) {
2343 SDValue Result;
2344 if (Ops.empty()) {
2345 // The entry token is the only possible outcome.
2346 Result = DAG.getEntryNode();
2347 } else {
2348 if (DidPruneOps) {
2349 SmallVector<SDValue, 8> PrunedOps;
2350 //
2351 for (const SDValue &Op : Ops) {
2352 if (SeenChains.count(Op.getNode()) == 0)
2353 PrunedOps.push_back(Op);
2354 }
2355 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2356 } else {
2357 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2358 }
2359 }
2360 return Result;
2361 }
2362 return SDValue();
2363}
2364
2365/// MERGE_VALUES can always be eliminated.
2366SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2367 WorklistRemover DeadNodes(*this);
2368 // Replacing results may cause a different MERGE_VALUES to suddenly
2369 // be CSE'd with N, and carry its uses with it. Iterate until no
2370 // uses remain, to ensure that the node can be safely deleted.
2371 // First add the users of this node to the work list so that they
2372 // can be tried again once they have new operands.
2373 AddUsersToWorklist(N);
2374 do {
2375 // Do as a single replacement to avoid rewalking use lists.
2376 SmallVector<SDValue, 8> Ops(N->ops());
2377 DAG.ReplaceAllUsesWith(N, Ops.data());
2378 } while (!N->use_empty());
2379 deleteAndRecombine(N);
2380 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2381}
2382
2383/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2384/// ConstantSDNode pointer else nullptr.
2385static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2386 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2387 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2388}
2389
2390 // isTruncateOf - If N is a truncate of some other value, return true and record
2391 // the value being truncated in Op and which of Op's bits are zero/one in Known.
2392// This function computes KnownBits to avoid a duplicated call to
2393// computeKnownBits in the caller.
2394static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2395 KnownBits &Known) {
2396 if (N->getOpcode() == ISD::TRUNCATE) {
2397 Op = N->getOperand(0);
2398 Known = DAG.computeKnownBits(Op);
2399 if (N->getFlags().hasNoUnsignedWrap())
2400 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2401 return true;
2402 }
2403
2404 if (N.getValueType().getScalarType() != MVT::i1 ||
2405 !sd_match(
2406 N, m_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2407 return false;
2408
2409 Known = DAG.computeKnownBits(Op);
2410 return (Known.Zero | 1).isAllOnes();
2411}
2412
2413/// Return true if 'Use' is a load or a store that uses N as its base pointer
2414/// and that N may be folded in the load / store addressing mode.
2415static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2416 const TargetLowering &TLI) {
2417 EVT VT;
2418 unsigned AS;
2419
2420 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2421 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2422 return false;
2423 VT = LD->getMemoryVT();
2424 AS = LD->getAddressSpace();
2425 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2426 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2427 return false;
2428 VT = ST->getMemoryVT();
2429 AS = ST->getAddressSpace();
2430 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2431 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2432 return false;
2433 VT = LD->getMemoryVT();
2434 AS = LD->getAddressSpace();
2435 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2436 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2437 return false;
2438 VT = ST->getMemoryVT();
2439 AS = ST->getAddressSpace();
2440 } else {
2441 return false;
2442 }
2443
2444 TargetLowering::AddrMode AM;
2445 if (N->isAnyAdd()) {
2446 AM.HasBaseReg = true;
2447 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2448 if (Offset)
2449 // [reg +/- imm]
2450 AM.BaseOffs = Offset->getSExtValue();
2451 else
2452 // [reg +/- reg]
2453 AM.Scale = 1;
2454 } else if (N->getOpcode() == ISD::SUB) {
2455 AM.HasBaseReg = true;
2456 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2457 if (Offset)
2458 // [reg +/- imm]
2459 AM.BaseOffs = -Offset->getSExtValue();
2460 else
2461 // [reg +/- reg]
2462 AM.Scale = 1;
2463 } else {
2464 return false;
2465 }
2466
2467 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2468 VT.getTypeForEVT(*DAG.getContext()), AS);
2469}
2470
2471/// This inverts a canonicalization in IR that replaces a variable select arm
2472/// with an identity constant. Codegen improves if we re-use the variable
2473/// operand rather than load a constant. This can also be converted into a
2474/// masked vector operation if the target supports it.
2475static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2476 bool ShouldCommuteOperands) {
2477 // Match a select as operand 1. The identity constant that we are looking for
2478 // is only valid as operand 1 of a non-commutative binop.
2479 SDValue N0 = N->getOperand(0);
2480 SDValue N1 = N->getOperand(1);
2481 if (ShouldCommuteOperands)
2482 std::swap(N0, N1);
2483
2484 unsigned SelOpcode = N1.getOpcode();
2485 if ((SelOpcode != ISD::VSELECT && SelOpcode != ISD::SELECT) ||
2486 !N1.hasOneUse())
2487 return SDValue();
2488
2489 // We can't hoist all instructions because of immediate UB (not speculatable).
2490 // For example div/rem by zero.
2491 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2492 return SDValue();
2493
2494 unsigned Opcode = N->getOpcode();
2495 EVT VT = N->getValueType(0);
2496 SDValue Cond = N1.getOperand(0);
2497 SDValue TVal = N1.getOperand(1);
2498 SDValue FVal = N1.getOperand(2);
2499 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2500
2501 // This transform increases uses of N0, so freeze it to be safe.
2502 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2503 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2504 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2505 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2506 FVal)) {
2507 SDValue F0 = DAG.getFreeze(N0);
2508 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510 }
2511 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2513 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2514 TVal)) {
2515 SDValue F0 = DAG.getFreeze(N0);
2516 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2517 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2518 }
2519
2520 return SDValue();
2521}
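
// Illustrative sketch, not part of the combiner: the rewrite performed above,
// shown with plain integers for an add whose select arm carries the additive
// identity 0. Both helpers are hypothetical and compute the same value; the
// folded form reuses the variable operand instead of materializing the
// identity constant.
[[maybe_unused]] static unsigned selectIdentityAddOriginal(bool Cond, unsigned X,
                                                           unsigned Y) {
  return X + (Cond ? 0u : Y); // binop N0, (select Cond, IDC, FVal)
}
[[maybe_unused]] static unsigned selectIdentityAddFolded(bool Cond, unsigned X,
                                                         unsigned Y) {
  return Cond ? X : (X + Y);  // select Cond, N0, (binop N0, FVal)
}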
2522
2523SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2525 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2526 "Unexpected binary operator");
2527
2528 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2529 return Sel;
2530
2531 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2532 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2533 return Sel;
2534
2535 // Don't do this unless the old select is going away. We want to eliminate the
2536 // binary operator, not replace a binop with a select.
2537 // TODO: Handle ISD::SELECT_CC.
2538 unsigned SelOpNo = 0;
2539 SDValue Sel = BO->getOperand(0);
2540 auto BinOpcode = BO->getOpcode();
2541 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2542 SelOpNo = 1;
2543 Sel = BO->getOperand(1);
2544
2545 // Peek through trunc to shift amount type.
2546 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2547 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2548 // This is valid when the truncated bits of x are already zero.
2549 SDValue Op;
2550 KnownBits Known;
2551 if (isTruncateOf(DAG, Sel, Op, Known) &&
2552 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2553 Sel = Op;
2554 }
2555 }
2556
2557 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2558 return SDValue();
2559
2560 SDValue CT = Sel.getOperand(1);
2561 if (!isConstantOrConstantVector(CT, true) &&
2562 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2563 return SDValue();
2564
2565 SDValue CF = Sel.getOperand(2);
2566 if (!isConstantOrConstantVector(CF, true) &&
2567 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2568 return SDValue();
2569
2570 // Bail out if any constants are opaque because we can't constant fold those.
2571 // The exception is "and" and "or" with either 0 or -1 in which case we can
2572 // propagate non constant operands into select. I.e.:
2573 // and (select Cond, 0, -1), X --> select Cond, 0, X
2574 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2575 bool CanFoldNonConst =
2576 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2577 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2578 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2579
2580 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2581 if (!CanFoldNonConst &&
2582 !isConstantOrConstantVector(CBO, true) &&
2583 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2584 return SDValue();
2585
2586 SDLoc DL(Sel);
2587 SDValue NewCT, NewCF;
2588 EVT VT = BO->getValueType(0);
2589
2590 if (CanFoldNonConst) {
2591 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594 NewCT = CT;
2595 else
2596 NewCT = CBO;
2597
2598 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600 NewCF = CF;
2601 else
2602 NewCF = CBO;
2603 } else {
2604 // We have a select-of-constants followed by a binary operator with a
2605 // constant. Eliminate the binop by pulling the constant math into the
2606 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2607 // CBO, CF + CBO
2608 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610 if (!NewCT)
2611 return SDValue();
2612
2613 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615 if (!NewCF)
2616 return SDValue();
2617 }
2618
2619 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2620}
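
// Illustrative sketch, not part of the combiner: the select-of-constants case
// handled above, with hypothetical constants CT=10 and CF=20. Pulling the
// constant math into the select eliminates the binary operator entirely.
[[maybe_unused]] static unsigned selectConstAddOriginal(bool Cond, unsigned CBO) {
  return (Cond ? 10u : 20u) + CBO;          // add (select Cond, CT, CF), CBO
}
[[maybe_unused]] static unsigned selectConstAddFolded(bool Cond, unsigned CBO) {
  return Cond ? (10u + CBO) : (20u + CBO);  // select Cond, CT+CBO, CF+CBO
}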
2621
2622static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2623 SelectionDAG &DAG) {
2624 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 "Expecting add or sub");
2626
2627 // Match a constant operand and a zext operand for the math instruction:
2628 // add Z, C
2629 // sub C, Z
2630 bool IsAdd = N->getOpcode() == ISD::ADD;
2631 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2632 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2633 auto *CN = dyn_cast<ConstantSDNode>(C);
2634 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2635 return SDValue();
2636
2637 // Match the zext operand as a setcc of a boolean.
2638 if (Z.getOperand(0).getValueType() != MVT::i1)
2639 return SDValue();
2640
2641 // Match the compare as: setcc (X & 1), 0, eq.
2642 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2643 m_SpecificCondCode(ISD::SETEQ))))
2644 return SDValue();
2645
2646 // We are adding/subtracting a constant and an inverted low bit. Turn that
2647 // into a subtract/add of the low bit with incremented/decremented constant:
2648 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2649 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2650 EVT VT = C.getValueType();
2651 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2652 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2653 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2654 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2655}
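
// Illustrative sketch, not part of the combiner: the wrap-around identity used
// above. With B = X & 1, the zext of (B == 0) is 1 - B, so C + (B == 0) equals
// (C + 1) - B modulo 2^N. Hypothetical helper with plain 32-bit integers.
[[maybe_unused]] static bool checkMaskedBoolAddIdentity(uint32_t X, uint32_t C) {
  uint32_t Original = ((X & 1) == 0 ? 1u : 0u) + C; // add (zext (seteq (X&1),0)), C
  uint32_t Folded = (C + 1) - (X & 1);              // sub C+1, (zext (X&1))
  return Original == Folded;
}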
2656
2657// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2658SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2659 SDValue N0 = N->getOperand(0);
2660 EVT VT = N0.getValueType();
2661 SDValue A, B;
2662
2663 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2664 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2665 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2666 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2667 }
2668 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2669 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2670 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2671 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2672 }
2673 return SDValue();
2674}
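
// Illustrative sketch, not part of the combiner: the unsigned identity behind
// the AVGCEILU match above. Since A + B == (A | B) + (A & B) and
// A ^ B == (A | B) - (A & B), it follows that (A | B) - ((A ^ B) >> 1) equals
// ceil((A + B) / 2) with no intermediate overflow. Hypothetical helper; the
// reference value widens to 64 bits only to express the expected result.
[[maybe_unused]] static bool checkAvgCeilUIdentity(uint32_t A, uint32_t B) {
  uint32_t Folded = (A | B) - ((A ^ B) >> 1);
  uint32_t Expected =
      static_cast<uint32_t>((static_cast<uint64_t>(A) + B + 1) / 2);
  return Folded == Expected;
}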
2675
2676/// Try to fold a pointer arithmetic node.
2677/// This needs to be done separately from normal addition, because pointer
2678/// addition is not commutative.
2679SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2680 SDValue N0 = N->getOperand(0);
2681 SDValue N1 = N->getOperand(1);
2682 EVT PtrVT = N0.getValueType();
2683 EVT IntVT = N1.getValueType();
2684 SDLoc DL(N);
2685
2686 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2687 // combines here depend on this assumption.
2688 assert(PtrVT == IntVT &&
2689 "PTRADD with different operand types is not supported");
2690
2691 // fold (ptradd x, 0) -> x
2692 if (isNullConstant(N1))
2693 return N0;
2694
2695 // fold (ptradd 0, x) -> x
2696 if (PtrVT == IntVT && isNullConstant(N0))
2697 return N1;
2698
2699 if (N0.getOpcode() != ISD::PTRADD ||
2700 reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2701 return SDValue();
2702
2703 SDValue X = N0.getOperand(0);
2704 SDValue Y = N0.getOperand(1);
2705 SDValue Z = N1;
2706 bool N0OneUse = N0.hasOneUse();
2707 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2708 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2709
2710 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2711 // * y is a constant and (ptradd x, y) has one use; or
2712 // * y and z are both constants.
2713 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2714 // If both additions in the original were NUW, the new ones are as well.
2715 SDNodeFlags Flags =
2716 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2717 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2718 AddToWorklist(Add.getNode());
2719 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2720 }
2721
2722 // TODO: There is another possible fold here that was proven useful.
2723 // It would be this:
2724 //
2725 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2726 // * (ptradd x, y) has one use; and
2727 // * y is a constant; and
2728 // * z is not a constant.
2729 //
2730 // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2731 // opportunity to select more complex instructions such as SUBPT and
2732 // MSUBPT. However, a hypothetical corner case has been found that we could
2733 // not avoid. Consider this (pseudo-POSIX C):
2734 //
2735 // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2736 // char *p = mmap(LARGE_CONSTANT);
2737 // char *q = foo(p, -LARGE_CONSTANT);
2738 //
2739 // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2740 // further + z takes it back to the start of the mapping, so valid,
2741 // regardless of the address mmap gave back. However, if mmap gives you an
2742 // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2743 // borrow from the high bits (with the subsequent + z carrying back into
2744 // the high bits to give you a well-defined pointer) and thus trip
2745 // FEAT_CPA's pointer corruption checks.
2746 //
2747 // We leave this fold as an opportunity for future work, addressing the
2748 // corner case for FEAT_CPA, as well as reconciling the solution with the
2749 // more general application of pointer arithmetic in other future targets.
2750 // For now each architecture that wants this fold must implement it in the
2751 // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2752
2753 return SDValue();
2754}
2755
2756/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2757/// a shift and add with a different constant.
2758static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2759 SelectionDAG &DAG) {
2760 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2761 "Expecting add or sub");
2762
2763 // We need a constant operand for the add/sub, and the other operand is a
2764 // logical shift right: add (srl), C or sub C, (srl).
2765 bool IsAdd = N->getOpcode() == ISD::ADD;
2766 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2767 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2768 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2769 ShiftOp.getOpcode() != ISD::SRL)
2770 return SDValue();
2771
2772 // The shift must be of a 'not' value.
2773 SDValue Not = ShiftOp.getOperand(0);
2774 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2775 return SDValue();
2776
2777 // The shift must be moving the sign bit to the least-significant-bit.
2778 EVT VT = ShiftOp.getValueType();
2779 SDValue ShAmt = ShiftOp.getOperand(1);
2780 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2781 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2782 return SDValue();
2783
2784 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2785 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2786 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2787 if (SDValue NewC = DAG.FoldConstantArithmetic(
2788 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2789 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2790 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2791 Not.getOperand(0), ShAmt);
2792 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2793 }
2794
2795 return SDValue();
2796}
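
// Illustrative sketch, not part of the combiner: the identity behind the add
// case of the fold above. srl(~X, 31) is 1 - signbit(X) and sra(X, 31) is
// -signbit(X), so (srl(~X, 31) + C) == (sra(X, 31) + (C + 1)) modulo 2^32.
// Hypothetical helper; assumes the host performs arithmetic right shifts on
// negative signed values, as LLVM's supported hosts do.
[[maybe_unused]] static bool checkNotSignBitAddIdentity(uint32_t X, uint32_t C) {
  uint32_t Original = ((~X) >> 31) + C;               // add (srl (not X), 31), C
  uint32_t Sra = static_cast<uint32_t>(static_cast<int32_t>(X) >> 31);
  uint32_t Folded = Sra + (C + 1);                    // add (sra X, 31), C+1
  return Original == Folded;
}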
2797
2798static bool
2799areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2800 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2801 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2802}
2803
2804/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2805/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2806/// are no common bits set in the operands).
2807SDValue DAGCombiner::visitADDLike(SDNode *N) {
2808 SDValue N0 = N->getOperand(0);
2809 SDValue N1 = N->getOperand(1);
2810 EVT VT = N0.getValueType();
2811 SDLoc DL(N);
2812
2813 // fold (add x, undef) -> undef
2814 if (N0.isUndef())
2815 return N0;
2816 if (N1.isUndef())
2817 return N1;
2818
2819 // fold (add c1, c2) -> c1+c2
2820 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2821 return C;
2822
2823 // canonicalize constant to RHS
2824 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2825 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2826 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2827
2828 if (areBitwiseNotOfEachother(N0, N1))
2829 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2830
2831 // fold vector ops
2832 if (VT.isVector()) {
2833 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2834 return FoldedVOp;
2835
2836 // fold (add x, 0) -> x, vector edition
2837 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2838 return N0;
2839 }
2840
2841 // fold (add x, 0) -> x
2842 if (isNullConstant(N1))
2843 return N0;
2844
2845 if (N0.getOpcode() == ISD::SUB) {
2846 SDValue N00 = N0.getOperand(0);
2847 SDValue N01 = N0.getOperand(1);
2848
2849 // fold ((A-c1)+c2) -> (A+(c2-c1))
2850 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2851 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2852
2853 // fold ((c1-A)+c2) -> (c1+c2)-A
2854 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2855 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2856 }
2857
2858 // add (sext i1 X), 1 -> zext (not i1 X)
2859 // We don't transform this pattern:
2860 // add (zext i1 X), -1 -> sext (not i1 X)
2861 // because most (?) targets generate better code for the zext form.
2862 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2863 isOneOrOneSplat(N1)) {
2864 SDValue X = N0.getOperand(0);
2865 if ((!LegalOperations ||
2866 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2867 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2868 X.getScalarValueSizeInBits() == 1) {
2869 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2870 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2871 }
2872 }
2873
2874 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2875 // iff (or x, c0) is equivalent to (add x, c0).
2876 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2877 // iff (xor x, c0) is equivalent to (add x, c0).
2878 if (DAG.isADDLike(N0)) {
2879 SDValue N01 = N0.getOperand(1);
2880 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2881 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2882 }
2883
2884 if (SDValue NewSel = foldBinOpIntoSelect(N))
2885 return NewSel;
2886
2887 // reassociate add
2888 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2889 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2890 return RADD;
2891
2892 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2893 // equivalent to (add x, c).
2894 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2895 // equivalent to (add x, c).
2896 // Do this optimization only when adding c does not introduce instructions
2897 // for adding carries.
2898 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2899 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2900 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2901 // If N0's type does not split or is a sign mask, it does not introduce
2902 // add carry.
2903 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2904 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2905 TyActn == TargetLoweringBase::TypePromoteInteger ||
2906 isMinSignedConstant(N0.getOperand(1));
2907 if (NoAddCarry)
2908 return DAG.getNode(
2909 ISD::ADD, DL, VT,
2910 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2911 N0.getOperand(1));
2912 }
2913 return SDValue();
2914 };
2915 if (SDValue Add = ReassociateAddOr(N0, N1))
2916 return Add;
2917 if (SDValue Add = ReassociateAddOr(N1, N0))
2918 return Add;
2919
2920 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2921 if (SDValue SD =
2922 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2923 return SD;
2924 }
2925
2926 SDValue A, B, C, D;
2927
2928 // fold ((0-A) + B) -> B-A
2929 if (sd_match(N0, m_Neg(m_Value(A))))
2930 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2931
2932 // fold (A + (0-B)) -> A-B
2933 if (sd_match(N1, m_Neg(m_Value(B))))
2934 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2935
2936 // fold (A+(B-A)) -> B
2937 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2938 return B;
2939
2940 // fold ((B-A)+A) -> B
2941 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2942 return B;
2943
2944 // fold ((A-B)+(C-A)) -> (C-B)
2945 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2946 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2947 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2948
2949 // fold ((A-B)+(B-C)) -> (A-C)
2950 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2951 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2952 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2953
2954 // fold (A+(B-(A+C))) to (B-C)
2955 // fold (A+(B-(C+A))) to (B-C)
2956 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2957 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2958
2959 // fold (A+((B-A)+or-C)) to (B+or-C)
2960 if (sd_match(N1,
2961 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2962 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2963 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2964
2965 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2966 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2967 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2968 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2969 return DAG.getNode(ISD::SUB, DL, VT,
2970 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2971 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2972
2973 // fold (add (umax X, C), -C) --> (usubsat X, C)
2974 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2975 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2976 return (!Max && !Op) ||
2977 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2978 };
2979 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2980 /*AllowUndefs*/ true))
2981 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2982 N0.getOperand(1));
2983 }
2984
2985 if (SimplifyDemandedBits(SDValue(N, 0)))
2986 return SDValue(N, 0);
2987
2988 if (isOneOrOneSplat(N1)) {
2989 // fold (add (xor a, -1), 1) -> (sub 0, a)
2990 if (isBitwiseNot(N0))
2991 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2992 N0.getOperand(0));
2993
2994 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2995 if (N0.getOpcode() == ISD::ADD) {
2996 SDValue A, Xor;
2997
2998 if (isBitwiseNot(N0.getOperand(0))) {
2999 A = N0.getOperand(1);
3000 Xor = N0.getOperand(0);
3001 } else if (isBitwiseNot(N0.getOperand(1))) {
3002 A = N0.getOperand(0);
3003 Xor = N0.getOperand(1);
3004 }
3005
3006 if (Xor)
3007 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3008 }
3009
3010 // Look for:
3011 // add (add x, y), 1
3012 // And if the target does not like this form then turn into:
3013 // sub y, (xor x, -1)
3014 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3015 N0.hasOneUse() &&
3016 // Limit this to after legalization if the add has wrap flags
3017 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3018 !N->getFlags().hasNoSignedWrap()))) {
3019 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3020 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3021 }
3022 }
3023
3024 // (x - y) + -1 -> add (xor y, -1), x
3025 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3026 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3027 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3028 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3029 }
3030
3031 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3032 // This can help if the inner add has multiple uses.
3033 APInt CM, CA;
3034 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3035 if (VT.getScalarSizeInBits() <= 64) {
3036 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3037 m_ConstInt(CM)))) &&
3038 TLI.isLegalAddImmediate(
3039 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3040 SDNodeFlags Flags;
3041 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3042 // are _also_ nsw, the outputs can be too.
3043 if (N->getFlags().hasNoUnsignedWrap() &&
3044 N0->getFlags().hasNoUnsignedWrap() &&
3045 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3046 Flags.setNoUnsignedWrap(true);
3047 if (N->getFlags().hasNoSignedWrap() &&
3048 N0->getFlags().hasNoSignedWrap() &&
3049 N0.getOperand(0)->getFlags().hasNoSignedWrap())
3050 Flags.setNoSignedWrap(true);
3051 }
3052 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3053 DAG.getConstant(CM, DL, VT), Flags);
3054 return DAG.getNode(
3055 ISD::ADD, DL, VT, Mul,
3056 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3057 }
3058 // Also look in case there is an intermediate add.
3059 if (sd_match(N0, m_OneUse(m_Add(
3060 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3061 m_ConstInt(CM))),
3062 m_Value(B)))) &&
3063 TLI.isLegalAddImmediate(
3064 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3065 SDNodeFlags Flags;
3066 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3067 // are _also_ nsw, the outputs can be too.
3068 SDValue OMul =
3069 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3070 if (N->getFlags().hasNoUnsignedWrap() &&
3071 N0->getFlags().hasNoUnsignedWrap() &&
3072 OMul->getFlags().hasNoUnsignedWrap() &&
3073 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3074 Flags.setNoUnsignedWrap(true);
3075 if (N->getFlags().hasNoSignedWrap() &&
3076 N0->getFlags().hasNoSignedWrap() &&
3077 OMul->getFlags().hasNoSignedWrap() &&
3078 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3079 Flags.setNoSignedWrap(true);
3080 }
3081 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3082 DAG.getConstant(CM, DL, VT), Flags);
3083 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3084 return DAG.getNode(
3085 ISD::ADD, DL, VT, Add,
3086 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3087 }
3088 }
3089 }
3090
3091 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3092 return Combined;
3093
3094 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3095 return Combined;
3096
3097 return SDValue();
3098}
3099
3100// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
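// The identity A + B == 2*(A & B) + (A ^ B) means (A & B) + ((A ^ B) >> 1)
// computes floor((A + B) / 2) without the intermediate sum overflowing; the
// logical vs. arithmetic shift selects the unsigned vs. signed average.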
3101SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3102 SDValue N0 = N->getOperand(0);
3103 EVT VT = N0.getValueType();
3104 SDValue A, B;
3105
3106 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3108 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3109 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3110 }
3111 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3113 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3114 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3115 }
3116
3117 return SDValue();
3118}
3119
3120SDValue DAGCombiner::visitADD(SDNode *N) {
3121 SDValue N0 = N->getOperand(0);
3122 SDValue N1 = N->getOperand(1);
3123 EVT VT = N0.getValueType();
3124 SDLoc DL(N);
3125
3126 if (SDValue Combined = visitADDLike(N))
3127 return Combined;
3128
3129 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3130 return V;
3131
3132 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3133 return V;
3134
3135 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3136 return V;
3137
3138 // Try to match AVGFLOOR fixedwidth pattern
3139 if (SDValue V = foldAddToAvg(N, DL))
3140 return V;
3141
3142 // fold (a+b) -> (a|b) iff a and b share no bits.
3143 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3144 DAG.haveNoCommonBitsSet(N0, N1))
3145 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3146
3147 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3148 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3149 const APInt &C0 = N0->getConstantOperandAPInt(0);
3150 const APInt &C1 = N1->getConstantOperandAPInt(0);
3151 return DAG.getVScale(DL, VT, C0 + C1);
3152 }
3153
3154 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3155 if (N0.getOpcode() == ISD::ADD &&
3156 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3157 N1.getOpcode() == ISD::VSCALE) {
3158 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3159 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3160 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3161 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3162 }
3163
3164 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3165 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3166 N1.getOpcode() == ISD::STEP_VECTOR) {
3167 const APInt &C0 = N0->getConstantOperandAPInt(0);
3168 const APInt &C1 = N1->getConstantOperandAPInt(0);
3169 APInt NewStep = C0 + C1;
3170 return DAG.getStepVector(DL, VT, NewStep);
3171 }
3172
3173 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3174 if (N0.getOpcode() == ISD::ADD &&
3175 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3176 N1.getOpcode() == ISD::STEP_VECTOR) {
3177 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3178 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3179 APInt NewStep = SV0 + SV1;
3180 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3181 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3182 }
3183
3184 return SDValue();
3185}
3186
3187SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3188 unsigned Opcode = N->getOpcode();
3189 SDValue N0 = N->getOperand(0);
3190 SDValue N1 = N->getOperand(1);
3191 EVT VT = N0.getValueType();
3192 bool IsSigned = Opcode == ISD::SADDSAT;
3193 SDLoc DL(N);
3194
3195 // fold (add_sat x, undef) -> -1
3196 if (N0.isUndef() || N1.isUndef())
3197 return DAG.getAllOnesConstant(DL, VT);
3198
3199 // fold (add_sat c1, c2) -> c3
3200 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3201 return C;
3202
3203 // canonicalize constant to RHS
3204 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3205 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3206 return DAG.getNode(Opcode, DL, VT, N1, N0);
3207
3208 // fold vector ops
3209 if (VT.isVector()) {
3210 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3211 return FoldedVOp;
3212
3213 // fold (add_sat x, 0) -> x, vector edition
3214 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3215 return N0;
3216 }
3217
3218 // fold (add_sat x, 0) -> x
3219 if (isNullConstant(N1))
3220 return N0;
3221
3222 // If it cannot overflow, transform into an add.
3223 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3224 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3225
3226 return SDValue();
3227}
3228
3229 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3230 bool ForceCarryReconstruction = false) {
3231 bool Masked = false;
3232
3233 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3234 while (true) {
3235 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3236 V = V.getOperand(0);
3237 continue;
3238 }
3239
3240 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3241 if (ForceCarryReconstruction)
3242 return V;
3243
3244 Masked = true;
3245 V = V.getOperand(0);
3246 continue;
3247 }
3248
3249 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3250 return V;
3251
3252 break;
3253 }
3254
3255 // If this is not a carry, return.
3256 if (V.getResNo() != 1)
3257 return SDValue();
3258
3259 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3260 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3261 return SDValue();
3262
3263 EVT VT = V->getValueType(0);
3264 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3265 return SDValue();
3266
3267 // If the result is masked, then no matter what kind of bool it is we can
3268 // return. If it isn't, then we need to make sure the bool type is either 0 or
3269 // 1 and not other values.
3270 if (Masked ||
3271 TLI.getBooleanContents(V.getValueType()) ==
3272 TargetLowering::ZeroOrOneBooleanContent)
3273 return V;
3274
3275 return SDValue();
3276}
3277
3278/// Given the operands of an add/sub operation, see if the 2nd operand is a
3279/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3280/// the opcode and bypass the mask operation.
3281static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3282 SelectionDAG &DAG, const SDLoc &DL) {
3283 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3284 N1 = N1.getOperand(0);
3285
3286 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3287 return SDValue();
3288
3289 EVT VT = N0.getValueType();
3290 SDValue N10 = N1.getOperand(0);
3291 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3292 N10 = N10.getOperand(0);
3293
3294 if (N10.getValueType() != VT)
3295 return SDValue();
3296
3297 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3298 return SDValue();
3299
3300 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3301 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3302 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3303}
3304
3305/// Helper for doing combines based on N0 and N1 being added to each other.
3306SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3307 SDNode *LocReference) {
3308 EVT VT = N0.getValueType();
3309 SDLoc DL(LocReference);
3310
3311 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3312 SDValue Y, N;
3313 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3314 return DAG.getNode(ISD::SUB, DL, VT, N0,
3315 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3316
3317 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3318 return V;
3319
3320 // Look for:
3321 // add (add x, 1), y
3322 // And if the target does not like this form then turn into:
3323 // sub y, (xor x, -1)
3324 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3325 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3326 // Limit this to after legalization if the add has wrap flags
3327 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3328 !N0->getFlags().hasNoSignedWrap()))) {
3329 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3330 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3331 }
3332
3333 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3334 // Hoist one-use subtraction by non-opaque constant:
3335 // (x - C) + y -> (x + y) - C
3336 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3337 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3338 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3339 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3340 }
3341 // Hoist one-use subtraction from non-opaque constant:
3342 // (C - x) + y -> (y - x) + C
3343 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3344 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3345 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3346 }
3347 }
3348
3349 // add (mul x, C), x -> mul x, C+1
3350 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3351 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3352 N0.hasOneUse()) {
3353 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3354 DAG.getConstant(1, DL, VT));
3355 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3356 }
3357
3358 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3359 // rather than 'add 0/-1' (the zext should get folded).
3360 // add (sext i1 Y), X --> sub X, (zext i1 Y)
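// For an i1 value, (sext Y) is 0/-1, i.e. the negation of (zext Y), which is
// 0/1, so adding the sext is the same as subtracting the zext.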
3361 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3362 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3363 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3364 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3365 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3366 }
3367
3368 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3369 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3370 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3371 if (TN->getVT() == MVT::i1) {
3372 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3373 DAG.getConstant(1, DL, VT));
3374 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3375 }
3376 }
3377
3378 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3379 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3380 N1.getResNo() == 0)
3381 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3382 N0, N1.getOperand(0), N1.getOperand(2));
3383
3384 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3386 if (SDValue Carry = getAsCarry(TLI, N1))
3387 return DAG.getNode(ISD::UADDO_CARRY, DL,
3388 DAG.getVTList(VT, Carry.getValueType()), N0,
3389 DAG.getConstant(0, DL, VT), Carry);
3390
3391 return SDValue();
3392}
3393
3394SDValue DAGCombiner::visitADDC(SDNode *N) {
3395 SDValue N0 = N->getOperand(0);
3396 SDValue N1 = N->getOperand(1);
3397 EVT VT = N0.getValueType();
3398 SDLoc DL(N);
3399
3400 // If the flag result is dead, turn this into an ADD.
3401 if (!N->hasAnyUseOfValue(1))
3402 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3403 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3404
3405 // canonicalize constant to RHS.
3406 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408 if (N0C && !N1C)
3409 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3410
3411 // fold (addc x, 0) -> x + no carry out
3412 if (isNullConstant(N1))
3413 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3414 DL, MVT::Glue));
3415
3416 // If it cannot overflow, transform into an add.
3418 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3419 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3420
3421 return SDValue();
3422}
3423
3424/**
3425 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3426 * then the flip also occurs if computing the inverse is the same cost.
3427 * This function returns an empty SDValue in case it cannot flip the boolean
3428 * without increasing the cost of the computation. If you want to flip a boolean
3429 * no matter what, use DAG.getLogicalNOT.
3430 */
3431 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3432 const TargetLowering &TLI,
3433 bool Force) {
3434 if (Force && isa<ConstantSDNode>(V))
3435 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3436
3437 if (V.getOpcode() != ISD::XOR)
3438 return SDValue();
3439
3440 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3441 return V.getOperand(0);
3442 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3443 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3444 return SDValue();
3445}
3446
3447SDValue DAGCombiner::visitADDO(SDNode *N) {
3448 SDValue N0 = N->getOperand(0);
3449 SDValue N1 = N->getOperand(1);
3450 EVT VT = N0.getValueType();
3451 bool IsSigned = (ISD::SADDO == N->getOpcode());
3452
3453 EVT CarryVT = N->getValueType(1);
3454 SDLoc DL(N);
3455
3456 // If the flag result is dead, turn this into an ADD.
3457 if (!N->hasAnyUseOfValue(1))
3458 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3459 DAG.getUNDEF(CarryVT));
3460
3461 // canonicalize constant to RHS.
3464 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3465
3466 // fold (addo x, 0) -> x + no carry out
3467 if (isNullOrNullSplat(N1))
3468 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3469
3470 // If it cannot overflow, transform into an add.
3471 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3472 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3473 DAG.getConstant(0, DL, CarryVT));
3474
3475 if (IsSigned) {
3476 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3477 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3478 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3479 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3480 } else {
3481 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3482 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3483 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3484 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3485 return CombineTo(
3486 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3487 }
3488
3489 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3490 return Combined;
3491
3492 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3493 return Combined;
3494 }
3495
3496 return SDValue();
3497}
3498
3499SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3500 EVT VT = N0.getValueType();
3501 if (VT.isVector())
3502 return SDValue();
3503
3504 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3505 // If Y + 1 cannot overflow.
3506 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3507 SDValue Y = N1.getOperand(0);
3508 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3510 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3511 N1.getOperand(2));
3512 }
3513
3514 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3516 if (SDValue Carry = getAsCarry(TLI, N1))
3517 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3518 DAG.getConstant(0, SDLoc(N), VT), Carry);
3519
3520 return SDValue();
3521}
3522
3523SDValue DAGCombiner::visitADDE(SDNode *N) {
3524 SDValue N0 = N->getOperand(0);
3525 SDValue N1 = N->getOperand(1);
3526 SDValue CarryIn = N->getOperand(2);
3527
3528 // canonicalize constant to RHS
3529 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3530 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3531 if (N0C && !N1C)
3532 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3533 N1, N0, CarryIn);
3534
3535 // fold (adde x, y, false) -> (addc x, y)
3536 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3537 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3538
3539 return SDValue();
3540}
3541
3542SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3543 SDValue N0 = N->getOperand(0);
3544 SDValue N1 = N->getOperand(1);
3545 SDValue CarryIn = N->getOperand(2);
3546 SDLoc DL(N);
3547
3548 // canonicalize constant to RHS
3549 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3550 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3551 if (N0C && !N1C)
3552 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3553
3554 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3555 if (isNullConstant(CarryIn)) {
3556 if (!LegalOperations ||
3557 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3558 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3559 }
3560
3561 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3562 if (isNullConstant(N0) && isNullConstant(N1)) {
3563 EVT VT = N0.getValueType();
3564 EVT CarryVT = CarryIn.getValueType();
3565 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3566 AddToWorklist(CarryExt.getNode());
3567 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3568 DAG.getConstant(1, DL, VT)),
3569 DAG.getConstant(0, DL, CarryVT));
3570 }
3571
3572 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3573 return Combined;
3574
3575 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3576 return Combined;
3577
3578 // We want to avoid useless duplication.
3579 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3580 // not a binary operation, it is not really possible to leverage this
3581 // existing mechanism for it. However, if more operations require the same
3582 // deduplication logic, then it may be worth generalizing it.
3583 SDValue Ops[] = {N1, N0, CarryIn};
3584 SDNode *CSENode =
3585 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3586 if (CSENode)
3587 return SDValue(CSENode, 0);
3588
3589 return SDValue();
3590}
3591
3592/**
3593 * If we are facing some sort of diamond carry propagation pattern try to
3594 * break it up to generate something like:
3595 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3596 *
3597 * The end result is usually an increase in operations required, but because the
3598 * carry is now linearized, other transforms can kick in and optimize the DAG.
3599 *
3600 * Patterns typically look something like
3601 * (uaddo A, B)
3602 * / \
3603 * Carry Sum
3604 * | \
3605 * | (uaddo_carry *, 0, Z)
3606 * | /
3607 * \ Carry
3608 * | /
3609 * (uaddo_carry X, *, *)
3610 *
3611 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3612 * produce a combine with a single path for carry propagation.
3613 */
3614 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3615 SelectionDAG &DAG, SDValue X,
3616 SDValue Carry0, SDValue Carry1,
3617 SDNode *N) {
3618 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3619 return SDValue();
3620 if (Carry1.getOpcode() != ISD::UADDO)
3621 return SDValue();
3622
3623 SDValue Z;
3624
3625 /**
3626 * First look for a suitable Z. It will present itself in the form of
3627 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3628 */
3629 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3630 isNullConstant(Carry0.getOperand(1))) {
3631 Z = Carry0.getOperand(2);
3632 } else if (Carry0.getOpcode() == ISD::UADDO &&
3633 isOneConstant(Carry0.getOperand(1))) {
3634 EVT VT = Carry0->getValueType(1);
3635 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3636 } else {
3637 // We couldn't find a suitable Z.
3638 return SDValue();
3639 }
3640
3641
3642 auto cancelDiamond = [&](SDValue A,SDValue B) {
3643 SDLoc DL(N);
3644 SDValue NewY =
3645 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3646 Combiner.AddToWorklist(NewY.getNode());
3647 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3648 DAG.getConstant(0, DL, X.getValueType()),
3649 NewY.getValue(1));
3650 };
3651
3652 /**
3653 * (uaddo A, B)
3654 * |
3655 * Sum
3656 * |
3657 * (uaddo_carry *, 0, Z)
3658 */
3659 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3660 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3661 }
3662
3663 /**
3664 * (uaddo_carry A, 0, Z)
3665 * |
3666 * Sum
3667 * |
3668 * (uaddo *, B)
3669 */
3670 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3671 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3672 }
3673
3674 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3675 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3676 }
3677
3678 return SDValue();
3679}
3680
3681// If we are facing some sort of diamond carry/borrow in/out pattern try to
3682// match patterns like:
3683//
3684// (uaddo A, B) CarryIn
3685// | \ |
3686// | \ |
3687// PartialSum PartialCarryOutX /
3688// | | /
3689// | ____|____________/
3690// | / |
3691// (uaddo *, *) \________
3692// | \ \
3693// | \ |
3694// | PartialCarryOutY |
3695// | \ |
3696// | \ /
3697// AddCarrySum | ______/
3698// | /
3699// CarryOut = (or *, *)
3700//
3701// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3702//
3703// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3704//
3705// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3706// with a single path for carry/borrow out propagation.
3708 SDValue N0, SDValue N1, SDNode *N) {
3709 SDValue Carry0 = getAsCarry(TLI, N0);
3710 if (!Carry0)
3711 return SDValue();
3712 SDValue Carry1 = getAsCarry(TLI, N1);
3713 if (!Carry1)
3714 return SDValue();
3715
3716 unsigned Opcode = Carry0.getOpcode();
3717 if (Opcode != Carry1.getOpcode())
3718 return SDValue();
3719 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3720 return SDValue();
3721 // Guarantee identical type of CarryOut
3722 EVT CarryOutType = N->getValueType(0);
3723 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3724 CarryOutType != Carry1.getValue(1).getValueType())
3725 return SDValue();
3726
3727 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3728 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3729 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3730 std::swap(Carry0, Carry1);
3731
3732 // Check if nodes are connected in expected way.
3733 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3734 Carry1.getOperand(1) != Carry0.getValue(0))
3735 return SDValue();
3736
3737 // The carry-in value must be on the right-hand side for subtraction.
3738 unsigned CarryInOperandNum =
3739 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3740 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3741 return SDValue();
3742 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3743
3744 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3745 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3746 return SDValue();
3747
3748 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3749 CarryIn = getAsCarry(TLI, CarryIn, true);
3750 if (!CarryIn)
3751 return SDValue();
3752
3753 SDLoc DL(N);
3754 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3755 Carry1->getValueType(0));
3756 SDValue Merged =
3757 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3758 Carry0.getOperand(1), CarryIn);
3759
3760 // Please note that because we have proven that the result of the UADDO/USUBO
3761 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3762 // therefore prove that if the first UADDO/USUBO overflows, the second
3763 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3764 // maximum value.
3765 //
3766 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3767 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3768 //
3769 // This is important because it means that OR and XOR can be used to merge
3770 // carry flags; and that AND can return a constant zero.
3771 //
3772 // TODO: match other operations that can merge flags (ADD, etc)
3773 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3774 if (N->getOpcode() == ISD::AND)
3775 return DAG.getConstant(0, DL, CarryOutType);
3776 return Merged.getValue(1);
3777}
3778
3779SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3780 SDValue CarryIn, SDNode *N) {
3781 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3782 // carry.
3783 if (isBitwiseNot(N0))
3784 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3785 SDLoc DL(N);
3786 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3787 N0.getOperand(0), NotC);
3788 return CombineTo(
3789 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3790 }
3791
3792 // Iff the flag result is dead:
3793 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3794 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3795 // or the dependency between the instructions.
3796 if ((N0.getOpcode() == ISD::ADD ||
3797 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3798 N0.getValue(1) != CarryIn)) &&
3799 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3800 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3801 N0.getOperand(0), N0.getOperand(1), CarryIn);
3802
3803 /**
3804 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3805 * a diamond carry propagation, in which case we try to transform the DAG
3806 * to ensure linear carry propagation if that is possible.
3807 */
3808 if (auto Y = getAsCarry(TLI, N1)) {
3809 // Because both are carries, Y and Z can be swapped.
3810 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3811 return R;
3812 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3813 return R;
3814 }
3815
3816 return SDValue();
3817}
3818
3819SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3820 SDValue CarryIn, SDNode *N) {
3821 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3822 if (isBitwiseNot(N0)) {
3823 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3824 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3825 N0.getOperand(0), NotC);
3826 }
3827
3828 return SDValue();
3829}
3830
3831SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3832 SDValue N0 = N->getOperand(0);
3833 SDValue N1 = N->getOperand(1);
3834 SDValue CarryIn = N->getOperand(2);
3835 SDLoc DL(N);
3836
3837 // canonicalize constant to RHS
3838 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3839 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3840 if (N0C && !N1C)
3841 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3842
3843 // fold (saddo_carry x, y, false) -> (saddo x, y)
3844 if (isNullConstant(CarryIn)) {
3845 if (!LegalOperations ||
3846 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3847 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3848 }
3849
3850 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3851 return Combined;
3852
3853 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3854 return Combined;
3855
3856 return SDValue();
3857}
3858
3859// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3860// clamp/truncation if necessary.
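// In other words: when LHS already fits in DstVT, clamping RHS to DstVT's
// unsigned maximum first makes it safe to truncate both operands and perform
// the saturating subtraction in the narrower type.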
3861 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3862 SDValue RHS, SelectionDAG &DAG,
3863 const SDLoc &DL) {
3864 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3865 "Illegal truncation");
3866
3867 if (DstVT == SrcVT)
3868 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3869
3870 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3871 // clamping RHS.
3872 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3873 DstVT.getScalarSizeInBits());
3874 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3875 return SDValue();
3876
3877 SDValue SatLimit =
3878 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3879 DstVT.getScalarSizeInBits()),
3880 DL, SrcVT);
3881 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3882 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3883 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3884 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3885}
3886
3887// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3888// usubsat(a,b), optionally as a truncated type.
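// umax(a,b) - b is a - b when a >= b and 0 otherwise, which is exactly
// usubsat(a,b); the a - umin(a,b) form is the mirror image of the same idea.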
3889SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3890 if (N->getOpcode() != ISD::SUB ||
3891 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3892 return SDValue();
3893
3894 EVT SubVT = N->getValueType(0);
3895 SDValue Op0 = N->getOperand(0);
3896 SDValue Op1 = N->getOperand(1);
3897
3898 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3899 // that may be converted to usubsat(a,b).
3900 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3901 SDValue MaxLHS = Op0.getOperand(0);
3902 SDValue MaxRHS = Op0.getOperand(1);
3903 if (MaxLHS == Op1)
3904 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3905 if (MaxRHS == Op1)
3906 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3907 }
3908
3909 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3910 SDValue MinLHS = Op1.getOperand(0);
3911 SDValue MinRHS = Op1.getOperand(1);
3912 if (MinLHS == Op0)
3913 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3914 if (MinRHS == Op0)
3915 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3916 }
3917
3918 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3919 if (Op1.getOpcode() == ISD::TRUNCATE &&
3920 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3921 Op1.getOperand(0).hasOneUse()) {
3922 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3923 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3924 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3925 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3926 DAG, DL);
3927 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3928 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3929 DAG, DL);
3930 }
3931
3932 return SDValue();
3933}
3934
3935// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3936 // counting leading ones. Broadly, it replaces the subtraction with a left
3937// shift.
3938//
3939// * DAG Legalisation Pattern:
3940//
3941// (sub (ctlz (zeroextend (not Src)))
3942// BitWidthDiff)
3943//
3944// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3945// -->
3946//
3947// (ctlz_zero_undef (not (shl (anyextend Src)
3948// BitWidthDiff)))
3949//
3950// * Type Legalisation Pattern:
3951//
3952// (sub (ctlz (and (xor Src XorMask)
3953// AndMask))
3954// BitWidthDiff)
3955//
3956// if AndMask has only trailing ones
3957// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3958// and XorMask has more trailing ones than AndMask
3959// -->
3960//
3961// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
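// For example, with an i8 Src promoted to i32, BitWidthDiff == 24: rather than
// computing ctlz(zext(~Src)) - 24, shift Src into the top byte first, so that
// ctlz_zero_undef(~(anyext(Src) << 24)) directly counts Src's leading ones.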
3962template <class MatchContextClass>
3963 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3964 const SDLoc DL(N);
3965 SDValue N0 = N->getOperand(0);
3966 EVT VT = N0.getValueType();
3967 unsigned BitWidth = VT.getScalarSizeInBits();
3968
3969 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3970
3971 APInt AndMask;
3972 APInt XorMask;
3973 APInt BitWidthDiff;
3974
3975 SDValue CtlzOp;
3976 SDValue Src;
3977
3978 if (!sd_context_match(
3979 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3980 return SDValue();
3981
3982 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3983 // DAG Legalisation Pattern:
3984 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3985 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3986 return SDValue();
3987
3988 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3989 } else if (sd_context_match(CtlzOp, Matcher,
3990 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3991 m_ConstInt(AndMask)))) {
3992 // Type Legalisation Pattern:
3993 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3994 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3995 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3996 return SDValue();
3997 } else
3998 return SDValue();
3999
4000 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4001 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4002 SDValue Not =
4003 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4004
4005 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4006}
4007
4008// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
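// i.e. x - (x / y) * y == x % y, so the subtraction can simply reuse the
// remainder result that the divrem node already produces.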
4009 static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4010 const SDLoc &DL) {
4011 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4012 SDValue Sub0 = N->getOperand(0);
4013 SDValue Sub1 = N->getOperand(1);
4014
4015 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4016 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4017 DivRem.getOpcode() == ISD::UDIVREM) &&
4018 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4019 DivRem.getOperand(1) == MaybeY) {
4020 return SDValue(DivRem.getNode(), 1);
4021 }
4022 return SDValue();
4023 };
4024
4025 if (Sub1.getOpcode() == ISD::MUL) {
4026 // (sub x, (mul divrem(x,y)[0], y))
4027 SDValue Mul0 = Sub1.getOperand(0);
4028 SDValue Mul1 = Sub1.getOperand(1);
4029
4030 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4031 return Res;
4032
4033 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4034 return Res;
4035
4036 } else if (Sub1.getOpcode() == ISD::SHL) {
4037 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4038 SDValue Shl0 = Sub1.getOperand(0);
4039 SDValue Shl1 = Sub1.getOperand(1);
4040 // Check if Shl0 is divrem(x, Y)[0]
4041 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4042 Shl0.getOpcode() == ISD::UDIVREM) &&
4043 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4044
4045 SDValue Divisor = Shl0.getOperand(1);
4046
4047 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4048 ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4049 if (!DivC || !ShC)
4050 return SDValue();
4051
4052 if (DivC->getAPIntValue().isPowerOf2() &&
4053 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4054 return SDValue(Shl0.getNode(), 1);
4055 }
4056 }
4057 return SDValue();
4058}
4059
4060 // Since it may not be valid to emit a fold to zero for vector initializers,
4061// check if we can before folding.
4062static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4063 SelectionDAG &DAG, bool LegalOperations) {
4064 if (!VT.isVector())
4065 return DAG.getConstant(0, DL, VT);
4066 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4067 return DAG.getConstant(0, DL, VT);
4068 return SDValue();
4069}
4070
4071SDValue DAGCombiner::visitSUB(SDNode *N) {
4072 SDValue N0 = N->getOperand(0);
4073 SDValue N1 = N->getOperand(1);
4074 EVT VT = N0.getValueType();
4075 unsigned BitWidth = VT.getScalarSizeInBits();
4076 SDLoc DL(N);
4077
4078 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
4079 return V;
4080
4081 // fold (sub x, x) -> 0
4082 if (N0 == N1)
4083 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4084
4085 // fold (sub c1, c2) -> c3
4086 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4087 return C;
4088
4089 // fold vector ops
4090 if (VT.isVector()) {
4091 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4092 return FoldedVOp;
4093
4094 // fold (sub x, 0) -> x, vector edition
4095 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4096 return N0;
4097 }
4098
4099 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4100 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4101 if (N1.hasOneUse() && hasUMin(VT)) {
4102 SDValue Y;
4103 auto MS0 = m_Specific(N0);
4104 auto MVY = m_Value(Y);
4105 auto MZ = m_Zero();
4106 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4107 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4108
4109 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4110 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4111 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4112 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4113
4114 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4115 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4116 }
4117
4118 if (SDValue NewSel = foldBinOpIntoSelect(N))
4119 return NewSel;
4120
4121 // fold (sub x, c) -> (add x, -c)
4122 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4123 return DAG.getNode(ISD::ADD, DL, VT, N0,
4124 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4125
4126 if (isNullOrNullSplat(N0)) {
4127 // Right-shifting everything out but the sign bit followed by negation is
4128 // the same as flipping arithmetic/logical shift type without the negation:
4129 // -(X >>u 31) -> (X >>s 31)
4130 // -(X >>s 31) -> (X >>u 31)
4131 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4132 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4133 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4134 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4135 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4136 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4137 }
4138 }
4139
4140 // 0 - X --> 0 if the sub is NUW.
4141 if (N->getFlags().hasNoUnsignedWrap())
4142 return N0;
4143
4144 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
4145 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4146 // N1 must be 0 because negating the minimum signed value is undefined.
4147 if (N->getFlags().hasNoSignedWrap())
4148 return N0;
4149
4150 // 0 - X --> X if X is 0 or the minimum signed value.
4151 return N1;
4152 }
4153
4154 // Convert 0 - abs(x).
4155 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4157 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4158 return Result;
4159
4160 // Similar to the previous rule, but this time targeting an expanded abs.
4161 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4162 // as well as
4163 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4164 // Note that these two are applicable to both signed and unsigned min/max.
4165 SDValue X;
4166 SDValue S0;
4167 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4168 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4169 m_UMax(m_Value(X), NegPat),
4170 m_SMin(m_Value(X), NegPat),
4171 m_UMin(m_Value(X), NegPat))))) {
4172 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4173 if (hasOperation(NewOpc, VT))
4174 return DAG.getNode(NewOpc, DL, VT, X, S0);
4175 }
4176
4177 // Fold neg(splat(neg(x))) -> splat(x)
4178 if (VT.isVector()) {
4179 SDValue N1S = DAG.getSplatValue(N1, true);
4180 if (N1S && N1S.getOpcode() == ISD::SUB &&
4181 isNullConstant(N1S.getOperand(0)))
4182 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4183 }
4184
4185 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4186 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4187 isOneOrOneSplat(N1->getOperand(1))) {
4188 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4189 if (VT.isVector())
4190 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4194 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4195 DAG.getValueType(ExtVT));
4196 }
4197 }
4198 }
4199
4200 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4201 if (isAllOnesOrAllOnesSplat(N0))
4202 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4203
4204 // fold (A - (0-B)) -> A+B
4205 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4206 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4207
4208 // fold A-(A-B) -> B
4209 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4210 return N1.getOperand(1);
4211
4212 // fold (A+B)-A -> B
4213 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4214 return N0.getOperand(1);
4215
4216 // fold (A+B)-B -> A
4217 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4218 return N0.getOperand(0);
4219
4220 // fold (A+C1)-C2 -> A+(C1-C2)
4221 if (N0.getOpcode() == ISD::ADD) {
4222 SDValue N01 = N0.getOperand(1);
4223 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4224 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4225 }
4226
4227 // fold C2-(A+C1) -> (C2-C1)-A
4228 if (N1.getOpcode() == ISD::ADD) {
4229 SDValue N11 = N1.getOperand(1);
4230 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4231 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4232 }
4233
4234 // fold (A-C1)-C2 -> A-(C1+C2)
4235 if (N0.getOpcode() == ISD::SUB) {
4236 SDValue N01 = N0.getOperand(1);
4237 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4238 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4239 }
4240
4241 // fold (c1-A)-c2 -> (c1-c2)-A
4242 if (N0.getOpcode() == ISD::SUB) {
4243 SDValue N00 = N0.getOperand(0);
4244 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4245 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4246 }
4247
4248 SDValue A, B, C;
4249
4250 // fold ((A+(B+C))-B) -> A+C
4251 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4252 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4253
4254 // fold ((A+(B-C))-B) -> A-C
4255 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4256 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4257
4258 // fold ((A-(B-C))-C) -> A-B
4259 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4260 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4261
4262 // fold (A-(B-C)) -> A+(C-B)
4263 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4264 return DAG.getNode(ISD::ADD, DL, VT, N0,
4265 DAG.getNode(ISD::SUB, DL, VT, C, B));
4266
4267 // A - (A & B) -> A & (~B)
4268 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4269 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4270 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4271
4272 // fold (A - (-B * C)) -> (A + (B * C))
4273 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4274 return DAG.getNode(ISD::ADD, DL, VT, N0,
4275 DAG.getNode(ISD::MUL, DL, VT, B, C));
4276
4277 // If either operand of a sub is undef, the result is undef
4278 if (N0.isUndef())
4279 return N0;
4280 if (N1.isUndef())
4281 return N1;
4282
4283 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4284 return V;
4285
4286 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4287 return V;
4288
4289 // Try to match AVGCEIL fixedwidth pattern
4290 if (SDValue V = foldSubToAvg(N, DL))
4291 return V;
4292
4293 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4294 return V;
4295
4296 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4297 return V;
4298
4299 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4300 return V;
4301
4302 // (A - B) - 1 -> add (xor B, -1), A
4303 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4304 m_One(/*AllowUndefs=*/true))))
4305 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4306
4307 // Look for:
4308 // sub y, (xor x, -1)
4309 // And if the target does not like this form then turn into:
4310 // add (add x, y), 1
4311 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4312 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4313 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4314 }
4315
4316 // Hoist one-use addition by non-opaque constant:
4317 // (x + C) - y -> (x - y) + C
4318 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4319 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4320 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4321 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4322 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4323 }
4324 // y - (x + C) -> (y - x) - C
4325 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4326 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4327 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4328 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4329 }
4330 // (x - C) - y -> (x - y) - C
4331 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4332 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4333 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4334 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4335 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4336 }
4337 // (C - x) - y -> C - (x + y)
4338 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4339 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4340 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4341 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4342 }
4343
4344 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4345 // rather than 'sub 0/1' (the sext should get folded).
4346 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4347 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4348 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4349 TLI.getBooleanContents(VT) ==
4350 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4351 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4352 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4353 }
4354
4355 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
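// B is 0 when A is non-negative and -1 when A is negative, so (A ^ B) - B is
// either A or (~A) + 1 == -A: the classic branch-free abs sequence.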
4356 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4357 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4358 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4359 return DAG.getNode(ISD::ABS, DL, VT, A);
4360
4361 // If the relocation model supports it, consider symbol offsets.
4362 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4363 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4364 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4365 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4366 if (GA->getGlobal() == GB->getGlobal())
4367 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4368 DL, VT);
4369 }
4370
4371 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4372 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4373 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4374 if (TN->getVT() == MVT::i1) {
4375 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4376 DAG.getConstant(1, DL, VT));
4377 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4378 }
4379 }
4380
4381 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4382 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4383 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4384 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4385 }
4386
4387 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4388 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4389 APInt NewStep = -N1.getConstantOperandAPInt(0);
4390 return DAG.getNode(ISD::ADD, DL, VT, N0,
4391 DAG.getStepVector(DL, VT, NewStep));
4392 }
4393
4394 // Prefer an add for more folding potential and possibly better codegen:
4395 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4396 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4397 SDValue ShAmt = N1.getOperand(1);
4398 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4399 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4400 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4401 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4402 }
4403 }
4404
4405 // As with the previous fold, prefer add for more folding potential.
4406 // Subtracting SMIN/0 is the same as adding SMIN/0:
4407 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4408 if (N1.getOpcode() == ISD::SHL) {
4409 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4410 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4411 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4412 }
4413
4414 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4415 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4416 N0.getResNo() == 0 && N0.hasOneUse())
4417 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4418 N0.getOperand(0), N1, N0.getOperand(2));
4419
4420 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4421 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4422 if (SDValue Carry = getAsCarry(TLI, N0)) {
4423 SDValue X = N1;
4424 SDValue Zero = DAG.getConstant(0, DL, VT);
4425 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4426 return DAG.getNode(ISD::UADDO_CARRY, DL,
4427 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4428 Carry);
4429 }
4430 }
4431
4432 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4433 // sub C0, X --> xor X, C0
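// e.g. with C0 == 0b1010 and X known to have set bits only within 0b1010:
// 0b1010 - 0b0010 == 0b1000 == 0b1010 ^ 0b0010, with no borrows needed.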
4434 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4435 if (!C0->isOpaque()) {
4436 const APInt &C0Val = C0->getAPIntValue();
4437 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4438 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4439 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4440 }
4441 }
4442
4443 // smax(a,b) - smin(a,b) --> abds(a,b)
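// The maximum of two values minus their minimum is their absolute difference.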
4444 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4445 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4446 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4447 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4448
4449 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4450 if (hasOperation(ISD::ABDS, VT) &&
4451 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4452 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4453 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4454
4455 // umax(a,b) - umin(a,b) --> abdu(a,b)
4456 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4457 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4458 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4459 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4460
4461 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4462 if (hasOperation(ISD::ABDU, VT) &&
4463 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4464 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4465 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4466
4467 return SDValue();
4468}
4469
4470SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4471 unsigned Opcode = N->getOpcode();
4472 SDValue N0 = N->getOperand(0);
4473 SDValue N1 = N->getOperand(1);
4474 EVT VT = N0.getValueType();
4475 bool IsSigned = Opcode == ISD::SSUBSAT;
4476 SDLoc DL(N);
4477
4478 // fold (sub_sat x, undef) -> 0
4479 if (N0.isUndef() || N1.isUndef())
4480 return DAG.getConstant(0, DL, VT);
4481
4482 // fold (sub_sat x, x) -> 0
4483 if (N0 == N1)
4484 return DAG.getConstant(0, DL, VT);
4485
4486 // fold (sub_sat c1, c2) -> c3
4487 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4488 return C;
4489
4490 // fold vector ops
4491 if (VT.isVector()) {
4492 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4493 return FoldedVOp;
4494
4495 // fold (sub_sat x, 0) -> x, vector edition
4496 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4497 return N0;
4498 }
4499
4500 // fold (sub_sat x, 0) -> x
4501 if (isNullConstant(N1))
4502 return N0;
4503
4504 // If it cannot overflow, transform into a sub.
4505 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4506 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4507
4508 return SDValue();
4509}
4510
4511SDValue DAGCombiner::visitSUBC(SDNode *N) {
4512 SDValue N0 = N->getOperand(0);
4513 SDValue N1 = N->getOperand(1);
4514 EVT VT = N0.getValueType();
4515 SDLoc DL(N);
4516
4517 // If the flag result is dead, turn this into an SUB.
4518 if (!N->hasAnyUseOfValue(1))
4519 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4520 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4521
4522 // fold (subc x, x) -> 0 + no borrow
4523 if (N0 == N1)
4524 return CombineTo(N, DAG.getConstant(0, DL, VT),
4525 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4526
4527 // fold (subc x, 0) -> x + no borrow
4528 if (isNullConstant(N1))
4529 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4530
4531 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4532 if (isAllOnesConstant(N0))
4533 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4534 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4535
4536 return SDValue();
4537}
4538
4539SDValue DAGCombiner::visitSUBO(SDNode *N) {
4540 SDValue N0 = N->getOperand(0);
4541 SDValue N1 = N->getOperand(1);
4542 EVT VT = N0.getValueType();
4543 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4544
4545 EVT CarryVT = N->getValueType(1);
4546 SDLoc DL(N);
4547
4548 // If the flag result is dead, turn this into an SUB.
4549 if (!N->hasAnyUseOfValue(1))
4550 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4551 DAG.getUNDEF(CarryVT));
4552
4553 // fold (subo x, x) -> 0 + no borrow
4554 if (N0 == N1)
4555 return CombineTo(N, DAG.getConstant(0, DL, VT),
4556 DAG.getConstant(0, DL, CarryVT));
4557
4558 // fold (subo x, c) -> (addo x, -c)
4559 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4560 if (IsSigned && !N1C->isMinSignedValue())
4561 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4562 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4563
4564 // fold (subo x, 0) -> x + no borrow
4565 if (isNullOrNullSplat(N1))
4566 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4567
4568 // If it cannot overflow, transform into a sub.
4569 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4570 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4571 DAG.getConstant(0, DL, CarryVT));
4572
4573 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4574 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4575 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4576 DAG.getConstant(0, DL, CarryVT));
4577
4578 return SDValue();
4579}
4580
4581SDValue DAGCombiner::visitSUBE(SDNode *N) {
4582 SDValue N0 = N->getOperand(0);
4583 SDValue N1 = N->getOperand(1);
4584 SDValue CarryIn = N->getOperand(2);
4585
4586 // fold (sube x, y, false) -> (subc x, y)
4587 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4588 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4589
4590 return SDValue();
4591}
4592
4593SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4594 SDValue N0 = N->getOperand(0);
4595 SDValue N1 = N->getOperand(1);
4596 SDValue CarryIn = N->getOperand(2);
4597
4598 // fold (usubo_carry x, y, false) -> (usubo x, y)
4599 if (isNullConstant(CarryIn)) {
4600 if (!LegalOperations ||
4601 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4602 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4603 }
4604
4605 return SDValue();
4606}
4607
4608SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4609 SDValue N0 = N->getOperand(0);
4610 SDValue N1 = N->getOperand(1);
4611 SDValue CarryIn = N->getOperand(2);
4612
4613 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4614 if (isNullConstant(CarryIn)) {
4615 if (!LegalOperations ||
4616 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4617 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4618 }
4619
4620 return SDValue();
4621}
4622
4623// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4624// UMULFIXSAT here.
4625SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4626 SDValue N0 = N->getOperand(0);
4627 SDValue N1 = N->getOperand(1);
4628 SDValue Scale = N->getOperand(2);
4629 EVT VT = N0.getValueType();
4630
4631 // fold (mulfix x, undef, scale) -> 0
4632 if (N0.isUndef() || N1.isUndef())
4633 return DAG.getConstant(0, SDLoc(N), VT);
4634
4635 // Canonicalize constant to RHS (vector doesn't have to splat)
4636 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4637 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4638 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4639
4640 // fold (mulfix x, 0, scale) -> 0
4641 if (isNullConstant(N1))
4642 return DAG.getConstant(0, SDLoc(N), VT);
4643
4644 return SDValue();
4645}
4646
4647template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4648 SDValue N0 = N->getOperand(0);
4649 SDValue N1 = N->getOperand(1);
4650 EVT VT = N0.getValueType();
4651 unsigned BitWidth = VT.getScalarSizeInBits();
4652 SDLoc DL(N);
4653 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4654 MatchContextClass Matcher(DAG, TLI, N);
4655
4656 // fold (mul x, undef) -> 0
4657 if (N0.isUndef() || N1.isUndef())
4658 return DAG.getConstant(0, DL, VT);
4659
4660 // fold (mul c1, c2) -> c1*c2
4661 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4662 return C;
4663
4664 // canonicalize constant to RHS (vector doesn't have to splat)
4665 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4666 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4667 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4668
4669 bool N1IsConst = false;
4670 bool N1IsOpaqueConst = false;
4671 APInt ConstValue1;
4672
4673 // fold vector ops
4674 if (VT.isVector()) {
4675 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4676 if (!UseVP)
4677 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4678 return FoldedVOp;
4679
4680 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4681 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4682 "Splat APInt should be element width");
4683 } else {
4684 N1IsConst = isa<ConstantSDNode>(N1);
4685 if (N1IsConst) {
4686 ConstValue1 = N1->getAsAPIntVal();
4687 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4688 }
4689 }
4690
4691 // fold (mul x, 0) -> 0
4692 if (N1IsConst && ConstValue1.isZero())
4693 return N1;
4694
4695 // fold (mul x, 1) -> x
4696 if (N1IsConst && ConstValue1.isOne())
4697 return N0;
4698
4699 if (!UseVP)
4700 if (SDValue NewSel = foldBinOpIntoSelect(N))
4701 return NewSel;
4702
4703 // fold (mul x, -1) -> 0-x
4704 if (N1IsConst && ConstValue1.isAllOnes())
4705 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4706
4707 // fold (mul x, (1 << c)) -> x << c
4708 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4709 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4710 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4711 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4712 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4713 SDNodeFlags Flags;
4714 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4715 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4716 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4717 }
4718 }
4719
4720 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4721 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4722 unsigned Log2Val = (-ConstValue1).logBase2();
4723
4724 // FIXME: If the input is something that is easily negated (e.g. a
4725 // single-use add), we should put the negate there.
4726 return Matcher.getNode(
4727 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4728 Matcher.getNode(ISD::SHL, DL, VT, N0,
4729 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4730 }
4731
4732 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4733 // hi result is in use in case we hit this mid-legalization.
4734 if (!UseVP) {
4735 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4736 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4737 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4738 // TODO: Can we match commutable operands with getNodeIfExists?
4739 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4740 if (LoHi->hasAnyUseOfValue(1))
4741 return SDValue(LoHi, 0);
4742 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4743 if (LoHi->hasAnyUseOfValue(1))
4744 return SDValue(LoHi, 0);
4745 }
4746 }
4747 }
4748
4749 // Try to transform:
4750 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4751 // mul x, (2^N + 1) --> add (shl x, N), x
4752 // mul x, (2^N - 1) --> sub (shl x, N), x
4753 // Examples: x * 33 --> (x << 5) + x
4754 // x * 15 --> (x << 4) - x
4755 // x * -33 --> -((x << 5) + x)
4756 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4757 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4758 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4759 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4760 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4761 // x * 0xf800 --> (x << 16) - (x << 11)
4762 // x * -0x8800 --> -((x << 15) + (x << 11))
4763 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4764 if (!UseVP && N1IsConst &&
4765 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4766 // TODO: We could handle more general decomposition of any constant by
4767 // having the target set a limit on number of ops and making a
4768 // callback to determine that sequence (similar to sqrt expansion).
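    // e.g. for ConstValue1 == 20: MulC = 20, TZeros = 2, MulC >>= 2 leaves 5,
    // (5 - 1) is a power of 2, so MathOp = ISD::ADD and ShAmt = 2 + 2 = 4,
    // giving (x << 4) + (x << 2) == x * 20.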
4769 unsigned MathOp = ISD::DELETED_NODE;
4770 APInt MulC = ConstValue1.abs();
4771 // The constant `2` should be treated as (2^0 + 1).
4772 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4773 MulC.lshrInPlace(TZeros);
4774 if ((MulC - 1).isPowerOf2())
4775 MathOp = ISD::ADD;
4776 else if ((MulC + 1).isPowerOf2())
4777 MathOp = ISD::SUB;
4778
4779 if (MathOp != ISD::DELETED_NODE) {
4780 unsigned ShAmt =
4781 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4782 ShAmt += TZeros;
4783 assert(ShAmt < BitWidth &&
4784 "multiply-by-constant generated out of bounds shift");
4785 SDValue Shl =
4786 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4787 SDValue R =
4788 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4789 DAG.getNode(ISD::SHL, DL, VT, N0,
4790 DAG.getConstant(TZeros, DL, VT)))
4791 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4792 if (ConstValue1.isNegative())
4793 R = DAG.getNegative(R, DL, VT);
4794 return R;
4795 }
4796 }
4797
4798 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
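  // e.g. (mul (shl x, 2), 5) --> (mul x, (5 << 2)) == (mul x, 20)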
4799 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4800 SDValue N01 = N0.getOperand(1);
4801 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4802 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4803 }
4804
4805 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4806 // use.
4807 {
4808 SDValue Sh, Y;
4809
4810 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4811    if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4812        isConstantOrConstantVector(N0.getOperand(1))) {
4813 Sh = N0; Y = N1;
4814    } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4815               isConstantOrConstantVector(N1.getOperand(1))) {
4816 Sh = N1; Y = N0;
4817 }
4818
4819 if (Sh.getNode()) {
4820 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4821 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4822 }
4823 }
4824
4825 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4826  if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4827      isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4828      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques*/ true) &&
4829      isMulAddWithConstProfitable(N, N0, N1))
4830 return Matcher.getNode(
4831 ISD::ADD, DL, VT,
4832 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4833 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4834
4835 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4836 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4837 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4838 const APInt &C0 = N0.getConstantOperandAPInt(0);
4839 const APInt &C1 = NC1->getAPIntValue();
4840 return DAG.getVScale(DL, VT, C0 * C1);
4841 }
4842
4843 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4844 APInt MulVal;
4845 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4846 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4847 const APInt &C0 = N0.getConstantOperandAPInt(0);
4848 APInt NewStep = C0 * MulVal;
4849 return DAG.getStepVector(DL, VT, NewStep);
4850 }
4851
4852 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4853 SDValue X;
4854  if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4855      sd_context_match(
4856          N, Matcher,
4857          m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4858                m_Deferred(X)))) {
4859 return Matcher.getNode(ISD::ABS, DL, VT, X);
4860 }
4861
4862  // Fold the per-element identities (mul x, 0/undef) -> 0 and
4863  // (mul x, 1) -> x
4864  // into a single and(x, mask).
4865 // We can replace vectors with '0' and '1' factors with a clearing mask.
4866 if (VT.isFixedLengthVector()) {
4867 unsigned NumElts = VT.getVectorNumElements();
4868 SmallBitVector ClearMask;
4869 ClearMask.reserve(NumElts);
4870 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4871 if (!V || V->isZero()) {
4872 ClearMask.push_back(true);
4873 return true;
4874 }
4875 ClearMask.push_back(false);
4876 return V->isOne();
4877 };
4878 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4879 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4880 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4881 EVT LegalSVT = N1.getOperand(0).getValueType();
4882 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4883      SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4884      SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4885 for (unsigned I = 0; I != NumElts; ++I)
4886 if (ClearMask[I])
4887 Mask[I] = Zero;
4888 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4889 }
4890 }
4891
4892 // reassociate mul
4893 // TODO: Change reassociateOps to support vp ops.
4894 if (!UseVP)
4895 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4896 return RMUL;
4897
4898 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4899 // TODO: Change reassociateReduction to support vp ops.
4900 if (!UseVP)
4901 if (SDValue SD =
4902 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4903 return SD;
4904
4905  // Simplify the operands using demanded-bits information.
4906  if (SimplifyDemandedBits(SDValue(N, 0)))
4907 return SDValue(N, 0);
4908
4909 return SDValue();
4910}
4911
4912/// Return true if divmod libcall is available.
4913static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4914 const TargetLowering &TLI) {
4915 RTLIB::Libcall LC;
4916 EVT NodeType = Node->getValueType(0);
4917 if (!NodeType.isSimple())
4918 return false;
4919 switch (NodeType.getSimpleVT().SimpleTy) {
4920 default: return false; // No libcall for vector types.
4921 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4922 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4923 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4924 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4925 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4926 }
4927
4928 return TLI.getLibcallName(LC) != nullptr;
4929}
4930
4931/// Issue divrem if both quotient and remainder are needed.
4932SDValue DAGCombiner::useDivRem(SDNode *Node) {
4933 if (Node->use_empty())
4934 return SDValue(); // This is a dead node, leave it alone.
4935
4936 unsigned Opcode = Node->getOpcode();
4937 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4938 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4939
4940 // DivMod lib calls can still work on non-legal types if using lib-calls.
4941 EVT VT = Node->getValueType(0);
4942 if (VT.isVector() || !VT.isInteger())
4943 return SDValue();
4944
4945 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4946 return SDValue();
4947
4948 // If DIVREM is going to get expanded into a libcall,
4949 // but there is no libcall available, then don't combine.
4950  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4951      !isDivRemLibcallAvailable(Node, isSigned, TLI))
4952 return SDValue();
4953
4954 // If div is legal, it's better to do the normal expansion
4955 unsigned OtherOpcode = 0;
4956 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4957 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4958 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4959 return SDValue();
4960 } else {
4961 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4962 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4963 return SDValue();
4964 }
4965
4966 SDValue Op0 = Node->getOperand(0);
4967 SDValue Op1 = Node->getOperand(1);
4968 SDValue combined;
4969 for (SDNode *User : Op0->users()) {
4970 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4971 User->use_empty())
4972 continue;
4973 // Convert the other matching node(s), too;
4974 // otherwise, the DIVREM may get target-legalized into something
4975 // target-specific that we won't be able to recognize.
4976 unsigned UserOpc = User->getOpcode();
4977 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4978 User->getOperand(0) == Op0 &&
4979 User->getOperand(1) == Op1) {
4980 if (!combined) {
4981 if (UserOpc == OtherOpcode) {
4982 SDVTList VTs = DAG.getVTList(VT, VT);
4983 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4984 } else if (UserOpc == DivRemOpc) {
4985 combined = SDValue(User, 0);
4986 } else {
4987 assert(UserOpc == Opcode);
4988 continue;
4989 }
4990 }
4991 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4992 CombineTo(User, combined);
4993 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4994 CombineTo(User, combined.getValue(1));
4995 }
4996 }
4997 return combined;
4998}
4999
5000static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5001 SDValue N0 = N->getOperand(0);
5002 SDValue N1 = N->getOperand(1);
5003 EVT VT = N->getValueType(0);
5004 SDLoc DL(N);
5005
5006 unsigned Opc = N->getOpcode();
5007  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5008  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5009
5010 // X / undef -> undef
5011 // X % undef -> undef
5012 // X / 0 -> undef
5013 // X % 0 -> undef
5014 // NOTE: This includes vectors where any divisor element is zero/undef.
5015 if (DAG.isUndef(Opc, {N0, N1}))
5016 return DAG.getUNDEF(VT);
5017
5018 // undef / X -> 0
5019 // undef % X -> 0
5020 if (N0.isUndef())
5021 return DAG.getConstant(0, DL, VT);
5022
5023 // 0 / X -> 0
5024  // 0 % X -> 0
5025  ConstantSDNode *N0C = isConstOrConstSplat(N0);
5026 if (N0C && N0C->isZero())
5027 return N0;
5028
5029 // X / X -> 1
5030 // X % X -> 0
5031 if (N0 == N1)
5032 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5033
5034 // X / 1 -> X
5035 // X % 1 -> 0
5036 // If this is a boolean op (single-bit element type), we can't have
5037 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5038 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5039 // it's a 1.
5040 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
5041 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5042
5043 return SDValue();
5044}
5045
5046SDValue DAGCombiner::visitSDIV(SDNode *N) {
5047 SDValue N0 = N->getOperand(0);
5048 SDValue N1 = N->getOperand(1);
5049 EVT VT = N->getValueType(0);
5050 EVT CCVT = getSetCCResultType(VT);
5051 SDLoc DL(N);
5052
5053 // fold (sdiv c1, c2) -> c1/c2
5054 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5055 return C;
5056
5057 // fold vector ops
5058 if (VT.isVector())
5059 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5060 return FoldedVOp;
5061
5062 // fold (sdiv X, -1) -> 0-X
5063 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5064 if (N1C && N1C->isAllOnes())
5065 return DAG.getNegative(N0, DL, VT);
5066
5067 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5068 if (N1C && N1C->isMinSignedValue())
5069 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5070 DAG.getConstant(1, DL, VT),
5071 DAG.getConstant(0, DL, VT));
5072
5073 if (SDValue V = simplifyDivRem(N, DAG))
5074 return V;
5075
5076 if (SDValue NewSel = foldBinOpIntoSelect(N))
5077 return NewSel;
5078
5079 // If we know the sign bits of both operands are zero, strength reduce to a
5080 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5081 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5082 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5083
5084 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5085 // If the corresponding remainder node exists, update its users with
5086    // (Dividend - (Quotient * Divisor)).
5087 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5088 { N0, N1 })) {
5089 // If the sdiv has the exact flag we shouldn't propagate it to the
5090 // remainder node.
5091 if (!N->getFlags().hasExact()) {
5092 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5093 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5094 AddToWorklist(Mul.getNode());
5095 AddToWorklist(Sub.getNode());
5096 CombineTo(RemNode, Sub);
5097 }
5098 }
5099 return V;
5100 }
5101
5102 // sdiv, srem -> sdivrem
5103 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5104 // true. Otherwise, we break the simplification logic in visitREM().
5105 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5106 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5107 if (SDValue DivRem = useDivRem(N))
5108 return DivRem;
5109
5110 return SDValue();
5111}
5112
5113static bool isDivisorPowerOfTwo(SDValue Divisor) {
5114 // Helper for determining whether a value is a power-2 constant scalar or a
5115 // vector of such elements.
5116 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5117 if (C->isZero() || C->isOpaque())
5118 return false;
5119 if (C->getAPIntValue().isPowerOf2())
5120 return true;
5121 if (C->getAPIntValue().isNegatedPowerOf2())
5122 return true;
5123 return false;
5124 };
5125
5126 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5127}
5128
5129SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5130 SDLoc DL(N);
5131 EVT VT = N->getValueType(0);
5132 EVT CCVT = getSetCCResultType(VT);
5133 unsigned BitWidth = VT.getScalarSizeInBits();
5134
5135 // fold (sdiv X, pow2) -> simple ops after legalize
5136 // FIXME: We check for the exact bit here because the generic lowering gives
5137 // better results in that case. The target-specific lowering should learn how
5138 // to handle exact sdivs efficiently.
5139 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5140 // Target-specific implementation of sdiv x, pow2.
5141 if (SDValue Res = BuildSDIVPow2(N))
5142 return Res;
5143
5144 // Create constants that are functions of the shift amount value.
5145 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5146 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5147 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5148 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5149 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5150 if (!isConstantOrConstantVector(Inexact))
5151 return SDValue();
5152
5153 // Splat the sign bit into the register
5154 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5155 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5156 AddToWorklist(Sign.getNode());
5157
5158 // Add (N0 < 0) ? abs2 - 1 : 0;
5159 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5160 AddToWorklist(Srl.getNode());
5161 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5162 AddToWorklist(Add.getNode());
5163 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5164 AddToWorklist(Sra.getNode());
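    // e.g. for i32 sdiv x, 4: Sign = sra x, 31; Inexact = 32 - 2 = 30;
    // Srl = srl Sign, 30 (adds 3 only when x is negative); Add = x + Srl;
    // Sra = sra Add, 2, rounding the quotient toward zero.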
5165
5166 // Special case: (sdiv X, 1) -> X
5167 // Special Case: (sdiv X, -1) -> 0-X
5168    SDValue One = DAG.getConstant(1, DL, VT);
5169    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5170 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5171 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5172 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5173 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5174
5175 // If dividing by a positive value, we're done. Otherwise, the result must
5176 // be negated.
5177 SDValue Zero = DAG.getConstant(0, DL, VT);
5178 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5179
5180 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5181 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5182 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5183 return Res;
5184 }
5185
5186 // If integer divide is expensive and we satisfy the requirements, emit an
5187 // alternate sequence. Targets may check function attributes for size/speed
5188 // trade-offs.
5189  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5190  if (isConstantOrConstantVector(N1) &&
5191 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5192 if (SDValue Op = BuildSDIV(N))
5193 return Op;
5194
5195 return SDValue();
5196}
5197
5198SDValue DAGCombiner::visitUDIV(SDNode *N) {
5199 SDValue N0 = N->getOperand(0);
5200 SDValue N1 = N->getOperand(1);
5201 EVT VT = N->getValueType(0);
5202 EVT CCVT = getSetCCResultType(VT);
5203 SDLoc DL(N);
5204
5205 // fold (udiv c1, c2) -> c1/c2
5206 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5207 return C;
5208
5209 // fold vector ops
5210 if (VT.isVector())
5211 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5212 return FoldedVOp;
5213
5214 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5215 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5216 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5217 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5218 DAG.getConstant(1, DL, VT),
5219 DAG.getConstant(0, DL, VT));
5220 }
5221
5222 if (SDValue V = simplifyDivRem(N, DAG))
5223 return V;
5224
5225 if (SDValue NewSel = foldBinOpIntoSelect(N))
5226 return NewSel;
5227
5228 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5229 // If the corresponding remainder node exists, update its users with
5230    // (Dividend - (Quotient * Divisor)).
5231 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5232 { N0, N1 })) {
5233 // If the udiv has the exact flag we shouldn't propagate it to the
5234 // remainder node.
5235 if (!N->getFlags().hasExact()) {
5236 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5237 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5238 AddToWorklist(Mul.getNode());
5239 AddToWorklist(Sub.getNode());
5240 CombineTo(RemNode, Sub);
5241 }
5242 }
5243 return V;
5244 }
5245
5246  // udiv, urem -> udivrem
5247 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5248 // true. Otherwise, we break the simplification logic in visitREM().
5249 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5250 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5251 if (SDValue DivRem = useDivRem(N))
5252 return DivRem;
5253
5254 // Simplify the operands using demanded-bits information.
5255 // We don't have demanded bits support for UDIV so this just enables constant
5256  // folding based on known bits.
5257  if (SimplifyDemandedBits(SDValue(N, 0)))
5258 return SDValue(N, 0);
5259
5260 return SDValue();
5261}
5262
5263SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5264 SDLoc DL(N);
5265 EVT VT = N->getValueType(0);
5266
5267 // fold (udiv x, (1 << c)) -> x >>u c
5268 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5269 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5270 AddToWorklist(LogBase2.getNode());
5271
5272 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5273 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5274 AddToWorklist(Trunc.getNode());
5275 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5276 }
5277 }
5278
5279 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
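  // e.g. (udiv x, (shl 8, y)) --> (srl x, (add y, 3))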
5280 if (N1.getOpcode() == ISD::SHL) {
5281 SDValue N10 = N1.getOperand(0);
5282 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5283 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5284 AddToWorklist(LogBase2.getNode());
5285
5286 EVT ADDVT = N1.getOperand(1).getValueType();
5287 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5288 AddToWorklist(Trunc.getNode());
5289 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5290 AddToWorklist(Add.getNode());
5291 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5292 }
5293 }
5294 }
5295
5296 // fold (udiv x, c) -> alternate
5297  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5298  if (isConstantOrConstantVector(N1) &&
5299 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5300 if (SDValue Op = BuildUDIV(N))
5301 return Op;
5302
5303 return SDValue();
5304}
5305
5306SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5307 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5308 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5309 // Target-specific implementation of srem x, pow2.
5310 if (SDValue Res = BuildSREMPow2(N))
5311 return Res;
5312 }
5313 return SDValue();
5314}
5315
5316// handles ISD::SREM and ISD::UREM
5317SDValue DAGCombiner::visitREM(SDNode *N) {
5318 unsigned Opcode = N->getOpcode();
5319 SDValue N0 = N->getOperand(0);
5320 SDValue N1 = N->getOperand(1);
5321 EVT VT = N->getValueType(0);
5322 EVT CCVT = getSetCCResultType(VT);
5323
5324 bool isSigned = (Opcode == ISD::SREM);
5325 SDLoc DL(N);
5326
5327 // fold (rem c1, c2) -> c1%c2
5328 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5329 return C;
5330
5331 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5332 // Freeze the numerator to avoid a miscompile with an undefined value.
5333 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5334 CCVT.isVector() == VT.isVector()) {
5335 SDValue F0 = DAG.getFreeze(N0);
5336 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5337 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5338 }
5339
5340 if (SDValue V = simplifyDivRem(N, DAG))
5341 return V;
5342
5343 if (SDValue NewSel = foldBinOpIntoSelect(N))
5344 return NewSel;
5345
5346 if (isSigned) {
5347 // If we know the sign bits of both operands are zero, strength reduce to a
5348 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5349 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5350 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5351 } else {
5352 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5353 // fold (urem x, pow2) -> (and x, pow2-1)
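    // e.g. (urem x, 16) --> (and x, 15)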
5354 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5355 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5356 AddToWorklist(Add.getNode());
5357 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5358 }
5359 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5360 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5361 // TODO: We should sink the following into isKnownToBePowerOfTwo
5362 // using a OrZero parameter analogous to our handling in ValueTracking.
5363    if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5364        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5365 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5366 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5367 AddToWorklist(Add.getNode());
5368 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5369 }
5370 }
5371
5372 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5373
5374 // If X/C can be simplified by the division-by-constant logic, lower
5375 // X%C to the equivalent of X-X/C*C.
5376 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5377 // speculative DIV must not cause a DIVREM conversion. We guard against this
5378 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5379 // combine will not return a DIVREM. Regardless, checking cheapness here
5380 // makes sense since the simplification results in fatter code.
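  // e.g. x %s 7 becomes x - (x /s 7) * 7, where (x /s 7) is the optimized
  // expansion returned by visitSDIVLike (typically a multiply-high sequence
  // built by BuildSDIV rather than a real divide).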
5381 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5382 if (isSigned) {
5383 // check if we can build faster implementation for srem
5384 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5385 return OptimizedRem;
5386 }
5387
5388 SDValue OptimizedDiv =
5389 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5390 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5391 // If the equivalent Div node also exists, update its users.
5392 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5393 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5394 { N0, N1 }))
5395 CombineTo(DivNode, OptimizedDiv);
5396 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5397 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5398 AddToWorklist(OptimizedDiv.getNode());
5399 AddToWorklist(Mul.getNode());
5400 return Sub;
5401 }
5402 }
5403
5404  // srem, sdiv -> sdivrem / urem, udiv -> udivrem
5405 if (SDValue DivRem = useDivRem(N))
5406 return DivRem.getValue(1);
5407
5408 return SDValue();
5409}
5410
5411SDValue DAGCombiner::visitMULHS(SDNode *N) {
5412 SDValue N0 = N->getOperand(0);
5413 SDValue N1 = N->getOperand(1);
5414 EVT VT = N->getValueType(0);
5415 SDLoc DL(N);
5416
5417 // fold (mulhs c1, c2)
5418 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5419 return C;
5420
5421  // canonicalize constant to RHS.
5422  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5423      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5424 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5425
5426 if (VT.isVector()) {
5427 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5428 return FoldedVOp;
5429
5430 // fold (mulhs x, 0) -> 0
5431    // do not return N1, because undef node may exist.
5432    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5433 return DAG.getConstant(0, DL, VT);
5434 }
5435
5436 // fold (mulhs x, 0) -> 0
5437 if (isNullConstant(N1))
5438 return N1;
5439
5440 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5441 if (isOneConstant(N1))
5442 return DAG.getNode(
5443        ISD::SRA, DL, VT, N0,
5444        DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5445
5446 // fold (mulhs x, undef) -> 0
5447 if (N0.isUndef() || N1.isUndef())
5448 return DAG.getConstant(0, DL, VT);
5449
5450 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5451 // plus a shift.
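  // e.g. an i16 mulhs on a target with a legal i32 multiply: sign-extend both
  // operands to i32, multiply, shift the product right by 16, then truncate.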
5452 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5453 !VT.isVector()) {
5454 MVT Simple = VT.getSimpleVT();
5455 unsigned SimpleSize = Simple.getSizeInBits();
5456 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5457 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5458 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5459 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5460 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5461 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5462 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5463 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5464 }
5465 }
5466
5467 return SDValue();
5468}
5469
5470SDValue DAGCombiner::visitMULHU(SDNode *N) {
5471 SDValue N0 = N->getOperand(0);
5472 SDValue N1 = N->getOperand(1);
5473 EVT VT = N->getValueType(0);
5474 SDLoc DL(N);
5475
5476 // fold (mulhu c1, c2)
5477 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5478 return C;
5479
5480  // canonicalize constant to RHS.
5481  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5482      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5483 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5484
5485 if (VT.isVector()) {
5486 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5487 return FoldedVOp;
5488
5489 // fold (mulhu x, 0) -> 0
5490    // do not return N1, because undef node may exist.
5491    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5492 return DAG.getConstant(0, DL, VT);
5493 }
5494
5495 // fold (mulhu x, 0) -> 0
5496 if (isNullConstant(N1))
5497 return N1;
5498
5499 // fold (mulhu x, 1) -> 0
5500 if (isOneConstant(N1))
5501 return DAG.getConstant(0, DL, VT);
5502
5503 // fold (mulhu x, undef) -> 0
5504 if (N0.isUndef() || N1.isUndef())
5505 return DAG.getConstant(0, DL, VT);
5506
5507 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
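  // e.g. for i32: mulhu x, 16 is the high half of the 64-bit product x * 16,
  // i.e. x >> (32 - 4) == x >> 28.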
5508 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5509 hasOperation(ISD::SRL, VT)) {
5510 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5511 unsigned NumEltBits = VT.getScalarSizeInBits();
5512 SDValue SRLAmt = DAG.getNode(
5513 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5514 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5515 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5516 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5517 }
5518 }
5519
5520 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5521 // plus a shift.
5522 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5523 !VT.isVector()) {
5524 MVT Simple = VT.getSimpleVT();
5525 unsigned SimpleSize = Simple.getSizeInBits();
5526 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5527 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5528 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5529 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5530 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5531 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5532 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5533 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5534 }
5535 }
5536
5537 // Simplify the operands using demanded-bits information.
5538 // We don't have demanded bits support for MULHU so this just enables constant
5539  // folding based on known bits.
5540  if (SimplifyDemandedBits(SDValue(N, 0)))
5541 return SDValue(N, 0);
5542
5543 return SDValue();
5544}
5545
5546SDValue DAGCombiner::visitAVG(SDNode *N) {
5547 unsigned Opcode = N->getOpcode();
5548 SDValue N0 = N->getOperand(0);
5549 SDValue N1 = N->getOperand(1);
5550 EVT VT = N->getValueType(0);
5551 SDLoc DL(N);
5552 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5553
5554 // fold (avg c1, c2)
5555 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5556 return C;
5557
5558  // canonicalize constant to RHS.
5559  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5560      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5561 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5562
5563 if (VT.isVector())
5564 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5565 return FoldedVOp;
5566
5567 // fold (avg x, undef) -> x
5568 if (N0.isUndef())
5569 return N1;
5570 if (N1.isUndef())
5571 return N0;
5572
5573 // fold (avg x, x) --> x
5574 if (N0 == N1 && Level >= AfterLegalizeTypes)
5575 return N0;
5576
5577 // fold (avgfloor x, 0) -> x >> 1
5578  SDValue X, Y;
5579  if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5580 return DAG.getNode(ISD::SRA, DL, VT, X,
5581                       DAG.getShiftAmountConstant(1, VT, DL));
5582  if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5583 return DAG.getNode(ISD::SRL, DL, VT, X,
5584 DAG.getShiftAmountConstant(1, VT, DL));
5585
5586 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5587 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5588 if (!IsSigned &&
5589 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5590 X.getValueType() == Y.getValueType() &&
5591 hasOperation(Opcode, X.getValueType())) {
5592 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5593 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5594 }
5595 if (IsSigned &&
5596 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5597 X.getValueType() == Y.getValueType() &&
5598 hasOperation(Opcode, X.getValueType())) {
5599 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5600 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5601 }
5602
5603 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5604 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5605 // Check if avgflooru isn't legal/custom but avgceilu is.
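  // The identity holds because (x + y) >> 1 == (x + (y - 1) + 1) >> 1, so the
  // floor average of x and y equals the ceiling average of x and y-1 whenever
  // y >= 1 (and symmetrically for x).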
5606 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5607 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5608 if (DAG.isKnownNeverZero(N1))
5609 return DAG.getNode(
5610 ISD::AVGCEILU, DL, VT, N0,
5611 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5612 if (DAG.isKnownNeverZero(N0))
5613 return DAG.getNode(
5614 ISD::AVGCEILU, DL, VT, N1,
5615 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5616 }
5617
5618 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5619 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
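  // These hold because avgfloor(x + y, 1) and avgfloor(x + 1, y) both compute
  // ((x + y) + 1) >> 1, which is exactly avgceil(x, y); the no-wrap flag on the
  // add guarantees the sum did not overflow.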
5620 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5621 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5622 SDValue Add;
5623 if (sd_match(N,
5624                 m_c_BinOp(Opcode,
5625                           m_AllOf(m_Value(Add), m_Add(m_Value(X), m_Value(Y))),
5626 m_One())) ||
5627        sd_match(N, m_c_BinOp(Opcode,
5628                              m_AllOf(m_Value(Add), m_Add(m_Value(X), m_One())),
5629 m_Value(Y)))) {
5630
5631 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5632 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5633
5634 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5635 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5636 }
5637 }
5638
5639 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5640 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5641 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5642 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5643 }
5644
5645 return SDValue();
5646}
5647
5648SDValue DAGCombiner::visitABD(SDNode *N) {
5649 unsigned Opcode = N->getOpcode();
5650 SDValue N0 = N->getOperand(0);
5651 SDValue N1 = N->getOperand(1);
5652 EVT VT = N->getValueType(0);
5653 SDLoc DL(N);
5654
5655 // fold (abd c1, c2)
5656 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5657 return C;
5658
5659  // canonicalize constant to RHS.
5660  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5661      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5662 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5663
5664 if (VT.isVector())
5665 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5666 return FoldedVOp;
5667
5668 // fold (abd x, undef) -> 0
5669 if (N0.isUndef() || N1.isUndef())
5670 return DAG.getConstant(0, DL, VT);
5671
5672 // fold (abd x, x) -> 0
5673 if (N0 == N1)
5674 return DAG.getConstant(0, DL, VT);
5675
5676 SDValue X;
5677
5678  // fold (abds x, 0) -> abs x
5679  if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5680 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5681 return DAG.getNode(ISD::ABS, DL, VT, X);
5682
5683  // fold (abdu x, 0) -> x
5684  if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5685 return X;
5686
5687 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5688 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5689 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5690 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5691
5692 return SDValue();
5693}
5694
5695/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5696/// give the opcodes for the two computations that are being performed. Return
5697/// the combined result if a simplification was made, otherwise a null SDValue.
5698SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5699 unsigned HiOp) {
5700 // If the high half is not needed, just compute the low half.
5701 bool HiExists = N->hasAnyUseOfValue(1);
5702 if (!HiExists && (!LegalOperations ||
5703 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5704 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5705 return CombineTo(N, Res, Res);
5706 }
5707
5708 // If the low half is not needed, just compute the high half.
5709 bool LoExists = N->hasAnyUseOfValue(0);
5710 if (!LoExists && (!LegalOperations ||
5711 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5712 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5713 return CombineTo(N, Res, Res);
5714 }
5715
5716 // If both halves are used, return as it is.
5717 if (LoExists && HiExists)
5718 return SDValue();
5719
5720 // If the two computed results can be simplified separately, separate them.
5721 if (LoExists) {
5722 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5723 AddToWorklist(Lo.getNode());
5724 SDValue LoOpt = combine(Lo.getNode());
5725 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5726 (!LegalOperations ||
5727 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5728 return CombineTo(N, LoOpt, LoOpt);
5729 }
5730
5731 if (HiExists) {
5732 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5733 AddToWorklist(Hi.getNode());
5734 SDValue HiOpt = combine(Hi.getNode());
5735 if (HiOpt.getNode() && HiOpt != Hi &&
5736 (!LegalOperations ||
5737 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5738 return CombineTo(N, HiOpt, HiOpt);
5739 }
5740
5741 return SDValue();
5742}
5743
5744SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5745 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5746 return Res;
5747
5748 SDValue N0 = N->getOperand(0);
5749 SDValue N1 = N->getOperand(1);
5750 EVT VT = N->getValueType(0);
5751 SDLoc DL(N);
5752
5753  // Constant fold.
5754  if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5755 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5756
5757  // canonicalize constant to RHS (vector doesn't have to splat)
5758  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5759      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5760 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5761
5762  // If the type twice as wide is legal, transform the smul_lohi into a wider
5763  // multiply plus a shift.
5764 if (VT.isSimple() && !VT.isVector()) {
5765 MVT Simple = VT.getSimpleVT();
5766 unsigned SimpleSize = Simple.getSizeInBits();
5767 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5768 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5769 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5770 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5771 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5772      // Extract the high part by shifting the wide product down.
5773 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5774 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5775 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5776      // Extract the low part by truncating the wide product.
5777 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5778 return CombineTo(N, Lo, Hi);
5779 }
5780 }
5781
5782 return SDValue();
5783}
5784
5785SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5786 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5787 return Res;
5788
5789 SDValue N0 = N->getOperand(0);
5790 SDValue N1 = N->getOperand(1);
5791 EVT VT = N->getValueType(0);
5792 SDLoc DL(N);
5793
5794  // Constant fold.
5795  if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5796 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5797
5798  // canonicalize constant to RHS (vector doesn't have to splat)
5799  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5800      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5801 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5802
5803 // (umul_lohi N0, 0) -> (0, 0)
5804 if (isNullConstant(N1)) {
5805 SDValue Zero = DAG.getConstant(0, DL, VT);
5806 return CombineTo(N, Zero, Zero);
5807 }
5808
5809 // (umul_lohi N0, 1) -> (N0, 0)
5810 if (isOneConstant(N1)) {
5811 SDValue Zero = DAG.getConstant(0, DL, VT);
5812 return CombineTo(N, N0, Zero);
5813 }
5814
5815  // If the type twice as wide is legal, transform the umul_lohi into a wider
5816  // multiply plus a shift.
5817 if (VT.isSimple() && !VT.isVector()) {
5818 MVT Simple = VT.getSimpleVT();
5819 unsigned SimpleSize = Simple.getSizeInBits();
5820 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5821 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5822 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5823 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5824 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5826      // Extract the high part by shifting the wide product down.
5826 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5827 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5828 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5830      // Extract the low part by truncating the wide product.
5830 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5831 return CombineTo(N, Lo, Hi);
5832 }
5833 }
5834
5835 return SDValue();
5836}
5837
5838SDValue DAGCombiner::visitMULO(SDNode *N) {
5839 SDValue N0 = N->getOperand(0);
5840 SDValue N1 = N->getOperand(1);
5841 EVT VT = N0.getValueType();
5842 bool IsSigned = (ISD::SMULO == N->getOpcode());
5843
5844 EVT CarryVT = N->getValueType(1);
5845 SDLoc DL(N);
5846
5847 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5848 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5849
5850 // fold operation with constant operands.
5851 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5852 // multiple results.
5853 if (N0C && N1C) {
5854 bool Overflow;
5855 APInt Result =
5856 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5857 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5858 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5859 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5860 }
5861
5862  // canonicalize constant to RHS.
5863  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5864      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5865 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5866
5867 // fold (mulo x, 0) -> 0 + no carry out
5868 if (isNullOrNullSplat(N1))
5869 return CombineTo(N, DAG.getConstant(0, DL, VT),
5870 DAG.getConstant(0, DL, CarryVT));
5871
5872 // (mulo x, 2) -> (addo x, x)
5873 // FIXME: This needs a freeze.
5874 if (N1C && N1C->getAPIntValue() == 2 &&
5875 (!IsSigned || VT.getScalarSizeInBits() > 2))
5876 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5877 N->getVTList(), N0, N0);
5878
5879 // A 1 bit SMULO overflows if both inputs are 1.
5880 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5881 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5882 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5883 DAG.getConstant(0, DL, VT), ISD::SETNE);
5884 return CombineTo(N, And, Cmp);
5885 }
5886
5887 // If it cannot overflow, transform into a mul.
5888 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5889 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5890 DAG.getConstant(0, DL, CarryVT));
5891 return SDValue();
5892}
5893
5894// Function to calculate whether the Min/Max pair of SDNodes (potentially
5895// swapped around) make a signed saturate pattern, clamping to between a signed
5896// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5897// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5898// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5899// same as SimplifySelectCC. N0<N1 ? N2 : N3.
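// e.g. smin(smax(x, -128), 127) clamps x to the signed i8 range, so the helper
// returns x with BW = 8 and Unsigned = false.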
5900static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5901                                  SDValue N3, ISD::CondCode CC, unsigned &BW,
5902 bool &Unsigned, SelectionDAG &DAG) {
5903 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5904 ISD::CondCode CC) {
5905 // The compare and select operand should be the same or the select operands
5906 // should be truncated versions of the comparison.
5907 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5908 return 0;
5909    // The constants need to be the same or a truncated version of each other.
5910    ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5911    ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5912 if (!N1C || !N3C)
5913 return 0;
5914 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5915 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5916 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5917 return 0;
5918 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5919 };
5920
5921 // Check the initial value is a SMIN/SMAX equivalent.
5922 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5923 if (!Opcode0)
5924 return SDValue();
5925
5926 // We could only need one range check, if the fptosi could never produce
5927 // the upper value.
5928 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5929 if (isNullOrNullSplat(N3)) {
5930 EVT IntVT = N0.getValueType().getScalarType();
5931 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5932 if (FPVT.isSimple()) {
5933 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5934 const fltSemantics &Semantics = InputTy->getFltSemantics();
5935 uint32_t MinBitWidth =
5936 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5937 if (IntVT.getSizeInBits() >= MinBitWidth) {
5938 Unsigned = true;
5939 BW = PowerOf2Ceil(MinBitWidth);
5940 return N0;
5941 }
5942 }
5943 }
5944 }
5945
5946 SDValue N00, N01, N02, N03;
5947 ISD::CondCode N0CC;
5948 switch (N0.getOpcode()) {
5949 case ISD::SMIN:
5950 case ISD::SMAX:
5951 N00 = N02 = N0.getOperand(0);
5952 N01 = N03 = N0.getOperand(1);
5953 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5954 break;
5955 case ISD::SELECT_CC:
5956 N00 = N0.getOperand(0);
5957 N01 = N0.getOperand(1);
5958 N02 = N0.getOperand(2);
5959 N03 = N0.getOperand(3);
5960 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5961 break;
5962 case ISD::SELECT:
5963 case ISD::VSELECT:
5964 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5965 return SDValue();
5966 N00 = N0.getOperand(0).getOperand(0);
5967 N01 = N0.getOperand(0).getOperand(1);
5968 N02 = N0.getOperand(1);
5969 N03 = N0.getOperand(2);
5970 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5971 break;
5972 default:
5973 return SDValue();
5974 }
5975
5976 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5977 if (!Opcode1 || Opcode0 == Opcode1)
5978 return SDValue();
5979
5980 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5981 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5982 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5983 return SDValue();
5984
5985 const APInt &MinC = MinCOp->getAPIntValue();
5986 const APInt &MaxC = MaxCOp->getAPIntValue();
5987 APInt MinCPlus1 = MinC + 1;
5988 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5989 BW = MinCPlus1.exactLogBase2() + 1;
5990 Unsigned = false;
5991 return N02;
5992 }
5993
5994 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5995 BW = MinCPlus1.exactLogBase2();
5996 Unsigned = true;
5997 return N02;
5998 }
5999
6000 return SDValue();
6001}
6002
6003static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6004 SDValue N3, ISD::CondCode CC,
6005 SelectionDAG &DAG) {
6006 unsigned BW;
6007 bool Unsigned;
6008 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6009 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6010 return SDValue();
6011 EVT FPVT = Fp.getOperand(0).getValueType();
6012 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6013 if (FPVT.isVector())
6014 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6015 FPVT.getVectorElementCount());
6016 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6017 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6018 return SDValue();
6019 SDLoc DL(Fp);
6020 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6021 DAG.getValueType(NewVT.getScalarType()));
6022 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6023}
6024
6025static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6026 SDValue N3, ISD::CondCode CC,
6027 SelectionDAG &DAG) {
6028 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6029 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6030 // be truncated versions of the setcc (N0/N1).
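// e.g. (umin (fp_to_uint x), 255) becomes (fp_to_uint_sat x, i8), which is then
// zero-extended or truncated back to the original result type.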
6031 if ((N0 != N2 &&
6032 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6033 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6034    return SDValue();
6035  ConstantSDNode *N1C = isConstOrConstSplat(N1);
6036  ConstantSDNode *N3C = isConstOrConstSplat(N3);
6037 if (!N1C || !N3C)
6038 return SDValue();
6039 const APInt &C1 = N1C->getAPIntValue();
6040 const APInt &C3 = N3C->getAPIntValue();
6041 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6042 C1 != C3.zext(C1.getBitWidth()))
6043 return SDValue();
6044
6045 unsigned BW = (C1 + 1).exactLogBase2();
6046 EVT FPVT = N0.getOperand(0).getValueType();
6047 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6048 if (FPVT.isVector())
6049 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6050                             FPVT.getVectorElementCount());
6051  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
6052 FPVT, NewVT))
6053 return SDValue();
6054
6055 SDValue Sat =
6056 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6057 DAG.getValueType(NewVT.getScalarType()));
6058 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6059}
6060
6061SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6062 SDValue N0 = N->getOperand(0);
6063 SDValue N1 = N->getOperand(1);
6064 EVT VT = N0.getValueType();
6065 unsigned Opcode = N->getOpcode();
6066 SDLoc DL(N);
6067
6068 // fold operation with constant operands.
6069 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6070 return C;
6071
6072 // If the operands are the same, this is a no-op.
6073 if (N0 == N1)
6074 return N0;
6075
6076 // Fold operation with vscale operands.
6077 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6078 uint64_t C0 = N0->getConstantOperandVal(0);
6079 uint64_t C1 = N1->getConstantOperandVal(0);
6080 if (Opcode == ISD::UMAX)
6081 return C0 > C1 ? N0 : N1;
6082 else if (Opcode == ISD::UMIN)
6083 return C0 > C1 ? N1 : N0;
6084 }
6085
6086  // canonicalize constant to RHS
6087  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6088      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6089 return DAG.getNode(Opcode, DL, VT, N1, N0);
6090
6091 // fold vector ops
6092 if (VT.isVector())
6093 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6094 return FoldedVOp;
6095
6096 // reassociate minmax
6097 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6098 return RMINMAX;
6099
6100  // If sign bits are known zero, flip between UMIN/UMAX and SMIN/SMAX.
6101  // Only do this if either:
6102 // 1. The current op isn't legal and the flipped is.
6103 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6104 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6105 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6106 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6107 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6108 unsigned AltOpcode;
6109 switch (Opcode) {
6110 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6111 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6112 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6113 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6114 default: llvm_unreachable("Unknown MINMAX opcode");
6115 }
6116 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6117 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6118 }
6119
6120  if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6121    if (SDValue S = PerformMinMaxFpToSatCombine(
6122 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6123 return S;
6124 if (Opcode == ISD::UMIN)
6125 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6126 return S;
6127
6128 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6129 auto ReductionOpcode = [](unsigned Opcode) {
6130 switch (Opcode) {
6131 case ISD::SMIN:
6132 return ISD::VECREDUCE_SMIN;
6133 case ISD::SMAX:
6134 return ISD::VECREDUCE_SMAX;
6135 case ISD::UMIN:
6136 return ISD::VECREDUCE_UMIN;
6137 case ISD::UMAX:
6138 return ISD::VECREDUCE_UMAX;
6139 default:
6140 llvm_unreachable("Unexpected opcode");
6141 }
6142 };
6143 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6144 SDLoc(N), VT, N0, N1))
6145 return SD;
6146
6147  // Simplify the operands using demanded-bits information.
6148  if (SimplifyDemandedBits(SDValue(N, 0)))
6149 return SDValue(N, 0);
6150
6151 return SDValue();
6152}
6153
6154/// If this is a bitwise logic instruction and both operands have the same
6155/// opcode, try to sink the other opcode after the logic instruction.
6156SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6157 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6158 EVT VT = N0.getValueType();
6159 unsigned LogicOpcode = N->getOpcode();
6160 unsigned HandOpcode = N0.getOpcode();
6161 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6162 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6163
6164 // Bail early if none of these transforms apply.
6165 if (N0.getNumOperands() == 0)
6166 return SDValue();
6167
6168 // FIXME: We should check number of uses of the operands to not increase
6169 // the instruction count for all transforms.
6170
6171 // Handle size-changing casts (or sign_extend_inreg).
6172 SDValue X = N0.getOperand(0);
6173 SDValue Y = N1.getOperand(0);
6174 EVT XVT = X.getValueType();
6175 SDLoc DL(N);
6176 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6177 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6178 N0.getOperand(1) == N1.getOperand(1))) {
6179 // If both operands have other uses, this transform would create extra
6180 // instructions without eliminating anything.
6181 if (!N0.hasOneUse() && !N1.hasOneUse())
6182 return SDValue();
6183 // We need matching integer source types.
6184 if (XVT != Y.getValueType())
6185 return SDValue();
6186 // Don't create an illegal op during or after legalization. Don't ever
6187 // create an unsupported vector op.
6188 if ((VT.isVector() || LegalOperations) &&
6189 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6190 return SDValue();
6191 // Avoid infinite looping with PromoteIntBinOp.
6192 // TODO: Should we apply desirable/legal constraints to all opcodes?
6193 if ((HandOpcode == ISD::ANY_EXTEND ||
6194 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6195 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6196 return SDValue();
6197 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
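    // e.g. (and (zext x), (zext y)) --> (zext (and x, y))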
6198 SDNodeFlags LogicFlags;
6199 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6200 ISD::isExtOpcode(HandOpcode));
6201 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6202 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6203 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6204 return DAG.getNode(HandOpcode, DL, VT, Logic);
6205 }
6206
6207 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6208 if (HandOpcode == ISD::TRUNCATE) {
6209 // If both operands have other uses, this transform would create extra
6210 // instructions without eliminating anything.
6211 if (!N0.hasOneUse() && !N1.hasOneUse())
6212 return SDValue();
6213 // We need matching source types.
6214 if (XVT != Y.getValueType())
6215 return SDValue();
6216 // Don't create an illegal op during or after legalization.
6217 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6218 return SDValue();
6219 // Be extra careful sinking truncate. If it's free, there's no benefit in
6220 // widening a binop. Also, don't create a logic op on an illegal type.
6221 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6222 return SDValue();
6223 if (!TLI.isTypeLegal(XVT))
6224 return SDValue();
6225 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6226 return DAG.getNode(HandOpcode, DL, VT, Logic);
6227 }
6228
6229 // For binops SHL/SRL/SRA/AND:
6230 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6231 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6232 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6233 N0.getOperand(1) == N1.getOperand(1)) {
6234 // If either operand has other uses, this transform is not an improvement.
6235 if (!N0.hasOneUse() || !N1.hasOneUse())
6236 return SDValue();
6237 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6238 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6239 }
6240
6241 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6242 if (HandOpcode == ISD::BSWAP) {
6243 // If either operand has other uses, this transform is not an improvement.
6244 if (!N0.hasOneUse() || !N1.hasOneUse())
6245 return SDValue();
6246 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6247 return DAG.getNode(HandOpcode, DL, VT, Logic);
6248 }
6249
6250 // For funnel shifts FSHL/FSHR:
6251 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6252 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6253 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6254 N0.getOperand(2) == N1.getOperand(2)) {
6255 if (!N0.hasOneUse() || !N1.hasOneUse())
6256 return SDValue();
6257 SDValue X1 = N0.getOperand(1);
6258 SDValue Y1 = N1.getOperand(1);
6259 SDValue S = N0.getOperand(2);
6260 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6261 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6262 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6263 }
6264
6265 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6266 // Only perform this optimization up until type legalization, before
6267  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6268 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6269 // we don't want to undo this promotion.
6270 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6271 // on scalars.
6272 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6273 Level <= AfterLegalizeTypes) {
6274 // Input types must be integer and the same.
6275 if (XVT.isInteger() && XVT == Y.getValueType() &&
6276 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6277 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6278 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6279 return DAG.getNode(HandOpcode, DL, VT, Logic);
6280 }
6281 }
6282
6283 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6284 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6285 // If both shuffles use the same mask, and both shuffle within a single
6286 // vector, then it is worthwhile to move the swizzle after the operation.
6287 // The type-legalizer generates this pattern when loading illegal
6288 // vector types from memory. In many cases this allows additional shuffle
6289 // optimizations.
6290 // There are other cases where moving the shuffle after the xor/and/or
6291 // is profitable even if shuffles don't perform a swizzle.
6292 // If both shuffles use the same mask, and both shuffles have the same first
6293 // or second operand, then it might still be profitable to move the shuffle
6294 // after the xor/and/or operation.
6295 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6296 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6297 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6298 assert(X.getValueType() == Y.getValueType() &&
6299 "Inputs to shuffles are not the same type");
6300
6301 // Check that both shuffles use the same mask. The masks are known to be of
6302 // the same length because the result vector type is the same.
6303 // Check also that shuffles have only one use to avoid introducing extra
6304 // instructions.
6305 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6306 !SVN0->getMask().equals(SVN1->getMask()))
6307 return SDValue();
6308
6309 // Don't try to fold this node if it requires introducing a
6310 // build vector of all zeros that might be illegal at this stage.
6311 SDValue ShOp = N0.getOperand(1);
6312 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6313 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6314
6315 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6316 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6317 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6318 N0.getOperand(0), N1.getOperand(0));
6319 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6320 }
6321
6322 // Don't try to fold this node if it requires introducing a
6323 // build vector of all zeros that might be illegal at this stage.
6324 ShOp = N0.getOperand(0);
6325 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6326 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6327
6328 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6329 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6330 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6331 N1.getOperand(1));
6332 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6333 }
6334 }
6335
6336 return SDValue();
6337}
6338
6339/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6340SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6341 const SDLoc &DL) {
6342 SDValue LL, LR, RL, RR, N0CC, N1CC;
6343 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6344 !isSetCCEquivalent(N1, RL, RR, N1CC))
6345 return SDValue();
6346
6347 assert(N0.getValueType() == N1.getValueType() &&
6348 "Unexpected operand types for bitwise logic op");
6349 assert(LL.getValueType() == LR.getValueType() &&
6350 RL.getValueType() == RR.getValueType() &&
6351 "Unexpected operand types for setcc");
6352
6353 // If we're here post-legalization or the logic op type is not i1, the logic
6354 // op type must match a setcc result type. Also, all folds require new
6355 // operations on the left and right operands, so those types must match.
6356 EVT VT = N0.getValueType();
6357 EVT OpVT = LL.getValueType();
6358 if (LegalOperations || VT.getScalarType() != MVT::i1)
6359 if (VT != getSetCCResultType(OpVT))
6360 return SDValue();
6361 if (OpVT != RL.getValueType())
6362 return SDValue();
6363
6364 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6365 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6366 bool IsInteger = OpVT.isInteger();
6367 if (LR == RR && CC0 == CC1 && IsInteger) {
6368 bool IsZero = isNullOrNullSplat(LR);
6369 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6370
6371 // All bits clear?
6372 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6373 // All sign bits clear?
6374 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6375 // Any bits set?
6376 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6377 // Any sign bits set?
6378 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6379
6380 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6381 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6382 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6383 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
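// Illustrative check of the first rule, assuming i8 values: with X = 0x0F and
// Y = 0x30, (X == 0) & (Y == 0) is false and ((X | Y) == 0) is (0x3F == 0),
// also false; in general X | Y is zero exactly when both X and Y are zero.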
6384 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6385 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6386 AddToWorklist(Or.getNode());
6387 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6388 }
6389
6390 // All bits set?
6391 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6392 // All sign bits set?
6393 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6394 // Any bits clear?
6395 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6396 // Any sign bits clear?
6397 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6398
6399 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6400 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6401 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6402 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6403 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6404 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6405 AddToWorklist(And.getNode());
6406 return DAG.getSetCC(DL, VT, And, LR, CC1);
6407 }
6408 }
6409
6410 // TODO: What is the 'or' equivalent of this fold?
6411 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
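// Sketch of why the add/setuge form works, using i8 X: X == 0 maps to
// (add X, 1) == 1 and X == -1 wraps to (add X, 1) == 0, so "X != 0 && X != -1"
// holds exactly when (add X, 1) is unsigned-greater-or-equal 2.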
6412 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6413 IsInteger && CC0 == ISD::SETNE &&
6414 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6415 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6416 SDValue One = DAG.getConstant(1, DL, OpVT);
6417 SDValue Two = DAG.getConstant(2, DL, OpVT);
6418 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6419 AddToWorklist(Add.getNode());
6420 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6421 }
6422
6423 // Try more general transforms if the predicates match and the only user of
6424 // the compares is the 'and' or 'or'.
6425 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6426 N0.hasOneUse() && N1.hasOneUse()) {
6427 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6428 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6429 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6430 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6431 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6432 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6433 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6434 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6435 }
6436
6437 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6438 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6439 // Match a shared variable operand and 2 non-opaque constant operands.
6440 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6441 // The difference of the constants must be a single bit.
6442 const APInt &CMax =
6443 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6444 const APInt &CMin =
6445 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6446 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6447 };
6448 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6449 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6450 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
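// Worked example with CMin = 4 and CMax = 6 (difference 2, a power of 2):
// "X != 4 && X != 6" becomes "((X - 4) & ~2) != 0". X = 4 and X = 6 both mask
// down to 0, and any other X leaves a nonzero masked value.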
6451 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6452 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6453 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6454 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6455 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6456 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6457 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6458 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6459 }
6460 }
6461 }
6462
6463 // Canonicalize equivalent operands to LL == RL.
6464 if (LL == RR && LR == RL) {
6465 CC1 = ISD::getSetCCSwappedOperands(CC1);
6466 std::swap(RL, RR);
6467 }
6468
6469 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6470 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6471 if (LL == RL && LR == RR) {
6472 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6473 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6474 if (NewCC != ISD::SETCC_INVALID &&
6475 (!LegalOperations ||
6476 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6477 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6478 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6479 }
6480
6481 return SDValue();
6482}
6483
6484static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6485 SelectionDAG &DAG) {
6486 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6487}
6488
6489static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6490 SelectionDAG &DAG) {
6491 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6492}
6493
6494// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6495static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6496 ISD::CondCode CC, unsigned OrAndOpcode,
6497 SelectionDAG &DAG,
6498 bool isFMAXNUMFMINNUM_IEEE,
6499 bool isFMAXNUMFMINNUM) {
6500 // The optimization cannot be applied for all the predicates because
6501 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6502 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6503 // applied at all if one of the operands is a signaling NaN.
6504
6505 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6506 // are non NaN values.
6507 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6508 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6509 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6510 isFMAXNUMFMINNUM_IEEE
6511 ? ISD::FMINNUM_IEEE
6512 : ISD::DELETED_NODE;
6513 }
6514
6515 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6516 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6517 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6518 isFMAXNUMFMINNUM_IEEE
6519 ? ISD::FMAXNUM_IEEE
6520 : ISD::DELETED_NODE;
6521 }
6522
6523 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6524 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6525 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6526 // that there are not any sNaNs, then the optimization is not valid
6527 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6528 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6529 // we can prove that we do not have any sNaNs, then we can do the
6530 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6531 // cases.
6532 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6533 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6534 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6535 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6536 isFMAXNUMFMINNUM_IEEE
6537 ? ISD::FMINNUM_IEEE
6538 : ISD::DELETED_NODE;
6539 }
6540
6541 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6542 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6543 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6544 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6545 isFMAXNUMFMINNUM_IEEE
6546 ? ISD::FMAXNUM_IEEE
6547 : ISD::DELETED_NODE;
6548 }
6549
6550 return ISD::DELETED_NODE;
6551}
6552
6553 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6554 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6555 assert(
6556 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6557 "Invalid Op to combine SETCC with");
6558
6559 // TODO: Search past casts/truncates.
6560 SDValue LHS = LogicOp->getOperand(0);
6561 SDValue RHS = LogicOp->getOperand(1);
6562 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6563 !LHS->hasOneUse() || !RHS->hasOneUse())
6564 return SDValue();
6565
6566 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6567 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6568 LogicOp, LHS.getNode(), RHS.getNode());
6569
6570 SDValue LHS0 = LHS->getOperand(0);
6571 SDValue RHS0 = RHS->getOperand(0);
6572 SDValue LHS1 = LHS->getOperand(1);
6573 SDValue RHS1 = RHS->getOperand(1);
6574 // TODO: We don't actually need a splat here, for vectors we just need the
6575 // invariants to hold for each element.
6576 auto *LHS1C = isConstOrConstSplat(LHS1);
6577 auto *RHS1C = isConstOrConstSplat(RHS1);
6578 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6579 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6580 EVT VT = LogicOp->getValueType(0);
6581 EVT OpVT = LHS0.getValueType();
6582 SDLoc DL(LogicOp);
6583
6584 // Check if the operands of an and/or operation are comparisons and if they
6585 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6586 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6587 // sequence will be replaced with min-cmp sequence:
6588 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6589 // and and-cmp-cmp will be replaced with max-cmp sequence:
6590 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6591 // The optimization does not work for `==` or `!=`.
6592 // The two comparisons should have either the same predicate or the
6593 // predicate of one of the comparisons is the opposite of the other one.
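// For example, "(a < 7) | (b < 7)" is equivalent to "min(a, b) < 7" and
// "(a < 7) & (b < 7)" to "max(a, b) < 7" (using the signedness implied by the
// predicate), whereas "(a == 7) | (b == 7)" has no such min/max form.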
6594 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6595 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6596 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6597 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6598 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6599 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6600 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6601 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6602 (OpVT.isFloatingPoint() &&
6603 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6604 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6605 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6606 CCL != ISD::SETTRUE &&
6607 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6608
6609 SDValue CommonValue, Operand1, Operand2;
6610 ISD::CondCode CC = ISD::SETCC_INVALID;
6611 if (CCL == CCR) {
6612 if (LHS0 == RHS0) {
6613 CommonValue = LHS0;
6614 Operand1 = LHS1;
6615 Operand2 = RHS1;
6616 CC = CCL;
6617 } else if (LHS1 == RHS1) {
6618 CommonValue = LHS1;
6619 Operand1 = LHS0;
6620 Operand2 = RHS0;
6621 CC = CCL;
6622 }
6623 } else {
6624 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6625 if (LHS0 == RHS1) {
6626 CommonValue = LHS0;
6627 Operand1 = LHS1;
6628 Operand2 = RHS0;
6629 CC = CCR;
6630 } else if (RHS0 == LHS1) {
6631 CommonValue = LHS1;
6632 Operand1 = LHS0;
6633 Operand2 = RHS1;
6634 CC = CCL;
6635 }
6636 }
6637
6638 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6639 // handle it using OR/AND.
6640 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6641 CC = ISD::SETCC_INVALID;
6642 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6643 CC = ISD::SETCC_INVALID;
6644
6645 if (CC != ISD::SETCC_INVALID) {
6646 unsigned NewOpcode = ISD::DELETED_NODE;
6647 bool IsSigned = isSignedIntSetCC(CC);
6648 if (OpVT.isInteger()) {
6649 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6650 CC == ISD::SETLT || CC == ISD::SETULT);
6651 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6652 if (IsLess == IsOr)
6653 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6654 else
6655 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6656 } else if (OpVT.isFloatingPoint())
6657 NewOpcode =
6658 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6659 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6660
6661 if (NewOpcode != ISD::DELETED_NODE) {
6662 SDValue MinMaxValue =
6663 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6664 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6665 }
6666 }
6667 }
6668
6669 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6670 LHS0.getValueType() == RHS0.getValueType() &&
6671 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6672 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6673 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6674
6675 if (TargetPreference == AndOrSETCCFoldKind::None)
6676 return SDValue();
6677
6678 if (CCL == CCR &&
6679 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6680 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6681 const APInt &APLhs = LHS1C->getAPIntValue();
6682 const APInt &APRhs = RHS1C->getAPIntValue();
6683
6684 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6685 // case this is just a compare).
6686 if (APLhs == (-APRhs) &&
6687 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6688 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6689 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6690 // (icmp eq A, C) | (icmp eq A, -C)
6691 // -> (icmp eq Abs(A), C)
6692 // (icmp ne A, C) & (icmp ne A, -C)
6693 // -> (icmp ne Abs(A), C)
6694 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6695 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6696 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6697 } else if (TargetPreference &
6698 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6699
6700 // AndOrSETCCFoldKind::AddAnd:
6701 // A == C0 | A == C1
6702 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6703 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6704 // A != C0 & A != C1
6705 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6706 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6707
6708 // AndOrSETCCFoldKind::NotAnd:
6709 // A == C0 | A == C1
6710 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6711 // -> ~A & smin(C0, C1) == 0
6712 // A != C0 & A != C1
6713 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6714 // -> ~A & smin(C0, C1) != 0
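// Worked AddAnd example with C0 = 8, C1 = 12: smax - smin = 4 is a power of 2,
// so "A == 8 | A == 12" becomes "((A - 8) & ~4) == 0"; only A = 8 and A = 12
// mask down to 0.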
6715
6716 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6717 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6718 APInt Dif = MaxC - MinC;
6719 if (!Dif.isZero() && Dif.isPowerOf2()) {
6720 if (MaxC.isAllOnes() &&
6721 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6722 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6723 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6724 DAG.getConstant(MinC, DL, OpVT));
6725 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6726 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6727 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6728
6729 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6730 DAG.getConstant(-MinC, DL, OpVT));
6731 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6732 DAG.getConstant(~Dif, DL, OpVT));
6733 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6734 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6735 }
6736 }
6737 }
6738 }
6739
6740 return SDValue();
6741}
6742
6743// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6744// We canonicalize to the `select` form in the middle end, but the `and` form
6745 // gets better codegen on all tested targets (arm, x86, riscv).
6746 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6747 const SDLoc &DL, SelectionDAG &DAG) {
6748 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6749 if (!isNullConstant(F))
6750 return SDValue();
6751
6752 EVT CondVT = Cond.getValueType();
6753 if (TLI.getBooleanContents(CondVT) !=
6754 TargetLowering::ZeroOrOneBooleanContent)
6755 return SDValue();
6756
6757 if (T.getOpcode() != ISD::AND)
6758 return SDValue();
6759
6760 if (!isOneConstant(T.getOperand(1)))
6761 return SDValue();
6762
6763 EVT OpVT = T.getValueType();
6764
6765 SDValue CondMask =
6766 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6767 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6768}
6769
6770/// This contains all DAGCombine rules which reduce two values combined by
6771/// an And operation to a single value. This makes them reusable in the context
6772/// of visitSELECT(). Rules involving constants are not included as
6773/// visitSELECT() already handles those cases.
6774SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6775 EVT VT = N1.getValueType();
6776 SDLoc DL(N);
6777
6778 // fold (and x, undef) -> 0
6779 if (N0.isUndef() || N1.isUndef())
6780 return DAG.getConstant(0, DL, VT);
6781
6782 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6783 return V;
6784
6785 // Canonicalize:
6786 // and(x, add) -> and(add, x)
6787 if (N1.getOpcode() == ISD::ADD)
6788 std::swap(N0, N1);
6789
6790 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6791 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6792 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6793 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6794 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6795 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6796 // immediate for an add, but it is legal if its top c2 bits are set,
6797 // transform the ADD so the immediate doesn't need to be materialized
6798 // in a register.
6799 APInt ADDC = ADDI->getAPIntValue();
6800 APInt SRLC = SRLI->getAPIntValue();
6801 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6802 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6803 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6804 SRLC.getZExtValue());
6805 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6806 ADDC |= Mask;
6807 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6808 SDLoc DL0(N0);
6809 SDValue NewAdd =
6810 DAG.getNode(ISD::ADD, DL0, VT,
6811 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6812 CombineTo(N0.getNode(), NewAdd);
6813 // Return N so it doesn't get rechecked!
6814 return SDValue(N, 0);
6815 }
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 return SDValue();
6823}
6824
6825bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6826 EVT LoadResultTy, EVT &ExtVT) {
6827 if (!AndC->getAPIntValue().isMask())
6828 return false;
6829
6830 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6831
6832 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6833 EVT LoadedVT = LoadN->getMemoryVT();
6834
6835 if (ExtVT == LoadedVT &&
6836 (!LegalOperations ||
6837 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6838 // ZEXTLOAD will match without needing to change the size of the value being
6839 // loaded.
6840 return true;
6841 }
6842
6843 // Do not change the width of a volatile or atomic load.
6844 if (!LoadN->isSimple())
6845 return false;
6846
6847 // Do not generate loads of non-round integer types since these can
6848 // be expensive (and would be wrong if the type is not byte sized).
6849 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6850 return false;
6851
6852 if (LegalOperations &&
6853 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6854 return false;
6855
6856 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6857 return false;
6858
6859 return true;
6860}
6861
6862bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6863 ISD::LoadExtType ExtType, EVT &MemVT,
6864 unsigned ShAmt) {
6865 if (!LDST)
6866 return false;
6867
6868 // Only allow byte offsets.
6869 if (ShAmt % 8)
6870 return false;
6871 const unsigned ByteShAmt = ShAmt / 8;
6872
6873 // Do not generate loads of non-round integer types since these can
6874 // be expensive (and would be wrong if the type is not byte sized).
6875 if (!MemVT.isRound())
6876 return false;
6877
6878 // Don't change the width of a volatile or atomic load.
6879 if (!LDST->isSimple())
6880 return false;
6881
6882 EVT LdStMemVT = LDST->getMemoryVT();
6883
6884 // Bail out when changing the scalable property, since we can't be sure that
6885 // we're actually narrowing here.
6886 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6887 return false;
6888
6889 // Verify that we are actually reducing a load width here.
6890 if (LdStMemVT.bitsLT(MemVT))
6891 return false;
6892
6893 // Ensure that this isn't going to produce an unsupported memory access.
6894 if (ShAmt) {
6895 const Align LDSTAlign = LDST->getAlign();
6896 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6897 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6898 LDST->getAddressSpace(), NarrowAlign,
6899 LDST->getMemOperand()->getFlags()))
6900 return false;
6901 }
6902
6903 // It's not possible to generate a constant of extended or untyped type.
6904 EVT PtrType = LDST->getBasePtr().getValueType();
6905 if (PtrType == MVT::Untyped || PtrType.isExtended())
6906 return false;
6907
6908 if (isa<LoadSDNode>(LDST)) {
6909 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6910 // Don't transform one with multiple uses, this would require adding a new
6911 // load.
6912 if (!SDValue(Load, 0).hasOneUse())
6913 return false;
6914
6915 if (LegalOperations &&
6916 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6917 return false;
6918
6919 // For the transform to be legal, the load must produce only two values
6920 // (the value loaded and the chain). Don't transform a pre-increment
6921 // load, for example, which produces an extra value. Otherwise the
6922 // transformation is not equivalent, and the downstream logic to replace
6923 // uses gets things wrong.
6924 if (Load->getNumValues() > 2)
6925 return false;
6926
6927 // If the load that we're shrinking is an extload and we're not just
6928 // discarding the extension we can't simply shrink the load. Bail.
6929 // TODO: It would be possible to merge the extensions in some cases.
6930 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6931 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6932 return false;
6933
6934 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
6935 return false;
6936 } else {
6937 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6938 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6939 // Can't write outside the original store
6940 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6941 return false;
6942
6943 if (LegalOperations &&
6944 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6945 return false;
6946 }
6947 return true;
6948}
6949
6950bool DAGCombiner::SearchForAndLoads(SDNode *N,
6951 SmallVectorImpl<LoadSDNode*> &Loads,
6952 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6953 ConstantSDNode *Mask,
6954 SDNode *&NodeToMask) {
6955 // Recursively search for the operands, looking for loads which can be
6956 // narrowed.
6957 for (SDValue Op : N->op_values()) {
6958 if (Op.getValueType().isVector())
6959 return false;
6960
6961 // Some constants may need fixing up later if they are too large.
6962 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6963 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
6964 "Expected bitwise logic operation");
6965 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
6966 NodesWithConsts.insert(N);
6967 continue;
6968 }
6969
6970 if (!Op.hasOneUse())
6971 return false;
6972
6973 switch(Op.getOpcode()) {
6974 case ISD::LOAD: {
6975 auto *Load = cast<LoadSDNode>(Op);
6976 EVT ExtVT;
6977 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6978 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6979
6980 // ZEXTLOAD is already small enough.
6981 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6982 ExtVT.bitsGE(Load->getMemoryVT()))
6983 continue;
6984
6985 // Use LE to convert equal sized loads to zext.
6986 if (ExtVT.bitsLE(Load->getMemoryVT()))
6987 Loads.push_back(Load);
6988
6989 continue;
6990 }
6991 return false;
6992 }
6993 case ISD::ZERO_EXTEND:
6994 case ISD::AssertZext: {
6995 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6996 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6997 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6998 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6999 Op.getOperand(0).getValueType();
7000
7001 // We can accept extending nodes if the mask is wider than or equal in
7002 // width to the original type.
7003 if (ExtVT.bitsGE(VT))
7004 continue;
7005 break;
7006 }
7007 case ISD::OR:
7008 case ISD::XOR:
7009 case ISD::AND:
7010 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7011 NodeToMask))
7012 return false;
7013 continue;
7014 }
7015
7016 // Allow one node which will be masked along with any loads found.
7017 if (NodeToMask)
7018 return false;
7019
7020 // Also ensure that the node to be masked only produces one data result.
7021 NodeToMask = Op.getNode();
7022 if (NodeToMask->getNumValues() > 1) {
7023 bool HasValue = false;
7024 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7025 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7026 if (VT != MVT::Glue && VT != MVT::Other) {
7027 if (HasValue) {
7028 NodeToMask = nullptr;
7029 return false;
7030 }
7031 HasValue = true;
7032 }
7033 }
7034 assert(HasValue && "Node to be masked has no data result?");
7035 }
7036 }
7037 return true;
7038}
7039
7040bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7041 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7042 if (!Mask)
7043 return false;
7044
7045 if (!Mask->getAPIntValue().isMask())
7046 return false;
7047
7048 // No need to do anything if the and directly uses a load.
7049 if (isa<LoadSDNode>(N->getOperand(0)))
7050 return false;
7051
7052 SmallVector<LoadSDNode*, 8> Loads;
7053 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7054 SDNode *FixupNode = nullptr;
7055 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7056 if (Loads.empty())
7057 return false;
7058
7059 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7060 SDValue MaskOp = N->getOperand(1);
7061
7062 // If it exists, fixup the single node we allow in the tree that needs
7063 // masking.
7064 if (FixupNode) {
7065 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7066 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7067 FixupNode->getValueType(0),
7068 SDValue(FixupNode, 0), MaskOp);
7069 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7070 if (And.getOpcode() == ISD ::AND)
7071 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7072 }
7073
7074 // Narrow any constants that need it.
7075 for (auto *LogicN : NodesWithConsts) {
7076 SDValue Op0 = LogicN->getOperand(0);
7077 SDValue Op1 = LogicN->getOperand(1);
7078
7079 // We only need to fix AND if both inputs are constants. And we only need
7080 // to fix one of the constants.
7081 if (LogicN->getOpcode() == ISD::AND &&
7082 !(isa<ConstantSDNode>(Op0) && isa<ConstantSDNode>(Op1)))
7083 continue;
7084
7085 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7086 Op0 =
7087 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7088
7089 if (isa<ConstantSDNode>(Op1))
7090 Op1 =
7091 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7092
7093 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7094 std::swap(Op0, Op1);
7095
7096 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7097 }
7098
7099 // Create narrow loads.
7100 for (auto *Load : Loads) {
7101 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7102 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7103 SDValue(Load, 0), MaskOp);
7104 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7105 if (And.getOpcode() == ISD ::AND)
7106 And = SDValue(
7107 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7108 SDValue NewLoad = reduceLoadWidth(And.getNode());
7109 assert(NewLoad &&
7110 "Shouldn't be masking the load if it can't be narrowed");
7111 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7112 }
7113 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7114 return true;
7115 }
7116 return false;
7117}
7118
7119// Unfold
7120// x & (-1 'logical shift' y)
7121// To
7122// (x 'opposite logical shift' y) 'logical shift' y
7123// if it is better for performance.
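// For example, with i8 x and y = 3: x & (-1 << 3) is x & 0xF8, and
// (x >> 3) << 3 (both logical shifts) likewise clears the low 3 bits, so the
// two forms are equal; the same reasoning applies to the SRL/SHL pair.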
7124SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7125 assert(N->getOpcode() == ISD::AND);
7126
7127 SDValue N0 = N->getOperand(0);
7128 SDValue N1 = N->getOperand(1);
7129
7130 // Do we actually prefer shifts over mask?
7131 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
7132 return SDValue();
7133
7134 // Try to match (-1 '[outer] logical shift' y)
7135 unsigned OuterShift;
7136 unsigned InnerShift; // The opposite direction to the OuterShift.
7137 SDValue Y; // Shift amount.
7138 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7139 if (!M.hasOneUse())
7140 return false;
7141 OuterShift = M->getOpcode();
7142 if (OuterShift == ISD::SHL)
7143 InnerShift = ISD::SRL;
7144 else if (OuterShift == ISD::SRL)
7145 InnerShift = ISD::SHL;
7146 else
7147 return false;
7148 if (!isAllOnesConstant(M->getOperand(0)))
7149 return false;
7150 Y = M->getOperand(1);
7151 return true;
7152 };
7153
7154 SDValue X;
7155 if (matchMask(N1))
7156 X = N0;
7157 else if (matchMask(N0))
7158 X = N1;
7159 else
7160 return SDValue();
7161
7162 SDLoc DL(N);
7163 EVT VT = N->getValueType(0);
7164
7165 // tmp = x 'opposite logical shift' y
7166 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7167 // ret = tmp 'logical shift' y
7168 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7169
7170 return T1;
7171}
7172
7173/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7174/// For a target with a bit test, this is expected to become test + set and save
7175/// at least 1 instruction.
7176 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7177 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7178
7179 // Look through an optional extension.
7180 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7181 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7182 And0 = And0.getOperand(0);
7183 if (!isOneConstant(And1) || !And0.hasOneUse())
7184 return SDValue();
7185
7186 SDValue Src = And0;
7187
7188 // Attempt to find a 'not' op.
7189 // TODO: Should we favor test+set even without the 'not' op?
7190 bool FoundNot = false;
7191 if (isBitwiseNot(Src)) {
7192 FoundNot = true;
7193 Src = Src.getOperand(0);
7194
7195 // Look though an optional truncation. The source operand may not be the
7196 // same type as the original 'and', but that is ok because we are masking
7197 // off everything but the low bit.
7198 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7199 Src = Src.getOperand(0);
7200 }
7201
7202 // Match a shift-right by constant.
7203 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7204 return SDValue();
7205
7206 // This is probably not worthwhile without a supported type.
7207 EVT SrcVT = Src.getValueType();
7208 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7209 if (!TLI.isTypeLegal(SrcVT))
7210 return SDValue();
7211
7212 // We might have looked through casts that make this transform invalid.
7213 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7214 SDValue ShiftAmt = Src.getOperand(1);
7215 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7216 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7217 return SDValue();
7218
7219 // Set source to shift source.
7220 Src = Src.getOperand(0);
7221
7222 // Try again to find a 'not' op.
7223 // TODO: Should we favor test+set even with two 'not' ops?
7224 if (!FoundNot) {
7225 if (!isBitwiseNot(Src))
7226 return SDValue();
7227 Src = Src.getOperand(0);
7228 }
7229
7230 if (!TLI.hasBitTest(Src, ShiftAmt))
7231 return SDValue();
7232
7233 // Turn this into a bit-test pattern using mask op + setcc:
7234 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7235 // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
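// For example, with i8 X = 0b00101000 and C = 3: srl gives 0b00000101, the
// 'not' flips the low bit to 0, and the 'and 1' yields 0, matching
// "(X & 0b1000) == 0" being false; if bit 3 of X were clear, both forms
// would yield 1.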
7236 SDLoc DL(And);
7237 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7238 EVT CCVT =
7239 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7240 SDValue Mask = DAG.getConstant(
7241 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7242 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7243 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7244 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7245 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7246}
7247
7248/// For targets that support usubsat, match a bit-hack form of that operation
7249/// that ends in 'and' and convert it.
7250 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7251 EVT VT = N->getValueType(0);
7252 unsigned BitWidth = VT.getScalarSizeInBits();
7253 APInt SignMask = APInt::getSignMask(BitWidth);
7254
7255 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7256 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7257 // xor/add with SMIN (signmask) are logically equivalent.
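// For example, with i8 X = 200: X ^ 128 = 72 and X s>> 7 = 0xFF, so the 'and'
// yields 72 = usubsat(200, 128); with X = 50: X ^ 128 = 178 but X s>> 7 = 0,
// so the 'and' yields 0 = usubsat(50, 128).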
7258 SDValue X;
7259 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7260 m_OneUse(m_Sra(m_Deferred(X),
7261 m_SpecificInt(BitWidth - 1))))) &&
7262 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7263 m_OneUse(m_Sra(m_Deferred(X),
7264 m_SpecificInt(BitWidth - 1))))))
7265 return SDValue();
7266
7267 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7268 DAG.getConstant(SignMask, DL, VT));
7269}
7270
7271/// Given a bitwise logic operation N with a matching bitwise logic operand,
7272/// fold a pattern where 2 of the source operands are identically shifted
7273/// values. For example:
7274/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7275 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7276 SelectionDAG &DAG) {
7277 unsigned LogicOpcode = N->getOpcode();
7278 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7279 "Expected bitwise logic operation");
7280
7281 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7282 return SDValue();
7283
7284 // Match another bitwise logic op and a shift.
7285 unsigned ShiftOpcode = ShiftOp.getOpcode();
7286 if (LogicOp.getOpcode() != LogicOpcode ||
7287 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7288 ShiftOpcode == ISD::SRA))
7289 return SDValue();
7290
7291 // Match another shift op inside the first logic operand. Handle both commuted
7292 // possibilities.
7293 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7294 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7295 SDValue X1 = ShiftOp.getOperand(0);
7296 SDValue Y = ShiftOp.getOperand(1);
7297 SDValue X0, Z;
7298 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7299 LogicOp.getOperand(0).getOperand(1) == Y) {
7300 X0 = LogicOp.getOperand(0).getOperand(0);
7301 Z = LogicOp.getOperand(1);
7302 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7303 LogicOp.getOperand(1).getOperand(1) == Y) {
7304 X0 = LogicOp.getOperand(1).getOperand(0);
7305 Z = LogicOp.getOperand(0);
7306 } else {
7307 return SDValue();
7308 }
7309
7310 EVT VT = N->getValueType(0);
7311 SDLoc DL(N);
7312 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7313 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7314 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7315}
7316
7317/// Given a tree of logic operations with shape like
7318/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7319/// try to match and fold shift operations with the same shift amount.
7320/// For example:
7321/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7322/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7323 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7324 SDValue RightHand, SelectionDAG &DAG) {
7325 unsigned LogicOpcode = N->getOpcode();
7326 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7327 "Expected bitwise logic operation");
7328 if (LeftHand.getOpcode() != LogicOpcode ||
7329 RightHand.getOpcode() != LogicOpcode)
7330 return SDValue();
7331 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7332 return SDValue();
7333
7334 // Try to match one of following patterns:
7335 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7336 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7337 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7338 // itself.
7339 SDValue CombinedShifts, W;
7340 SDValue R0 = RightHand.getOperand(0);
7341 SDValue R1 = RightHand.getOperand(1);
7342 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7343 W = R1;
7344 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7345 W = R0;
7346 else
7347 return SDValue();
7348
7349 EVT VT = N->getValueType(0);
7350 SDLoc DL(N);
7351 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7352}
7353
7354/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7355 /// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`
7356/// pattern. This is typically a better representation for targets without a
7357/// fused "and-not" operation.
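/// For example, with m = 0b1100, x = 0b1010, y = 0b0110: (m & x) | (~m & y) is
/// 0b1000 | 0b0010 = 0b1010, and ((x ^ y) & m) ^ y is (0b1100 & 0b1100) ^
/// 0b0110 = 0b1010 as well.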
7358 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7359 const TargetLowering &TLI, const SDLoc &DL) {
7360 // Note that masked-merge variants using XOR or ADD expressions are
7361 // normalized to OR by InstCombine so we only check for OR or AND.
7362 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7363 "Must be called with ISD::OR or ISD::AND node");
7364
7365 // If the target supports and-not, don't fold this.
7366 if (TLI.hasAndNot(SDValue(Node, 0)))
7367 return SDValue();
7368
7369 SDValue M, X, Y;
7370
7371 if (sd_match(Node,
7372 m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7373 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7374 sd_match(Node,
7375 m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7376 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7377 EVT VT = M.getValueType();
7378 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7379 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7380 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7381 }
7382 return SDValue();
7383}
7384
7385SDValue DAGCombiner::visitAND(SDNode *N) {
7386 SDValue N0 = N->getOperand(0);
7387 SDValue N1 = N->getOperand(1);
7388 EVT VT = N1.getValueType();
7389 SDLoc DL(N);
7390
7391 // x & x --> x
7392 if (N0 == N1)
7393 return N0;
7394
7395 // fold (and c1, c2) -> c1&c2
7396 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7397 return C;
7398
7399 // canonicalize constant to RHS
7400 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7401 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7402 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7403
7404 if (areBitwiseNotOfEachother(N0, N1))
7405 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7406
7407 // fold vector ops
7408 if (VT.isVector()) {
7409 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7410 return FoldedVOp;
7411
7412 // fold (and x, 0) -> 0, vector edition
7413 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7414 // do not return N1, because undef node may exist in N1
7415 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7416 N1.getValueType());
7417
7418 // fold (and x, -1) -> x, vector edition
7419 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7420 return N0;
7421
7422 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7423 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7424 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7425 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7426 EVT LoadVT = MLoad->getMemoryVT();
7427 EVT ExtVT = VT;
7428 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7429 // For this AND to be a zero extension of the masked load the elements
7430 // of the BuildVec must mask the bottom bits of the extended element
7431 // type
7432 uint64_t ElementSize =
7433 LoadVT.getVectorElementType().getScalarSizeInBits();
7434 if (Splat->getAPIntValue().isMask(ElementSize)) {
7435 SDValue NewLoad = DAG.getMaskedLoad(
7436 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7437 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7438 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7439 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7440 bool LoadHasOtherUsers = !N0.hasOneUse();
7441 CombineTo(N, NewLoad);
7442 if (LoadHasOtherUsers)
7443 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7444 return SDValue(N, 0);
7445 }
7446 }
7447 }
7448 }
7449
7450 // fold (and x, -1) -> x
7451 if (isAllOnesConstant(N1))
7452 return N0;
7453
7454 // if (and x, c) is known to be zero, return 0
7455 unsigned BitWidth = VT.getScalarSizeInBits();
7456 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7457 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7458 return DAG.getConstant(0, DL, VT);
7459
7460 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7461 return R;
7462
7463 if (SDValue NewSel = foldBinOpIntoSelect(N))
7464 return NewSel;
7465
7466 // reassociate and
7467 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7468 return RAND;
7469
7470 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7471 if (SDValue SD =
7472 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7473 return SD;
7474
7475 // fold (and (or x, C), D) -> D if (C & D) == D
7476 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7477 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7478 };
7479 if (N0.getOpcode() == ISD::OR &&
7480 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7481 return N1;
7482
7483 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7484 SDValue N0Op0 = N0.getOperand(0);
7485 EVT SrcVT = N0Op0.getValueType();
7486 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7487 APInt Mask = ~N1C->getAPIntValue();
7488 Mask = Mask.trunc(SrcBitWidth);
7489
7490 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7491 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7492 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7493
7494 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7495 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7496 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7497 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7498 TLI.isNarrowingProfitable(N, VT, SrcVT))
7499 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7500 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7501 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7502 }
7503
7504 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7505 if (ISD::isExtOpcode(N0.getOpcode())) {
7506 unsigned ExtOpc = N0.getOpcode();
7507 SDValue N0Op0 = N0.getOperand(0);
7508 if (N0Op0.getOpcode() == ISD::AND &&
7509 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7510 N0->hasOneUse() && N0Op0->hasOneUse()) {
7511 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7512 {N0Op0.getOperand(1)})) {
7513 if (SDValue NewMask =
7514 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7515 return DAG.getNode(ISD::AND, DL, VT,
7516 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7517 NewMask);
7518 }
7519 }
7520 }
7521 }
7522
7523 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7524 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7525 // already be zero by virtue of the width of the base type of the load.
7526 //
7527 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7528 // more cases.
7529 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7531 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7532 N0.getOperand(0).getResNo() == 0) ||
7533 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7534 auto *Load =
7535 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7536
7537 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7538 // This can be a pure constant or a vector splat, in which case we treat the
7539 // vector as a scalar and use the splat value.
7540 APInt Constant = APInt::getZero(1);
7541 if (const ConstantSDNode *C = isConstOrConstSplat(
7542 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7543 Constant = C->getAPIntValue();
7544 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7545 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7546 APInt SplatValue, SplatUndef;
7547 unsigned SplatBitSize;
7548 bool HasAnyUndefs;
7549 // Endianness should not matter here. Code below makes sure that we only
7550 // use the result if the SplatBitSize is a multiple of the vector element
7551 // size. And after that we AND all element sized parts of the splat
7552 // together. So the end result should be the same regardless of in which
7553 // order we do those operations.
7554 const bool IsBigEndian = false;
7555 bool IsSplat =
7556 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7557 HasAnyUndefs, EltBitWidth, IsBigEndian);
7558
7559 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7560 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7561 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7562 // Undef bits can contribute to a possible optimisation if set, so
7563 // set them.
7564 SplatValue |= SplatUndef;
7565
7566 // The splat value may be something like "0x00FFFFFF", which means 0 for
7567 // the first vector value and FF for the rest, repeating. We need a mask
7568 // that will apply equally to all members of the vector, so AND all the
7569 // lanes of the constant together.
7570 Constant = APInt::getAllOnes(EltBitWidth);
7571 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7572 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7573 }
7574 }
7575
7576 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7577 // actually legal and isn't going to get expanded, else this is a false
7578 // optimisation.
7579 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7580 Load->getValueType(0),
7581 Load->getMemoryVT());
7582
7583 // Resize the constant to the same size as the original memory access before
7584 // extension. If it is still the AllOnesValue then this AND is completely
7585 // unneeded.
7586 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7587
7588 bool B;
7589 switch (Load->getExtensionType()) {
7590 default: B = false; break;
7591 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7592 case ISD::ZEXTLOAD:
7593 case ISD::NON_EXTLOAD: B = true; break;
7594 }
7595
7596 if (B && Constant.isAllOnes()) {
7597 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7598 // preserve semantics once we get rid of the AND.
7599 SDValue NewLoad(Load, 0);
7600
7601 // Fold the AND away. NewLoad may get replaced immediately.
7602 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7603
7604 if (Load->getExtensionType() == ISD::EXTLOAD) {
7605 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7606 Load->getValueType(0), SDLoc(Load),
7607 Load->getChain(), Load->getBasePtr(),
7608 Load->getOffset(), Load->getMemoryVT(),
7609 Load->getMemOperand());
7610 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7611 if (Load->getNumValues() == 3) {
7612 // PRE/POST_INC loads have 3 values.
7613 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7614 NewLoad.getValue(2) };
7615 CombineTo(Load, To, 3, true);
7616 } else {
7617 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7618 }
7619 }
7620
7621 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7622 }
7623 }
7624
7625 // Try to convert a constant mask AND into a shuffle clear mask.
7626 if (VT.isVector())
7627 if (SDValue Shuffle = XformToShuffleWithZero(N))
7628 return Shuffle;
7629
7630 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7631 return Combined;
7632
7633 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7634 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7635 SDValue Ext = N0.getOperand(0);
7636 EVT ExtVT = Ext->getValueType(0);
7637 SDValue Extendee = Ext->getOperand(0);
7638
7639 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7640 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7641 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7642 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7643 // => (extract_subvector (iN_zeroext v))
7644 SDValue ZeroExtExtendee =
7645 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7646
7647 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7648 N0.getOperand(1));
7649 }
7650 }
7651
7652 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7653 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7654 EVT MemVT = GN0->getMemoryVT();
7655 EVT ScalarVT = MemVT.getScalarType();
7656
7657 if (SDValue(GN0, 0).hasOneUse() &&
7658 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7659 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7660 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7661 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7662
7663 SDValue ZExtLoad = DAG.getMaskedGather(
7664 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7665 GN0->getIndexType(), ISD::ZEXTLOAD);
7666
7667 CombineTo(N, ZExtLoad);
7668 AddToWorklist(ZExtLoad.getNode());
7669 // Avoid recheck of N.
7670 return SDValue(N, 0);
7671 }
7672 }
7673
7674 // fold (and (load x), 255) -> (zextload x, i8)
7675 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7676 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7677 if (SDValue Res = reduceLoadWidth(N))
7678 return Res;
7679
7680 if (LegalTypes) {
7681 // Attempt to propagate the AND back up to the leaves which, if they're
7682 // loads, can be combined to narrow loads and the AND node can be removed.
7683 // Perform after legalization so that extend nodes will already be
7684 // combined into the loads.
7685 if (BackwardsPropagateMask(N))
7686 return SDValue(N, 0);
7687 }
7688
7689 if (SDValue Combined = visitANDLike(N0, N1, N))
7690 return Combined;
7691
7692 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7693 if (N0.getOpcode() == N1.getOpcode())
7694 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7695 return V;
7696
7697 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7698 return R;
7699 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7700 return R;
7701
7702 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7703 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7704 SDValue X, Y, Z, NotY;
7705 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7706 if (sd_match(N,
7707 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7708 sd_match(NotY, m_Not(m_Value(Y))) &&
7709 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7710 return DAG.getNode(ISD::AND, DL, VT, X,
7711 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7712
7713 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7714 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7715 if (sd_match(N, m_And(m_Value(X),
7716 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7717 sd_match(NotY, m_Not(m_Value(Y))) &&
7718 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7719 return DAG.getNode(ISD::AND, DL, VT, X,
7720 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7721
7722 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7723 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7724 if (TLI.hasAndNot(SDValue(N, 0)))
7725 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7726 return Folded;
7727
7728 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7729 // If we are shifting down an extended sign bit, see if we can simplify
7730 // this to shifting the MSB directly to expose further simplifications.
7731 // This pattern often appears after sext_inreg legalization.
7732 APInt Amt;
7733 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7734 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7735 return DAG.getNode(ISD::SRL, DL, VT, X,
7736 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7737
7738 // Masking the negated extension of a boolean is just the zero-extended
7739 // boolean:
7740 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7741 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7742 //
7743 // Note: the SimplifyDemandedBits fold below can make an information-losing
7744 // transform, and then we have no way to find this better fold.
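// For example, with X = true: zext gives 1, (sub 0, 1) is all-ones, and the
// 'and 1' yields 1; with the sext form, (sub 0, -1) is 1 and the 'and 1' is
// again 1, so both collapse to zext(bool X).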
7745 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7746 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7747 X.getOperand(0).getScalarValueSizeInBits() == 1)
7748 return X;
7749 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7750 X.getOperand(0).getScalarValueSizeInBits() == 1)
7751 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7752 }
7753
7754 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7755 // fold (and (sra)) -> (and (srl)) when possible.
7756 if (SimplifyDemandedBits(SDValue(N, 0)))
7757 return SDValue(N, 0);
7758
7759 // fold (zext_inreg (extload x)) -> (zextload x)
7760 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7761 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7762 (ISD::isEXTLoad(N0.getNode()) ||
7763 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7764 auto *LN0 = cast<LoadSDNode>(N0);
7765 EVT MemVT = LN0->getMemoryVT();
7766 // If we zero all the possible extended bits, then we can turn this into
7767 // a zextload if we are running before legalize or the operation is legal.
7768 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7769 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7770 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7771 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7772 ((!LegalOperations && LN0->isSimple()) ||
7773 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7774 SDValue ExtLoad =
7775 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7776 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7777 AddToWorklist(N);
7778 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7779 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7780 }
7781 }
7782
7783 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7784 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7785 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7786 N0.getOperand(1), false))
7787 return BSwap;
7788 }
7789
7790 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7791 return Shifts;
7792
7793 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7794 return V;
7795
7796 // Recognize the following pattern:
7797 //
7798 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7799 //
7800  // where bitmask is a low-bit mask covering exactly the bits of NarrowVT,
7801  // i.e. the AND clears every bit introduced by the sign extension.
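  // For example, (and (sign_extend i8 %x to i32), 255) keeps only the low 8
  // bits and is therefore the same as (zero_extend i8 %x to i32).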
7802 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7803 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7804 return false;
7805
7807 if (!C)
7808 return false;
7809
7810 if (!C->getAPIntValue().isMask(
7811 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7812 return false;
7813
7814 return true;
7815 };
7816
7817 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7818 if (IsAndZeroExtMask(N0, N1))
7819 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7820
7821 if (hasOperation(ISD::USUBSAT, VT))
7822 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7823 return V;
7824
7825 // Postpone until legalization completed to avoid interference with bswap
7826 // folding
7827 if (LegalOperations || VT.isVector())
7828 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7829 return R;
7830
7831 if (VT.isScalarInteger() && VT != MVT::i1)
7832 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7833 return R;
7834
7835 return SDValue();
7836}
7837
7838/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
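/// For example, for i32 a = 0x0000CCDD the masked form
/// ((a & 0xff) << 8) | ((a & 0xff00) >> 8) evaluates to 0x0000DDCC, which is
/// exactly (bswap 0x0000CCDD) >> 16.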
7839SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7840 bool DemandHighBits) {
7841 if (!LegalOperations)
7842 return SDValue();
7843
7844 EVT VT = N->getValueType(0);
7845 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7846 return SDValue();
7847  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7848    return SDValue();
7849
7850 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7851 bool LookPassAnd0 = false;
7852 bool LookPassAnd1 = false;
7853 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7854 std::swap(N0, N1);
7855 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7856 std::swap(N0, N1);
7857 if (N0.getOpcode() == ISD::AND) {
7858 if (!N0->hasOneUse())
7859 return SDValue();
7860 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7861 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7862 // This is needed for X86.
7863 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7864 N01C->getZExtValue() != 0xFFFF))
7865 return SDValue();
7866 N0 = N0.getOperand(0);
7867 LookPassAnd0 = true;
7868 }
7869
7870 if (N1.getOpcode() == ISD::AND) {
7871 if (!N1->hasOneUse())
7872 return SDValue();
7873 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7874 if (!N11C || N11C->getZExtValue() != 0xFF)
7875 return SDValue();
7876 N1 = N1.getOperand(0);
7877 LookPassAnd1 = true;
7878 }
7879
7880 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7881 std::swap(N0, N1);
7882 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7883 return SDValue();
7884 if (!N0->hasOneUse() || !N1->hasOneUse())
7885 return SDValue();
7886
7887 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7888 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7889 if (!N01C || !N11C)
7890 return SDValue();
7891 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7892 return SDValue();
7893
7894 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7895 SDValue N00 = N0->getOperand(0);
7896 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7897 if (!N00->hasOneUse())
7898 return SDValue();
7899 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7900 if (!N001C || N001C->getZExtValue() != 0xFF)
7901 return SDValue();
7902 N00 = N00.getOperand(0);
7903 LookPassAnd0 = true;
7904 }
7905
7906 SDValue N10 = N1->getOperand(0);
7907 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7908 if (!N10->hasOneUse())
7909 return SDValue();
7910 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7911 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7912 // for X86.
7913 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7914 N101C->getZExtValue() != 0xFFFF))
7915 return SDValue();
7916 N10 = N10.getOperand(0);
7917 LookPassAnd1 = true;
7918 }
7919
7920 if (N00 != N10)
7921 return SDValue();
7922
7923 // Make sure everything beyond the low halfword gets set to zero since the SRL
7924 // 16 will clear the top bits.
7925 unsigned OpSizeInBits = VT.getSizeInBits();
7926 if (OpSizeInBits > 16) {
7927 // If the left-shift isn't masked out then the only way this is a bswap is
7928 // if all bits beyond the low 8 are 0. In that case the entire pattern
7929 // reduces to a left shift anyway: leave it for other parts of the combiner.
7930 if (DemandHighBits && !LookPassAnd0)
7931 return SDValue();
7932
7933 // However, if the right shift isn't masked out then it might be because
7934 // it's not needed. See if we can spot that too. If the high bits aren't
7935 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7936 // upper bits to be zero.
7937 if (!LookPassAnd1) {
7938 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7939 if (!DAG.MaskedValueIsZero(N10,
7940 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7941 return SDValue();
7942 }
7943 }
7944
7945 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7946 if (OpSizeInBits > 16) {
7947 SDLoc DL(N);
7948 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7949 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7950 }
7951 return Res;
7952}
7953
7954/// Return true if the specified node is an element that makes up a 32-bit
7955/// packed halfword byteswap.
7956/// ((x & 0x000000ff) << 8) |
7957/// ((x & 0x0000ff00) >> 8) |
7958/// ((x & 0x00ff0000) << 8) |
7959/// ((x & 0xff000000) >> 8)
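/// Each matched term records its source value in Parts, indexed by which byte
/// the mask selects: 0xFF, 0xFF00, 0xFF0000 and 0xFF000000 map to Parts[0]
/// through Parts[3] respectively.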
7960static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7961  if (!N->hasOneUse())
7962 return false;
7963
7964 unsigned Opc = N.getOpcode();
7965 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7966 return false;
7967
7968 SDValue N0 = N.getOperand(0);
7969 unsigned Opc0 = N0.getOpcode();
7970 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7971 return false;
7972
7973 ConstantSDNode *N1C = nullptr;
7974 // SHL or SRL: look upstream for AND mask operand
7975 if (Opc == ISD::AND)
7976 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7977 else if (Opc0 == ISD::AND)
7978    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7979  if (!N1C)
7980 return false;
7981
7982 unsigned MaskByteOffset;
7983 switch (N1C->getZExtValue()) {
7984 default:
7985 return false;
7986 case 0xFF: MaskByteOffset = 0; break;
7987 case 0xFF00: MaskByteOffset = 1; break;
7988 case 0xFFFF:
7989 // In case demanded bits didn't clear the bits that will be shifted out.
7990 // This is needed for X86.
7991 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7992 MaskByteOffset = 1;
7993 break;
7994 }
7995 return false;
7996 case 0xFF0000: MaskByteOffset = 2; break;
7997 case 0xFF000000: MaskByteOffset = 3; break;
7998 }
7999
8000 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8001 if (Opc == ISD::AND) {
8002 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8003 // (x >> 8) & 0xff
8004 // (x >> 8) & 0xff0000
8005 if (Opc0 != ISD::SRL)
8006 return false;
8007      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8008      if (!C || C->getZExtValue() != 8)
8009 return false;
8010 } else {
8011 // (x << 8) & 0xff00
8012 // (x << 8) & 0xff000000
8013 if (Opc0 != ISD::SHL)
8014 return false;
8015      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8016      if (!C || C->getZExtValue() != 8)
8017 return false;
8018 }
8019 } else if (Opc == ISD::SHL) {
8020 // (x & 0xff) << 8
8021 // (x & 0xff0000) << 8
8022 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8023 return false;
8024 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8025 if (!C || C->getZExtValue() != 8)
8026 return false;
8027 } else { // Opc == ISD::SRL
8028 // (x & 0xff00) >> 8
8029 // (x & 0xff000000) >> 8
8030 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8031 return false;
8032 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8033 if (!C || C->getZExtValue() != 8)
8034 return false;
8035 }
8036
8037 if (Parts[MaskByteOffset])
8038 return false;
8039
8040 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8041 return true;
8042}
8043
8044// Match 2 elements of a packed halfword bswap.
8045static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8046  if (N.getOpcode() == ISD::OR)
8047 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8048 isBSwapHWordElement(N.getOperand(1), Parts);
8049
8050 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8051 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8052 if (!C || C->getAPIntValue() != 16)
8053 return false;
8054 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8055 return true;
8056 }
8057
8058 return false;
8059}
8060
8061// Match this pattern:
8062// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8063// And rewrite this to:
8064// (rotr (bswap A), 16)
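// For example, A = 0x11223344 gives (0x22004400 | 0x00110033) = 0x22114433,
// and (rotr (bswap 0x11223344), 16) = (rotr 0x44332211, 16) is also
// 0x22114433.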
8065static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8066                                       SelectionDAG &DAG, SDNode *N, SDValue N0,
8067 SDValue N1, EVT VT) {
8068 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8069 "MatchBSwapHWordOrAndAnd: expecting i32");
8070 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8071 return SDValue();
8072 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8073 return SDValue();
8074 // TODO: this is too restrictive; lifting this restriction requires more tests
8075 if (!N0->hasOneUse() || !N1->hasOneUse())
8076 return SDValue();
8077  ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8078  ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8079  if (!Mask0 || !Mask1)
8080 return SDValue();
8081 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8082 Mask1->getAPIntValue() != 0x00ff00ff)
8083 return SDValue();
8084 SDValue Shift0 = N0.getOperand(0);
8085 SDValue Shift1 = N1.getOperand(0);
8086 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8087 return SDValue();
8088 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8089 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8090 if (!ShiftAmt0 || !ShiftAmt1)
8091 return SDValue();
8092 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8093 return SDValue();
8094 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8095 return SDValue();
8096
8097 SDLoc DL(N);
8098 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8099 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8100 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8101}
8102
8103/// Match a 32-bit packed halfword bswap. That is
8104/// ((x & 0x000000ff) << 8) |
8105/// ((x & 0x0000ff00) >> 8) |
8106/// ((x & 0x00ff0000) << 8) |
8107/// ((x & 0xff000000) >> 8)
8108/// => (rotl (bswap x), 16)
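/// For example, x = 0xAABBCCDD produces 0xBBAADDCC: each 16-bit half is
/// byte-swapped in place, which is the same as (rotl (bswap 0xAABBCCDD), 16).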
8109SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8110 if (!LegalOperations)
8111 return SDValue();
8112
8113 EVT VT = N->getValueType(0);
8114 if (VT != MVT::i32)
8115 return SDValue();
8116  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8117    return SDValue();
8118
8119 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8120 return BSwap;
8121
8122 // Try again with commuted operands.
8123 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8124 return BSwap;
8125
8126
8127 // Look for either
8128 // (or (bswaphpair), (bswaphpair))
8129 // (or (or (bswaphpair), (and)), (and))
8130 // (or (or (and), (bswaphpair)), (and))
8131 SDNode *Parts[4] = {};
8132
8133 if (isBSwapHWordPair(N0, Parts)) {
8134 // (or (or (and), (and)), (or (and), (and)))
8135 if (!isBSwapHWordPair(N1, Parts))
8136 return SDValue();
8137 } else if (N0.getOpcode() == ISD::OR) {
8138 // (or (or (or (and), (and)), (and)), (and))
8139 if (!isBSwapHWordElement(N1, Parts))
8140 return SDValue();
8141 SDValue N00 = N0.getOperand(0);
8142 SDValue N01 = N0.getOperand(1);
8143 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8144 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8145 return SDValue();
8146 } else {
8147 return SDValue();
8148 }
8149
8150 // Make sure the parts are all coming from the same node.
8151 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8152 return SDValue();
8153
8154 SDLoc DL(N);
8155 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8156 SDValue(Parts[0], 0));
8157
8158 // Result of the bswap should be rotated by 16. If it's not legal, then
8159 // do (x << 16) | (x >> 16).
8160 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8161  if (!LegalOperations || TLI.isOperationLegal(ISD::ROTL, VT))
8162    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8163  if (!LegalOperations || TLI.isOperationLegal(ISD::ROTR, VT))
8164    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8165 return DAG.getNode(ISD::OR, DL, VT,
8166 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8167 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8168}
8169
8170/// This contains all DAGCombine rules which reduce two values combined by
8171/// an Or operation to a single value \see visitANDLike().
8172SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8173 EVT VT = N1.getValueType();
8174
8175 // fold (or x, undef) -> -1
8176 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8177 return DAG.getAllOnesConstant(DL, VT);
8178
8179 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8180 return V;
8181
8182 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
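  // For example, (or (and X, 0xFF00), (and Y, 0x00FF)) can become
  // (and (or X, Y), 0xFFFF) when the 0x00FF bits of X and the 0xFF00 bits of Y
  // are already known to be zero.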
8183 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8184 // Don't increase # computations.
8185 (N0->hasOneUse() || N1->hasOneUse())) {
8186 // We can only do this xform if we know that bits from X that are set in C2
8187 // but not in C1 are already zero. Likewise for Y.
8188 if (const ConstantSDNode *N0O1C =
8189            getAsNonOpaqueConstant(N0.getOperand(1)))
8190      if (const ConstantSDNode *N1O1C =
8191              getAsNonOpaqueConstant(N1.getOperand(1))) {
8192 // We can only do this xform if we know that bits from X that are set in
8193 // C2 but not in C1 are already zero. Likewise for Y.
8194 const APInt &LHSMask = N0O1C->getAPIntValue();
8195 const APInt &RHSMask = N1O1C->getAPIntValue();
8196
8197 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8198 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8199 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8200 N0.getOperand(0), N1.getOperand(0));
8201 return DAG.getNode(ISD::AND, DL, VT, X,
8202 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8203 }
8204 }
8205 }
8206 }
8207
8208 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8209 if (N0.getOpcode() == ISD::AND &&
8210 N1.getOpcode() == ISD::AND &&
8211 N0.getOperand(0) == N1.getOperand(0) &&
8212 // Don't increase # computations.
8213 (N0->hasOneUse() || N1->hasOneUse())) {
8214 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8215 N0.getOperand(1), N1.getOperand(1));
8216 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8217 }
8218
8219 return SDValue();
8220}
8221
8222/// OR combines for which the commuted variant will be tried as well.
8223static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8224                                  SDNode *N) {
8225 EVT VT = N0.getValueType();
8226 unsigned BW = VT.getScalarSizeInBits();
8227 SDLoc DL(N);
8228
8229 auto peekThroughResize = [](SDValue V) {
8230 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8231 return V->getOperand(0);
8232 return V;
8233 };
8234
8235 SDValue N0Resized = peekThroughResize(N0);
8236 if (N0Resized.getOpcode() == ISD::AND) {
8237 SDValue N1Resized = peekThroughResize(N1);
8238 SDValue N00 = N0Resized.getOperand(0);
8239 SDValue N01 = N0Resized.getOperand(1);
8240
8241 // fold or (and x, y), x --> x
8242 if (N00 == N1Resized || N01 == N1Resized)
8243 return N1;
8244
8245 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8246 // TODO: Set AllowUndefs = true.
8247 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8248 /* AllowUndefs */ false)) {
8249 if (peekThroughResize(NotOperand) == N1Resized)
8250 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8251 N1);
8252 }
8253
8254 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8255 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8256 /* AllowUndefs */ false)) {
8257 if (peekThroughResize(NotOperand) == N1Resized)
8258 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8259 N1);
8260 }
8261 }
8262
8263 SDValue X, Y;
8264
8265 // fold or (xor X, N1), N1 --> or X, N1
8266 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8267 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8268
8269 // fold or (xor x, y), (x and/or y) --> or x, y
8270 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8271 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8273 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8274
8275 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8276 return R;
8277
8278 auto peekThroughZext = [](SDValue V) {
8279 if (V->getOpcode() == ISD::ZERO_EXTEND)
8280 return V->getOperand(0);
8281 return V;
8282 };
8283
8284 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8285 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8286 N0.getOperand(0) == N1.getOperand(0) &&
8287 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8288 return N0;
8289
8290 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8291 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8292 N0.getOperand(1) == N1.getOperand(0) &&
8293 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8294 return N0;
8295
8296 // Attempt to match a legalized build_pair-esque pattern:
8297 // or(shl(aext(Hi),BW/2),zext(Lo))
8298 SDValue Lo, Hi;
8299 if (sd_match(N0,
8301 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8302 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8303 Lo.getValueType() == Hi.getValueType()) {
8304 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8305 SDValue NotLo, NotHi;
8306 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8307 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8308 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8309 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8310 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8311 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8312 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8313 }
8314 }
8315
8316 return SDValue();
8317}
8318
8319SDValue DAGCombiner::visitOR(SDNode *N) {
8320 SDValue N0 = N->getOperand(0);
8321 SDValue N1 = N->getOperand(1);
8322 EVT VT = N1.getValueType();
8323 SDLoc DL(N);
8324
8325 // x | x --> x
8326 if (N0 == N1)
8327 return N0;
8328
8329 // fold (or c1, c2) -> c1|c2
8330 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8331 return C;
8332
8333 // canonicalize constant to RHS
8334  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8335      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8336    return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8337
8338 // fold vector ops
8339 if (VT.isVector()) {
8340 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8341 return FoldedVOp;
8342
8343 // fold (or x, 0) -> x, vector edition
8344    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8345      return N0;
8346
8347 // fold (or x, -1) -> -1, vector edition
8348    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8349      // do not return N1, because undef node may exist in N1
8350 return DAG.getAllOnesConstant(DL, N1.getValueType());
8351
8352 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8353 // Do this only if the resulting type / shuffle is legal.
8354 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8355 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8356 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8357 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8358 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8359 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8360 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8361 // Ensure both shuffles have a zero input.
8362 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8363 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8364 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8365 bool CanFold = true;
8366 int NumElts = VT.getVectorNumElements();
8367 SmallVector<int, 4> Mask(NumElts, -1);
8368
8369 for (int i = 0; i != NumElts; ++i) {
8370 int M0 = SV0->getMaskElt(i);
8371 int M1 = SV1->getMaskElt(i);
8372
8373 // Determine if either index is pointing to a zero vector.
8374 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8375 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8376
8377          // If one element is zero and the other side is undef, keep undef.
8378 // This also handles the case that both are undef.
8379 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8380 continue;
8381
8382 // Make sure only one of the elements is zero.
8383 if (M0Zero == M1Zero) {
8384 CanFold = false;
8385 break;
8386 }
8387
8388 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8389
8390 // We have a zero and non-zero element. If the non-zero came from
8391 // SV0 make the index a LHS index. If it came from SV1, make it
8392 // a RHS index. We need to mod by NumElts because we don't care
8393 // which operand it came from in the original shuffles.
8394 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8395 }
8396
8397 if (CanFold) {
8398 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8399 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8400 SDValue LegalShuffle =
8401 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8402 if (LegalShuffle)
8403 return LegalShuffle;
8404 }
8405 }
8406 }
8407 }
8408
8409 // fold (or x, 0) -> x
8410 if (isNullConstant(N1))
8411 return N0;
8412
8413 // fold (or x, -1) -> -1
8414 if (isAllOnesConstant(N1))
8415 return N1;
8416
8417 if (SDValue NewSel = foldBinOpIntoSelect(N))
8418 return NewSel;
8419
8420 // fold (or x, c) -> c iff (x & ~c) == 0
8421 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8422 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8423 return N1;
8424
8425 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8426 return R;
8427
8428 if (SDValue Combined = visitORLike(N0, N1, DL))
8429 return Combined;
8430
8431 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8432 return Combined;
8433
8434 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8435 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8436 return BSwap;
8437 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8438 return BSwap;
8439
8440 // reassociate or
8441 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8442 return ROR;
8443
8444 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8445 if (SDValue SD =
8446 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8447 return SD;
8448
8449 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8450 // iff (c1 & c2) != 0 or c1/c2 are undef.
8451 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8452 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8453 };
8454 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8455 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8456 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8457 {N1, N0.getOperand(1)})) {
8458 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8459 AddToWorklist(IOR.getNode());
8460 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8461 }
8462 }
8463
8464 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8465 return Combined;
8466 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8467 return Combined;
8468
8469 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8470 if (N0.getOpcode() == N1.getOpcode())
8471 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8472 return V;
8473
8474 // See if this is some rotate idiom.
8475 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8476 return Rot;
8477
8478 if (SDValue Load = MatchLoadCombine(N))
8479 return Load;
8480
8481 // Simplify the operands using demanded-bits information.
8482  if (SimplifyDemandedBits(SDValue(N, 0)))
8483    return SDValue(N, 0);
8484
8485 // If OR can be rewritten into ADD, try combines based on ADD.
8486 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8487 DAG.isADDLike(SDValue(N, 0)))
8488 if (SDValue Combined = visitADDLike(N))
8489 return Combined;
8490
8491 // Postpone until legalization completed to avoid interference with bswap
8492 // folding
8493 if (LegalOperations || VT.isVector())
8494 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8495 return R;
8496
8497 if (VT.isScalarInteger() && VT != MVT::i1)
8498 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8499 return R;
8500
8501 return SDValue();
8502}
8503
8504static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8505                                 SDValue &Mask) {
8506 if (Op.getOpcode() == ISD::AND &&
8507 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8508 Mask = Op.getOperand(1);
8509 return Op.getOperand(0);
8510 }
8511 return Op;
8512}
8513
8514/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8515static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8516 SDValue &Mask) {
8517 Op = stripConstantMask(DAG, Op, Mask);
8518 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8519 Shift = Op;
8520 return true;
8521 }
8522 return false;
8523}
8524
8525/// Helper function for visitOR to extract the needed side of a rotate idiom
8526/// from a shl/srl/mul/udiv. This is meant to handle cases where
8527/// InstCombine merged some outside op with one of the shifts from
8528/// the rotate pattern.
8529/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8530/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8531/// patterns:
8532///
8533/// (or (add v v) (shrl v bitwidth-1)):
8534/// expands (add v v) -> (shl v 1)
8535///
8536/// (or (mul v c0) (shrl (mul v c1) c2)):
8537/// expands (mul v c0) -> (shl (mul v c1) c3)
8538///
8539/// (or (udiv v c0) (shl (udiv v c1) c2)):
8540/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8541///
8542/// (or (shl v c0) (shrl (shl v c1) c2)):
8543/// expands (shl v c0) -> (shl (shl v c1) c3)
8544///
8545/// (or (shrl v c0) (shl (shrl v c1) c2)):
8546/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8547///
8548/// Such that in all cases, c3+c2==bitwidth(op v c1).
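/// For example, on i32 values (or (mul v 48), (srl (mul v 3), 28)) satisfies
/// 48 == 3 << 4 and 4 + 28 == 32, so (mul v 48) is expanded to
/// (shl (mul v 3), 4), exposing a rotate of (mul v 3) by 4.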
8549static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8550                                     SDValue ExtractFrom, SDValue &Mask,
8551 const SDLoc &DL) {
8552 assert(OppShift && ExtractFrom && "Empty SDValue");
8553 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8554 return SDValue();
8555
8556 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8557
8558 // Value and Type of the shift.
8559 SDValue OppShiftLHS = OppShift.getOperand(0);
8560 EVT ShiftedVT = OppShiftLHS.getValueType();
8561
8562 // Amount of the existing shift.
8563 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8564
8565 // (add v v) -> (shl v 1)
8566 // TODO: Should this be a general DAG canonicalization?
8567 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8568 ExtractFrom.getOpcode() == ISD::ADD &&
8569 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8570 ExtractFrom.getOperand(0) == OppShiftLHS &&
8571 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8572 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8573 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8574
8575 // Preconditions:
8576 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8577 //
8578 // Find opcode of the needed shift to be extracted from (op0 v c0).
8579 unsigned Opcode = ISD::DELETED_NODE;
8580 bool IsMulOrDiv = false;
8581 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8582 // opcode or its arithmetic (mul or udiv) variant.
8583 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8584 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8585 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8586 return false;
8587 Opcode = NeededShift;
8588 return true;
8589 };
8590 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8591 // that the needed shift can be extracted from.
8592 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8593 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8594 return SDValue();
8595
8596 // op0 must be the same opcode on both sides, have the same LHS argument,
8597 // and produce the same value type.
8598 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8599 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8600 ShiftedVT != ExtractFrom.getValueType())
8601 return SDValue();
8602
8603 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8604 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8605 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8606 ConstantSDNode *ExtractFromCst =
8607 isConstOrConstSplat(ExtractFrom.getOperand(1));
8608 // TODO: We should be able to handle non-uniform constant vectors for these values
8609 // Check that we have constant values.
8610 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8611 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8612 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8613 return SDValue();
8614
8615 // Compute the shift amount we need to extract to complete the rotate.
8616 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8617 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8618 return SDValue();
8619 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8620 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8621 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8622 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8623 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8624
8625 // Now try extract the needed shift from the ExtractFrom op and see if the
8626 // result matches up with the existing shift's LHS op.
8627 if (IsMulOrDiv) {
8628 // Op to extract from is a mul or udiv by a constant.
8629 // Check:
8630 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8631 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8632 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8633 NeededShiftAmt.getZExtValue());
8634 APInt ResultAmt;
8635 APInt Rem;
8636 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8637 if (Rem != 0 || ResultAmt != OppLHSAmt)
8638 return SDValue();
8639 } else {
8640 // Op to extract from is a shift by a constant.
8641 // Check:
8642 // c2 - (bitwidth(op0 v c0) - c1) == c0
8643 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8644 ExtractFromAmt.getBitWidth()))
8645 return SDValue();
8646 }
8647
8648 // Return the expanded shift op that should allow a rotate to be formed.
8649 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8650 EVT ResVT = ExtractFrom.getValueType();
8651 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8652 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8653}
8654
8655// Return true if we can prove that, whenever Neg and Pos are both in the
8656// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8657// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8658//
8659// (or (shift1 X, Neg), (shift2 X, Pos))
8660//
8661// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8662// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8663// to consider shift amounts with defined behavior.
8664//
8665// The IsRotate flag should be set when the LHS of both shifts is the same.
8666// Otherwise if matching a general funnel shift, it should be clear.
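// For example, with EltSize == 32, Pos == Y and Neg == (sub 32, Y) satisfy
// condition [B] below, so (or (shl X, (sub 32, Y)), (srl X, Y)) can be reduced
// to (rotr X, Y).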
8667static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8668 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8669 const auto &TLI = DAG.getTargetLoweringInfo();
8670 // If EltSize is a power of 2 then:
8671 //
8672 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8673 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8674 //
8675 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8676 // for the stronger condition:
8677 //
8678 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8679 //
8680 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8681 // we can just replace Neg with Neg' for the rest of the function.
8682 //
8683 // In other cases we check for the even stronger condition:
8684 //
8685 // Neg == EltSize - Pos [B]
8686 //
8687 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8688 // behavior if Pos == 0 (and consequently Neg == EltSize).
8689 //
8690 // We could actually use [A] whenever EltSize is a power of 2, but the
8691 // only extra cases that it would match are those uninteresting ones
8692 // where Neg and Pos are never in range at the same time. E.g. for
8693 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8694 // as well as (sub 32, Pos), but:
8695 //
8696 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8697 //
8698 // always invokes undefined behavior for 32-bit X.
8699 //
8700 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8701 // This allows us to peek through any operations that only affect Mask's
8702 // un-demanded bits.
8703 //
8704 // NOTE: We can only do this when matching operations which won't modify the
8705 // least Log2(EltSize) significant bits and not a general funnel shift.
8706 unsigned MaskLoBits = 0;
8707 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8708 unsigned Bits = Log2_64(EltSize);
8709 unsigned NegBits = Neg.getScalarValueSizeInBits();
8710 if (NegBits >= Bits) {
8711 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8712 if (SDValue Inner =
8714 Neg = Inner;
8715 MaskLoBits = Bits;
8716 }
8717 }
8718 }
8719
8720 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8721 if (Neg.getOpcode() != ISD::SUB)
8722 return false;
8723  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8724  if (!NegC)
8725 return false;
8726 SDValue NegOp1 = Neg.getOperand(1);
8727
8728 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8729 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8730 // are redundant for the purpose of the equality.
8731 if (MaskLoBits) {
8732 unsigned PosBits = Pos.getScalarValueSizeInBits();
8733 if (PosBits >= MaskLoBits) {
8734 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8735 if (SDValue Inner =
8737 Pos = Inner;
8738 }
8739 }
8740 }
8741
8742 // The condition we need is now:
8743 //
8744 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8745 //
8746 // If NegOp1 == Pos then we need:
8747 //
8748 // EltSize & Mask == NegC & Mask
8749 //
8750 // (because "x & Mask" is a truncation and distributes through subtraction).
8751 //
8752 // We also need to account for a potential truncation of NegOp1 if the amount
8753 // has already been legalized to a shift amount type.
8754 APInt Width;
8755 if ((Pos == NegOp1) ||
8756 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8757 Width = NegC->getAPIntValue();
8758
8759 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8760 // Then the condition we want to prove becomes:
8761 //
8762 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8763 //
8764 // which, again because "x & Mask" is a truncation, becomes:
8765 //
8766 // NegC & Mask == (EltSize - PosC) & Mask
8767 // EltSize & Mask == (NegC + PosC) & Mask
8768 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8769 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8770 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8771 else
8772 return false;
8773 } else
8774 return false;
8775
8776 // Now we just need to check that EltSize & Mask == Width & Mask.
8777 if (MaskLoBits)
8778 // EltSize & Mask is 0 since Mask is EltSize - 1.
8779 return Width.getLoBits(MaskLoBits) == 0;
8780 return Width == EltSize;
8781}
8782
8783// A subroutine of MatchRotate used once we have found an OR of two opposite
8784// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8785// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8786// former being preferred if supported. InnerPos and InnerNeg are Pos and
8787// Neg with outer conversions stripped away.
8788SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8789 SDValue Neg, SDValue InnerPos,
8790 SDValue InnerNeg, bool FromAdd,
8791 bool HasPos, unsigned PosOpcode,
8792 unsigned NegOpcode, const SDLoc &DL) {
8793 // fold (or/add (shl x, (*ext y)),
8794 // (srl x, (*ext (sub 32, y)))) ->
8795 // (rotl x, y) or (rotr x, (sub 32, y))
8796 //
8797 // fold (or/add (shl x, (*ext (sub 32, y))),
8798 // (srl x, (*ext y))) ->
8799 // (rotr x, y) or (rotl x, (sub 32, y))
8800 EVT VT = Shifted.getValueType();
8801 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8802 /*IsRotate*/ true, FromAdd))
8803 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8804 HasPos ? Pos : Neg);
8805
8806 return SDValue();
8807}
8808
8809// A subroutine of MatchRotate used once we have found an OR of two opposite
8810// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8811// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8812// former being preferred if supported. InnerPos and InnerNeg are Pos and
8813// Neg with outer conversions stripped away.
8814// TODO: Merge with MatchRotatePosNeg.
8815SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8816 SDValue Neg, SDValue InnerPos,
8817 SDValue InnerNeg, bool FromAdd,
8818 bool HasPos, unsigned PosOpcode,
8819 unsigned NegOpcode, const SDLoc &DL) {
8820 EVT VT = N0.getValueType();
8821 unsigned EltBits = VT.getScalarSizeInBits();
8822
8823 // fold (or/add (shl x0, (*ext y)),
8824 // (srl x1, (*ext (sub 32, y)))) ->
8825 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8826 //
8827 // fold (or/add (shl x0, (*ext (sub 32, y))),
8828 // (srl x1, (*ext y))) ->
8829 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8830 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8831 FromAdd))
8832 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8833 HasPos ? Pos : Neg);
8834
8835 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8836 // so for now just use the PosOpcode case if its legal.
8837 // TODO: When can we use the NegOpcode case?
8838 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8839 SDValue X;
8840 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8841 // -> (fshl x0, x1, y)
8842 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8843 sd_match(InnerNeg,
8844 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8846 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8847 }
8848
8849 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8850 // -> (fshr x0, x1, y)
8851 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8852 sd_match(InnerPos,
8853 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8855 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8856 }
8857
8858 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8859 // -> (fshr x0, x1, y)
8860 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8861 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8862 sd_match(InnerPos,
8863 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8865 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8866 }
8867 }
8868
8869 return SDValue();
8870}
8871
8872// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8873// many idioms for rotate, and if the target supports rotation instructions,
8874// generate a rot[lr]. This also matches funnel shift patterns, similar to
8875// rotation but with different shifted sources.
8876SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8877 bool FromAdd) {
8878 EVT VT = LHS.getValueType();
8879
8880 // The target must have at least one rotate/funnel flavor.
8881 // We still try to match rotate by constant pre-legalization.
8882 // TODO: Support pre-legalization funnel-shift by constant.
8883 bool HasROTL = hasOperation(ISD::ROTL, VT);
8884 bool HasROTR = hasOperation(ISD::ROTR, VT);
8885 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8886 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8887
8888 // If the type is going to be promoted and the target has enabled custom
8889 // lowering for rotate, allow matching rotate by non-constants. Only allow
8890 // this for scalar types.
8891 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8895 }
8896
8897 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8898 return SDValue();
8899
8900 // Check for truncated rotate.
8901 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8902 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8903 assert(LHS.getValueType() == RHS.getValueType());
8904 if (SDValue Rot =
8905 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
8906 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8907 }
8908
8909 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8910 SDValue LHSShift; // The shift.
8911 SDValue LHSMask; // AND value if any.
8912 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8913
8914 SDValue RHSShift; // The shift.
8915 SDValue RHSMask; // AND value if any.
8916 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8917
8918 // If neither side matched a rotate half, bail
8919 if (!LHSShift && !RHSShift)
8920 return SDValue();
8921
8922 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8923 // side of the rotate, so try to handle that here. In all cases we need to
8924 // pass the matched shift from the opposite side to compute the opcode and
8925 // needed shift amount to extract. We still want to do this if both sides
8926 // matched a rotate half because one half may be a potential overshift that
8927 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8928 // single one).
8929
8930 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8931 if (LHSShift)
8932 if (SDValue NewRHSShift =
8933 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8934 RHSShift = NewRHSShift;
8935 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8936 if (RHSShift)
8937 if (SDValue NewLHSShift =
8938 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8939 LHSShift = NewLHSShift;
8940
8941 // If a side is still missing, nothing else we can do.
8942 if (!RHSShift || !LHSShift)
8943 return SDValue();
8944
8945 // At this point we've matched or extracted a shift op on each side.
8946
8947 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8948 return SDValue(); // Shifts must disagree.
8949
8950 // Canonicalize shl to left side in a shl/srl pair.
8951 if (RHSShift.getOpcode() == ISD::SHL) {
8952 std::swap(LHS, RHS);
8953 std::swap(LHSShift, RHSShift);
8954 std::swap(LHSMask, RHSMask);
8955 }
8956
8957 // Something has gone wrong - we've lost the shl/srl pair - bail.
8958 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8959 return SDValue();
8960
8961 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8962 SDValue LHSShiftArg = LHSShift.getOperand(0);
8963 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8964 SDValue RHSShiftArg = RHSShift.getOperand(0);
8965 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8966
8967 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8968 ConstantSDNode *RHS) {
8969 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8970 };
8971
8972 auto ApplyMasks = [&](SDValue Res) {
8973 // If there is an AND of either shifted operand, apply it to the result.
8974 if (LHSMask.getNode() || RHSMask.getNode()) {
8975      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8976      SDValue Mask = AllOnes;
8977
8978 if (LHSMask.getNode()) {
8979 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8980 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8981 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8982 }
8983 if (RHSMask.getNode()) {
8984 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8985 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8986 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8987 }
8988
8989 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8990 }
8991
8992 return Res;
8993 };
8994
8995 // TODO: Support pre-legalization funnel-shift by constant.
8996 bool IsRotate = LHSShiftArg == RHSShiftArg;
8997 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8998 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8999 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9000 // Look for a disguised rotate by constant.
9001 // The common shifted operand X may be hidden inside another 'or'.
9002 SDValue X, Y;
9003 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9004 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9005 return false;
9006 if (CommonOp == Or.getOperand(0)) {
9007 X = CommonOp;
9008 Y = Or.getOperand(1);
9009 return true;
9010 }
9011 if (CommonOp == Or.getOperand(1)) {
9012 X = CommonOp;
9013 Y = Or.getOperand(0);
9014 return true;
9015 }
9016 return false;
9017 };
9018
9019 SDValue Res;
9020 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9021 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9022 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9023 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9024 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9025 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9026 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9027 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9028 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9029 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9030 } else {
9031 return SDValue();
9032 }
9033
9034 return ApplyMasks(Res);
9035 }
9036
9037 return SDValue(); // Requires funnel shift support.
9038 }
9039
9040 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9041 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9042 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9043 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9044 // iff C1+C2 == EltSizeInBits
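  // For example, on i32: (or (shl x, 24), (srl x, 8)) -> (rotl x, 24), which is
  // the same value as (rotr x, 8), because 24 + 8 == 32.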
9045 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9046 SDValue Res;
9047 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9048 bool UseROTL = !LegalOperations || HasROTL;
9049 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9050 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9051 } else {
9052 bool UseFSHL = !LegalOperations || HasFSHL;
9053 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9054 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9055 }
9056
9057 return ApplyMasks(Res);
9058 }
9059
9060 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9061 // shift.
9062 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9063 return SDValue();
9064
9065 // If there is a mask here, and we have a variable shift, we can't be sure
9066 // that we're masking out the right stuff.
9067 if (LHSMask.getNode() || RHSMask.getNode())
9068 return SDValue();
9069
9070 // If the shift amount is sign/zext/any-extended just peel it off.
9071 SDValue LExtOp0 = LHSShiftAmt;
9072 SDValue RExtOp0 = RHSShiftAmt;
9073 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9074 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9075 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9076 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9077 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9078 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9079 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9080 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9081 LExtOp0 = LHSShiftAmt.getOperand(0);
9082 RExtOp0 = RHSShiftAmt.getOperand(0);
9083 }
9084
9085 if (IsRotate && (HasROTL || HasROTR)) {
9086 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9087 LExtOp0, RExtOp0, FromAdd, HasROTL,
9088                                          ISD::ROTL, ISD::ROTR, DL))
9089      return TryL;
9090
9091 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9092 RExtOp0, LExtOp0, FromAdd, HasROTR,
9093                                          ISD::ROTR, ISD::ROTL, DL))
9094      return TryR;
9095 }
9096
9097 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9098 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9099 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9100 return TryL;
9101
9102 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9103 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9104 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9105 return TryR;
9106
9107 return SDValue();
9108}
9109
9110/// Recursively traverses the expression calculating the origin of the requested
9111/// byte of the given value. Returns std::nullopt if the provider can't be
9112/// calculated.
9113///
9114/// For all the values except the root of the expression, we verify that the
9115/// value has exactly one use and if not then return std::nullopt. This way if
9116/// the origin of the byte is returned it's guaranteed that the values which
9117/// contribute to the byte are not used outside of this expression.
9118
9119/// However, there is a special case when dealing with vector loads -- we allow
9120/// more than one use if the load is a vector type. Since the values that
9121/// contribute to the byte ultimately come from the ExtractVectorElements of the
9122/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9123/// because those operations are independent from the pattern to be combined.
9124/// For vector loads, we simply care that the ByteProviders are adjacent
9125/// positions of the same vector, and their index matches the byte that is being
9126/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9127/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9128/// byte position we are trying to provide for the LoadCombine. If these do
9129/// not match, then we can not combine the vector loads. \p Index uses the
9130/// byte position we are trying to provide for and is matched against the
9131/// shl and load size. The \p Index algorithm ensures the requested byte is
9132/// provided for by the pattern, and the pattern does not over provide bytes.
9133///
9134///
9135/// The supported LoadCombine pattern for vector loads is as follows
9136/// or
9137/// / \
9138/// or shl
9139/// / \ |
9140/// or shl zext
9141/// / \ | |
9142/// shl zext zext EVE*
9143/// | | | |
9144/// zext EVE* EVE* LOAD
9145/// | | |
9146/// EVE* LOAD LOAD
9147/// |
9148/// LOAD
9149///
9150/// *ExtractVectorElement
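/// For example, a request for byte 2 of (shl X, 16) is satisfied by byte 0 of
/// X, while a request for byte 0 of the same node yields a constant-zero
/// provider, because the low 16 bits are produced by the shift itself.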
9152
9153static std::optional<SDByteProvider>
9154calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9155 std::optional<uint64_t> VectorIndex,
9156 unsigned StartingIndex = 0) {
9157
9158 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9159 if (Depth == 10)
9160 return std::nullopt;
9161
9162 // Only allow multiple uses if the instruction is a vector load (in which
9163 // case we will use the load for every ExtractVectorElement)
9164 if (Depth && !Op.hasOneUse() &&
9165 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9166 return std::nullopt;
9167
9168 // Fail to combine if we have encountered anything but a LOAD after handling
9169 // an ExtractVectorElement.
9170 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9171 return std::nullopt;
9172
9173 unsigned BitWidth = Op.getScalarValueSizeInBits();
9174 if (BitWidth % 8 != 0)
9175 return std::nullopt;
9176 unsigned ByteWidth = BitWidth / 8;
9177 assert(Index < ByteWidth && "invalid index requested");
9178 (void) ByteWidth;
9179
9180 switch (Op.getOpcode()) {
9181 case ISD::OR: {
9182 auto LHS =
9183 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9184 if (!LHS)
9185 return std::nullopt;
9186 auto RHS =
9187 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9188 if (!RHS)
9189 return std::nullopt;
9190
9191 if (LHS->isConstantZero())
9192 return RHS;
9193 if (RHS->isConstantZero())
9194 return LHS;
9195 return std::nullopt;
9196 }
9197 case ISD::SHL: {
9198 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9199 if (!ShiftOp)
9200 return std::nullopt;
9201
9202 uint64_t BitShift = ShiftOp->getZExtValue();
9203
9204 if (BitShift % 8 != 0)
9205 return std::nullopt;
9206 uint64_t ByteShift = BitShift / 8;
9207
9208    // If we are shifting by an amount greater than the index we are trying to
9209    // provide, then do not provide anything. Otherwise, subtract the shift
9210    // amount from the index.
9211 return Index < ByteShift
9213 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9214 Depth + 1, VectorIndex, Index);
9215 }
9216 case ISD::ANY_EXTEND:
9217 case ISD::SIGN_EXTEND:
9218 case ISD::ZERO_EXTEND: {
9219 SDValue NarrowOp = Op->getOperand(0);
9220 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9221 if (NarrowBitWidth % 8 != 0)
9222 return std::nullopt;
9223 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9224
9225 if (Index >= NarrowByteWidth)
9226 return Op.getOpcode() == ISD::ZERO_EXTEND
9227 ? std::optional<SDByteProvider>(
9229 : std::nullopt;
9230 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9231 StartingIndex);
9232 }
9233 case ISD::BSWAP:
9234 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9235 Depth + 1, VectorIndex, StartingIndex);
9236  case ISD::EXTRACT_VECTOR_ELT: {
9237    auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9238 if (!OffsetOp)
9239 return std::nullopt;
9240
9241 VectorIndex = OffsetOp->getZExtValue();
9242
9243 SDValue NarrowOp = Op->getOperand(0);
9244 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9245 if (NarrowBitWidth % 8 != 0)
9246 return std::nullopt;
9247 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9248 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9249 // type, leaving the high bits undefined.
9250 if (Index >= NarrowByteWidth)
9251 return std::nullopt;
9252
9253 // Check to see if the position of the element in the vector corresponds
9254 // with the byte we are trying to provide for. In the case of a vector of
9255 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9256 // the element will provide a range of bytes. For example, if we have a
9257 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9258 // 3).
9259 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9260 return std::nullopt;
9261 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9262 return std::nullopt;
9263
9264 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9265 VectorIndex, StartingIndex);
9266 }
9267 case ISD::LOAD: {
9268 auto L = cast<LoadSDNode>(Op.getNode());
9269 if (!L->isSimple() || L->isIndexed())
9270 return std::nullopt;
9271
9272 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9273 if (NarrowBitWidth % 8 != 0)
9274 return std::nullopt;
9275 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9276
9277    // If the width of the load does not reach the byte we are trying to provide
9278    // for, and it is not a ZEXTLOAD, then the load does not provide the byte in
9279    // question.
9280 if (Index >= NarrowByteWidth)
9281 return L->getExtensionType() == ISD::ZEXTLOAD
9282 ? std::optional<SDByteProvider>(
9284 : std::nullopt;
9285
9286 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9287 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9288 }
9289 }
9290
9291 return std::nullopt;
9292}
9293
9294static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9295 return i;
9296}
9297
9298static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9299 return BW - i - 1;
9300}
9301
9302// Check if the byte offsets we are looking at match either a big- or a
9303// little-endian value load. Return true for big endian, false for little
9304// endian, and std::nullopt if the match failed.
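// For example, offsets {0, 1, 2, 3} relative to FirstOffset describe a
// little-endian layout (return false), while {3, 2, 1, 0} describe a
// big-endian layout (return true).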
9305static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9306 int64_t FirstOffset) {
9307 // The endian can be decided only when it is 2 bytes at least.
9308 unsigned Width = ByteOffsets.size();
9309 if (Width < 2)
9310 return std::nullopt;
9311
9312 bool BigEndian = true, LittleEndian = true;
9313 for (unsigned i = 0; i < Width; i++) {
9314 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9315 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9316 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9317 if (!BigEndian && !LittleEndian)
9318 return std::nullopt;
9319 }
9320
9321 assert((BigEndian != LittleEndian) && "It should be either big endian or "
9322 "little endian");
9323 return BigEndian;
9324}
9325
9326 // Look through one layer of truncate or extend.
9327 static SDValue stripTruncAndExt(SDValue Value) {
9328 switch (Value.getOpcode()) {
9329 case ISD::TRUNCATE:
9330 case ISD::ZERO_EXTEND:
9331 case ISD::SIGN_EXTEND:
9332 case ISD::ANY_EXTEND:
9333 return Value.getOperand(0);
9334 }
9335 return SDValue();
9336}
9337
9338/// Match a pattern where a wide type scalar value is stored by several narrow
9339 /// stores. Fold it into a single store or a BSWAP and a store if the target
9340/// supports it.
9341///
9342/// Assuming little endian target:
9343/// i8 *p = ...
9344/// i32 val = ...
9345/// p[0] = (val >> 0) & 0xFF;
9346/// p[1] = (val >> 8) & 0xFF;
9347/// p[2] = (val >> 16) & 0xFF;
9348/// p[3] = (val >> 24) & 0xFF;
9349/// =>
9350/// *((i32)p) = val;
9351///
9352/// i8 *p = ...
9353/// i32 val = ...
9354/// p[0] = (val >> 24) & 0xFF;
9355/// p[1] = (val >> 16) & 0xFF;
9356/// p[2] = (val >> 8) & 0xFF;
9357/// p[3] = (val >> 0) & 0xFF;
9358/// =>
9359/// *((i32)p) = BSWAP(val);
9360SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9361 // The matching looks for "store (trunc x)" patterns that appear early but are
9362 // likely to be replaced by truncating store nodes during combining.
9363 // TODO: If there is evidence that running this later would help, this
9364 // limitation could be removed. Legality checks may need to be added
9365 // for the created store and optional bswap/rotate.
9366 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9367 return SDValue();
9368
9369 // We only handle merging simple stores of 1-4 bytes.
9370 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9371 EVT MemVT = N->getMemoryVT();
9372 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9373 !N->isSimple() || N->isIndexed())
9374 return SDValue();
9375
9376 // Collect all of the stores in the chain, up to the maximum store width (i64).
9377 SDValue Chain = N->getChain();
9378 SmallVector<StoreSDNode *, 8> Stores;
9379 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9380 unsigned MaxWideNumBits = 64;
9381 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9382 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9383 // All stores must be the same size to ensure that we are writing all of the
9384 // bytes in the wide value.
9385 // This store should have exactly one use as a chain operand for another
9386 // store in the merging set. If there are other chain uses, then the
9387 // transform may not be safe because order of loads/stores outside of this
9388 // set may not be preserved.
9389 // TODO: We could allow multiple sizes by tracking each stored byte.
9390 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9391 Store->isIndexed() || !Store->hasOneUse())
9392 return SDValue();
9393 Stores.push_back(Store);
9394 Chain = Store->getChain();
9395 if (MaxStores < Stores.size())
9396 return SDValue();
9397 }
9398 // There is no reason to continue if we do not have at least a pair of stores.
9399 if (Stores.size() < 2)
9400 return SDValue();
9401
9402 // Handle simple types only.
9403 LLVMContext &Context = *DAG.getContext();
9404 unsigned NumStores = Stores.size();
9405 unsigned WideNumBits = NumStores * NarrowNumBits;
9406 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9407 return SDValue();
9408
9409 // Check if all bytes of the source value that we are looking at are stored
9410 // to the same base address. Collect offsets from Base address into OffsetMap.
9411 SDValue SourceValue;
9412 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9413 int64_t FirstOffset = INT64_MAX;
9414 StoreSDNode *FirstStore = nullptr;
9415 std::optional<BaseIndexOffset> Base;
9416 for (auto *Store : Stores) {
9417 // All the stores store different parts of the CombinedValue. A truncate is
9418 // required to get the partial value.
9419 SDValue Trunc = Store->getValue();
9420 if (Trunc.getOpcode() != ISD::TRUNCATE)
9421 return SDValue();
9422 // Other than the first/last part, a shift operation is required to get the
9423 // offset.
9424 int64_t Offset = 0;
9425 SDValue WideVal = Trunc.getOperand(0);
9426 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9427 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9428 // The shift amount must be a constant multiple of the narrow type.
9429 // It is translated to the offset address in the wide source value "y".
9430 //
9431 // x = srl y, ShiftAmtC
9432 // i8 z = trunc x
9433 // store z, ...
9434 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9435 if (ShiftAmtC % NarrowNumBits != 0)
9436 return SDValue();
9437
9438 // Make sure we aren't reading bits that are shifted in.
9439 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9440 return SDValue();
9441
9442 Offset = ShiftAmtC / NarrowNumBits;
9443 WideVal = WideVal.getOperand(0);
9444 }
9445
9446 // Stores must share the same source value with different offsets.
9447 if (!SourceValue)
9448 SourceValue = WideVal;
9449 else if (SourceValue != WideVal) {
9450 // Truncates and extends can be stripped to see if the values are related.
9451 if (stripTruncAndExt(SourceValue) != WideVal &&
9452 stripTruncAndExt(WideVal) != SourceValue)
9453 return SDValue();
9454
9455 if (WideVal.getScalarValueSizeInBits() >
9456 SourceValue.getScalarValueSizeInBits())
9457 SourceValue = WideVal;
9458
9459 // Give up if the source value type is smaller than the store size.
9460 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9461 return SDValue();
9462 }
9463
9464 // Stores must share the same base address.
9465 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9466 int64_t ByteOffsetFromBase = 0;
9467 if (!Base)
9468 Base = Ptr;
9469 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9470 return SDValue();
9471
9472 // Remember the first store.
9473 if (ByteOffsetFromBase < FirstOffset) {
9474 FirstStore = Store;
9475 FirstOffset = ByteOffsetFromBase;
9476 }
9477 // Map the offset in the store and the offset in the combined value, and
9478 // early return if it has been set before.
9479 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9480 return SDValue();
9481 OffsetMap[Offset] = ByteOffsetFromBase;
9482 }
9483
9484 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9485
9486 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9487 assert(FirstStore && "First store must be set");
9488
9489 // Check that a store of the wide type is both allowed and fast on the target
9490 const DataLayout &Layout = DAG.getDataLayout();
9491 unsigned Fast = 0;
9492 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9493 *FirstStore->getMemOperand(), &Fast);
9494 if (!Allowed || !Fast)
9495 return SDValue();
9496
9497 // Check if the pieces of the value are going to the expected places in memory
9498 // to merge the stores.
9499 auto checkOffsets = [&](bool MatchLittleEndian) {
9500 if (MatchLittleEndian) {
9501 for (unsigned i = 0; i != NumStores; ++i)
9502 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9503 return false;
9504 } else { // MatchBigEndian by reversing loop counter.
9505 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9506 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9507 return false;
9508 }
9509 return true;
9510 };
9511
9512 // Check if the offsets line up for the native data layout of this target.
9513 bool NeedBswap = false;
9514 bool NeedRotate = false;
9515 if (!checkOffsets(Layout.isLittleEndian())) {
9516 // Special-case: check if byte offsets line up for the opposite endian.
9517 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9518 NeedBswap = true;
9519 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9520 NeedRotate = true;
9521 else
9522 return SDValue();
9523 }
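// For example, on a little-endian target with FirstOffset == 0, four i8
// pieces stored with OffsetMap {3, 2, 1, 0} (value byte 0 written to memory
// offset 3, and so on) take the bswap path, while two i16 pieces stored with
// OffsetMap {2, 0} (halves swapped) take the 16-bit rotate path.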
9524
9525 SDLoc DL(N);
9526 if (WideVT != SourceValue.getValueType()) {
9527 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9528 "Unexpected store value to merge");
9529 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9530 }
9531
9532 // Before legalize we can introduce illegal bswaps/rotates which will be later
9533 // converted to an explicit bswap sequence. This way we end up with a single
9534 // store and byte shuffling instead of several stores and byte shuffling.
9535 if (NeedBswap) {
9536 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9537 } else if (NeedRotate) {
9538 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9539 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9540 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9541 }
9542
9543 SDValue NewStore =
9544 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9545 FirstStore->getPointerInfo(), FirstStore->getAlign());
9546
9547 // Rely on other DAG combine rules to remove the other individual stores.
9548 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9549 return NewStore;
9550}
9551
9552/// Match a pattern where a wide type scalar value is loaded by several narrow
9553/// loads and combined by shifts and ors. Fold it into a single load or a load
9554 /// and a BSWAP if the target supports it.
9555///
9556/// Assuming little endian target:
9557/// i8 *a = ...
9558/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9559/// =>
9560/// i32 val = *((i32)a)
9561///
9562/// i8 *a = ...
9563/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9564/// =>
9565/// i32 val = BSWAP(*((i32)a))
9566///
9567/// TODO: This rule matches complex patterns with OR node roots and doesn't
9568/// interact well with the worklist mechanism. When a part of the pattern is
9569/// updated (e.g. one of the loads) its direct users are put into the worklist,
9570/// but the root node of the pattern which triggers the load combine is not
9571 /// necessarily a direct user of the changed node. For example, once the address
9572 /// of the t28 load is reassociated, the load combine won't be triggered:
9573/// t25: i32 = add t4, Constant:i32<2>
9574/// t26: i64 = sign_extend t25
9575/// t27: i64 = add t2, t26
9576/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9577/// t29: i32 = zero_extend t28
9578/// t32: i32 = shl t29, Constant:i8<8>
9579/// t33: i32 = or t23, t32
9580/// As a possible fix visitLoad can check if the load can be a part of a load
9581/// combine pattern and add corresponding OR roots to the worklist.
9582SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9583 assert(N->getOpcode() == ISD::OR &&
9584 "Can only match load combining against OR nodes");
9585
9586 // Handles simple types only
9587 EVT VT = N->getValueType(0);
9588 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9589 return SDValue();
9590 unsigned ByteWidth = VT.getSizeInBits() / 8;
9591
9592 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9593 auto MemoryByteOffset = [&](SDByteProvider P) {
9594 assert(P.hasSrc() && "Must be a memory byte provider");
9595 auto *Load = cast<LoadSDNode>(P.Src.value());
9596
9597 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9598
9599 assert(LoadBitWidth % 8 == 0 &&
9600 "can only analyze providers for individual bytes not bit");
9601 unsigned LoadByteWidth = LoadBitWidth / 8;
9602 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9603 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9604 };
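// For example, for a 4-byte load, DestOffset 0 (the least significant byte of
// the loaded value) corresponds to memory byte 0 on a little-endian target
// and to memory byte 3 on a big-endian target.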
9605
9606 std::optional<BaseIndexOffset> Base;
9607 SDValue Chain;
9608
9609 SmallPtrSet<LoadSDNode *, 8> Loads;
9610 std::optional<SDByteProvider> FirstByteProvider;
9611 int64_t FirstOffset = INT64_MAX;
9612
9613 // Check if all the bytes of the OR we are looking at are loaded from the same
9614 // base address. Collect byte offsets from the Base address in ByteOffsets.
9615 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9616 unsigned ZeroExtendedBytes = 0;
9617 for (int i = ByteWidth - 1; i >= 0; --i) {
9618 auto P =
9619 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9620 /*StartingIndex*/ i);
9621 if (!P)
9622 return SDValue();
9623
9624 if (P->isConstantZero()) {
9625 // It's OK for the N most significant bytes to be 0, we can just
9626 // zero-extend the load.
9627 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9628 return SDValue();
9629 continue;
9630 }
9631 assert(P->hasSrc() && "provenance should either be memory or zero");
9632 auto *L = cast<LoadSDNode>(P->Src.value());
9633
9634 // All loads must share the same chain
9635 SDValue LChain = L->getChain();
9636 if (!Chain)
9637 Chain = LChain;
9638 else if (Chain != LChain)
9639 return SDValue();
9640
9641 // Loads must share the same base address
9642 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9643 int64_t ByteOffsetFromBase = 0;
9644
9645 // For vector loads, the expected load combine pattern will have an
9646 // ExtractElement for each index in the vector. While each of these
9647 // ExtractElements will be accessing the same base address as determined
9648 // by the load instruction, the actual bytes they interact with will differ
9649 // due to different ExtractElement indices. To accurately determine the
9650 // byte position of an ExtractElement, we offset the base load ptr with
9651 // the index multiplied by the byte size of each element in the vector.
9652 if (L->getMemoryVT().isVector()) {
9653 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9654 if (LoadWidthInBit % 8 != 0)
9655 return SDValue();
9656 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9657 Ptr.addToOffset(ByteOffsetFromVector);
9658 }
9659
9660 if (!Base)
9661 Base = Ptr;
9662
9663 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9664 return SDValue();
9665
9666 // Calculate the offset of the current byte from the base address
9667 ByteOffsetFromBase += MemoryByteOffset(*P);
9668 ByteOffsets[i] = ByteOffsetFromBase;
9669
9670 // Remember the first byte load
9671 if (ByteOffsetFromBase < FirstOffset) {
9672 FirstByteProvider = P;
9673 FirstOffset = ByteOffsetFromBase;
9674 }
9675
9676 Loads.insert(L);
9677 }
9678
9679 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9680 "memory, so there must be at least one load which produces the value");
9681 assert(Base && "Base address of the accessed memory location must be set");
9682 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9683
9684 bool NeedsZext = ZeroExtendedBytes > 0;
9685
9686 EVT MemVT =
9687 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9688
9689 if (!MemVT.isSimple())
9690 return SDValue();
9691
9692 // Before legalize we can introduce too-wide illegal loads which will later be
9693 // split into legal-sized loads. This enables us to combine an i64-load-by-i8
9694 // pattern into a couple of i32 loads on 32-bit targets.
9695 if (LegalOperations &&
9696 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9697 MemVT))
9698 return SDValue();
9699
9700 // Check if the bytes of the OR we are looking at match either a big or
9701 // little endian value load.
9702 std::optional<bool> IsBigEndian = isBigEndian(
9703 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9704 if (!IsBigEndian)
9705 return SDValue();
9706
9707 assert(FirstByteProvider && "must be set");
9708
9709 // Ensure that the first byte is loaded from the zero offset of the first load,
9710 // so the combined value can be loaded from the first load's address.
9711 if (MemoryByteOffset(*FirstByteProvider) != 0)
9712 return SDValue();
9713 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9714
9715 // The node we are looking at matches with the pattern, check if we can
9716 // replace it with a single (possibly zero-extended) load and bswap + shift if
9717 // needed.
9718
9719 // If the load needs a byte swap, check if the target supports it.
9720 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9721
9722 // Before legalize we can introduce illegal bswaps which will be later
9723 // converted to an explicit bswap sequence. This way we end up with a single
9724 // load and byte shuffling instead of several loads and byte shuffling.
9725 // We do not introduce illegal bswaps when zero-extending as this tends to
9726 // introduce too many arithmetic instructions.
9727 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9728 !TLI.isOperationLegal(ISD::BSWAP, VT))
9729 return SDValue();
9730
9731 // If we need to bswap and zero extend, we have to insert a shift. Check that
9732 // it is legal.
9733 if (NeedsBswap && NeedsZext && LegalOperations &&
9734 !TLI.isOperationLegal(ISD::SHL, VT))
9735 return SDValue();
9736
9737 // Check that a load of the wide type is both allowed and fast on the target
9738 unsigned Fast = 0;
9739 bool Allowed =
9740 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9741 *FirstLoad->getMemOperand(), &Fast);
9742 if (!Allowed || !Fast)
9743 return SDValue();
9744
9745 SDValue NewLoad =
9746 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9747 Chain, FirstLoad->getBasePtr(),
9748 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9749
9750 // Transfer chain users from old loads to the new load.
9751 for (LoadSDNode *L : Loads)
9752 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9753
9754 if (!NeedsBswap)
9755 return NewLoad;
9756
9757 SDValue ShiftedLoad =
9758 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9759 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9760 VT, SDLoc(N)))
9761 : NewLoad;
9762 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9763}
9764
9765// If the target has andn, bsl, or a similar bit-select instruction,
9766// we want to unfold masked merge, with canonical pattern of:
9767// | A | |B|
9768// ((x ^ y) & m) ^ y
9769// | D |
9770// Into:
9771// (x & m) | (y & ~m)
9772// If y is a constant, m is not a 'not', and the 'andn' does not work with
9773// immediates, we unfold into a different pattern:
9774// ~(~x & m) & (m | y)
9775// If x is a constant, m is a 'not', and the 'andn' does not work with
9776// immediates, we unfold into a different pattern:
9777// (x | ~m) & ~(~m & ~y)
9778// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9779// the very least that breaks andnpd / andnps patterns, and because those
9780// patterns are simplified in IR and shouldn't be created in the DAG
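// Concrete 8-bit example: with x = 0b10101010, y = 0b01010101 and
// m = 0b11110000, both ((x ^ y) & m) ^ y and (x & m) | (y & ~m) select the
// high nibble from x and the low nibble from y, giving 0b10100101.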
9781SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9782 assert(N->getOpcode() == ISD::XOR);
9783
9784 // Don't touch 'not' (i.e. where y = -1).
9785 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9786 return SDValue();
9787
9788 EVT VT = N->getValueType(0);
9789
9790 // There are 3 commutable operators in the pattern,
9791 // so we have to deal with 8 possible variants of the basic pattern.
9792 SDValue X, Y, M;
9793 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9794 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9795 return false;
9796 SDValue Xor = And.getOperand(XorIdx);
9797 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9798 return false;
9799 SDValue Xor0 = Xor.getOperand(0);
9800 SDValue Xor1 = Xor.getOperand(1);
9801 // Don't touch 'not' (i.e. where y = -1).
9802 if (isAllOnesOrAllOnesSplat(Xor1))
9803 return false;
9804 if (Other == Xor0)
9805 std::swap(Xor0, Xor1);
9806 if (Other != Xor1)
9807 return false;
9808 X = Xor0;
9809 Y = Xor1;
9810 M = And.getOperand(XorIdx ? 0 : 1);
9811 return true;
9812 };
9813
9814 SDValue N0 = N->getOperand(0);
9815 SDValue N1 = N->getOperand(1);
9816 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9817 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9818 return SDValue();
9819
9820 // Don't do anything if the mask is constant. This should not be reachable.
9821 // InstCombine should have already unfolded this pattern, and DAGCombiner
9822 // probably shouldn't produce it either.
9823 if (isa<ConstantSDNode>(M.getNode()))
9824 return SDValue();
9825
9826 // We can transform if the target has AndNot
9827 if (!TLI.hasAndNot(M))
9828 return SDValue();
9829
9830 SDLoc DL(N);
9831
9832 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9833 // a bitwise not that would already allow ANDN to be used.
9834 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9835 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9836 // If not, we need to do a bit more work to make sure andn is still used.
9837 SDValue NotX = DAG.getNOT(DL, X, VT);
9838 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9839 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9840 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9841 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9842 }
9843
9844 // If X is a constant and M is a bitwise not, check that 'andn' works with
9845 // immediates.
9846 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9847 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9848 // If not, we need to do a bit more work to make sure andn is still used.
9849 SDValue NotM = M.getOperand(0);
9850 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9851 SDValue NotY = DAG.getNOT(DL, Y, VT);
9852 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9853 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9854 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9855 }
9856
9857 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9858 SDValue NotM = DAG.getNOT(DL, M, VT);
9859 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9860
9861 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9862}
9863
9864SDValue DAGCombiner::visitXOR(SDNode *N) {
9865 SDValue N0 = N->getOperand(0);
9866 SDValue N1 = N->getOperand(1);
9867 EVT VT = N0.getValueType();
9868 SDLoc DL(N);
9869
9870 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9871 if (N0.isUndef() && N1.isUndef())
9872 return DAG.getConstant(0, DL, VT);
9873
9874 // fold (xor x, undef) -> undef
9875 if (N0.isUndef())
9876 return N0;
9877 if (N1.isUndef())
9878 return N1;
9879
9880 // fold (xor c1, c2) -> c1^c2
9881 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9882 return C;
9883
9884 // canonicalize constant to RHS
9885 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9886 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9887 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9888
9889 // fold vector ops
9890 if (VT.isVector()) {
9891 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9892 return FoldedVOp;
9893
9894 // fold (xor x, 0) -> x, vector edition
9895 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9896 return N0;
9897 }
9898
9899 // fold (xor x, 0) -> x
9900 if (isNullConstant(N1))
9901 return N0;
9902
9903 if (SDValue NewSel = foldBinOpIntoSelect(N))
9904 return NewSel;
9905
9906 // reassociate xor
9907 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9908 return RXOR;
9909
9910 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9911 if (SDValue SD =
9912 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9913 return SD;
9914
9915 // fold (a^b) -> (a|b) iff a and b share no bits.
9916 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9917 DAG.haveNoCommonBitsSet(N0, N1))
9918 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9919
9920 // look for 'add-like' folds:
9921 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9922 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9923 isMinSignedConstant(N1))
9924 if (SDValue Combined = visitADDLike(N))
9925 return Combined;
9926
9927 // fold not (setcc x, y, cc) -> setcc x y !cc
9928 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
9929 unsigned N0Opcode = N0.getOpcode();
9930 SDValue LHS, RHS, CC;
9931 if (TLI.isConstTrueVal(N1) &&
9932 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
9933 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
9934 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
9935 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9936 LHS.getValueType());
9937 if (!LegalOperations ||
9938 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9939 switch (N0Opcode) {
9940 default:
9941 llvm_unreachable("Unhandled SetCC Equivalent!");
9942 case ISD::SETCC:
9943 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9944 case ISD::SELECT_CC:
9945 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9946 N0.getOperand(3), NotCC);
9947 case ISD::STRICT_FSETCC:
9948 case ISD::STRICT_FSETCCS: {
9949 if (N0.hasOneUse()) {
9950 // FIXME Can we handle multiple uses? Could we token factor the chain
9951 // results from the new/old setcc?
9952 SDValue SetCC =
9953 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9954 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9955 CombineTo(N, SetCC);
9956 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9957 recursivelyDeleteUnusedNodes(N0.getNode());
9958 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9959 }
9960 break;
9961 }
9962 }
9963 }
9964 }
9965
9966 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9967 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9968 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9969 SDValue V = N0.getOperand(0);
9970 SDLoc DL0(N0);
9971 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9972 DAG.getConstant(1, DL0, V.getValueType()));
9973 AddToWorklist(V.getNode());
9974 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9975 }
9976
9977 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9978 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9979 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9980 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9981 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9982 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9983 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9984 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9985 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9986 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9987 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9988 }
9989 }
9990 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9991 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9992 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9993 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9994 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9995 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9996 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9997 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9998 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9999 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10000 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10001 }
10002 }
10003
10004 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10005 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10006 SDValue Y = N0.getOperand(0);
10007 SDValue X = N0.getOperand(1);
10008
10009 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10010 APInt NotYValue = ~YConst->getAPIntValue();
10011 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10012 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10013 }
10014 }
10015
10016 // fold (not (add X, -1)) -> (neg X)
10017 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10018 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10019 return DAG.getNegative(N0.getOperand(0), DL, VT);
10020 }
10021
10022 // fold (xor (and x, y), y) -> (and (not x), y)
10023 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10024 SDValue X = N0.getOperand(0);
10025 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10026 AddToWorklist(NotX.getNode());
10027 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10028 }
10029
10030 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10031 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10032 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10033 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10034 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10035 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10036 SDValue S0 = S.getOperand(0);
10037 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10038 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10039 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10040 return DAG.getNode(ISD::ABS, DL, VT, S0);
10041 }
10042 }
10043
10044 // fold (xor x, x) -> 0
10045 if (N0 == N1)
10046 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10047
10048 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10049 // Here is a concrete example of this equivalence:
10050 // i16 x == 14
10051 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10052 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10053 //
10054 // =>
10055 //
10056 // i16 ~1 == 0b1111111111111110
10057 // i16 rol(~1, 14) == 0b1011111111111111
10058 //
10059 // Some additional tips to help conceptualize this transform:
10060 // - Try to see the operation as placing a single zero in a value of all ones.
10061 // - There exists no value for x which would allow the result to contain zero.
10062 // - Values of x larger than the bitwidth are undefined and do not require a
10063 // consistent result.
10064 // - Pushing the zero left requires shifting one bits in from the right.
10065 // A rotate left of ~1 is a nice way of achieving the desired result.
10066 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10067 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
10068 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10069 N0.getOperand(1));
10070 }
10071
10072 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10073 if (N0Opcode == N1.getOpcode())
10074 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10075 return V;
10076
10077 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10078 return R;
10079 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10080 return R;
10081 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10082 return R;
10083
10084 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10085 if (SDValue MM = unfoldMaskedMerge(N))
10086 return MM;
10087
10088 // Simplify the expression using non-local knowledge.
10089 if (SimplifyDemandedBits(SDValue(N, 0)))
10090 return SDValue(N, 0);
10091
10092 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10093 return Combined;
10094
10095 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10096 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10097 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10098 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10099 SDValue Op0;
10100 if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10101 m_SMax(m_Value(Op0), m_Specific(N1)),
10102 m_UMin(m_Value(Op0), m_Specific(N1)),
10103 m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10104
10105 if (isa<ConstantSDNode>(N1) ||
10106 ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
10107 // For vectors, only optimize when the constant is zero or all-ones to
10108 // avoid generating more instructions
10109 if (VT.isVector()) {
10110 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10111 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10112 return SDValue();
10113 }
10114
10115 // Avoid the fold if the minmax operation is legal and select is expensive
10116 if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
10118 return SDValue();
10119
10120 EVT CCVT = getSetCCResultType(VT);
10121 ISD::CondCode CC;
10122 switch (N0.getOpcode()) {
10123 case ISD::SMIN:
10124 CC = ISD::SETLT;
10125 break;
10126 case ISD::SMAX:
10127 CC = ISD::SETGT;
10128 break;
10129 case ISD::UMIN:
10130 CC = ISD::SETULT;
10131 break;
10132 case ISD::UMAX:
10133 CC = ISD::SETUGT;
10134 break;
10135 }
10136 SDValue FN1 = DAG.getFreeze(N1);
10137 SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10138 SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10139 SDValue Zero = DAG.getConstant(0, DL, VT);
10140 return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10141 }
10142 }
10143
10144 return SDValue();
10145}
10146
10147/// If we have a shift-by-constant of a bitwise logic op that itself has a
10148/// shift-by-constant operand with identical opcode, we may be able to convert
10149/// that into 2 independent shifts followed by the logic op. This is a
10150 /// throughput improvement.
10151 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10152 // Match a one-use bitwise logic op.
10153 SDValue LogicOp = Shift->getOperand(0);
10154 if (!LogicOp.hasOneUse())
10155 return SDValue();
10156
10157 unsigned LogicOpcode = LogicOp.getOpcode();
10158 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10159 LogicOpcode != ISD::XOR)
10160 return SDValue();
10161
10162 // Find a matching one-use shift by constant.
10163 unsigned ShiftOpcode = Shift->getOpcode();
10164 SDValue C1 = Shift->getOperand(1);
10165 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10166 assert(C1Node && "Expected a shift with constant operand");
10167 const APInt &C1Val = C1Node->getAPIntValue();
10168 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10169 const APInt *&ShiftAmtVal) {
10170 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10171 return false;
10172
10173 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10174 if (!ShiftCNode)
10175 return false;
10176
10177 // Capture the shifted operand and shift amount value.
10178 ShiftOp = V.getOperand(0);
10179 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10180
10181 // Shift amount types do not have to match their operand type, so check that
10182 // the constants are the same width.
10183 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10184 return false;
10185
10186 // The fold is not valid if the sum of the shift values doesn't fit in the
10187 // given shift amount type.
10188 bool Overflow = false;
10189 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10190 if (Overflow)
10191 return false;
10192
10193 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10194 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10195 return false;
10196
10197 return true;
10198 };
10199
10200 // Logic ops are commutative, so check each operand for a match.
10201 SDValue X, Y;
10202 const APInt *C0Val;
10203 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10204 Y = LogicOp.getOperand(1);
10205 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10206 Y = LogicOp.getOperand(0);
10207 else
10208 return SDValue();
10209
10210 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
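// For example, (shl (and (shl X, 2), Y), 3) becomes
// (and (shl X, 5), (shl Y, 3)), so the two new shifts can execute in parallel.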
10211 SDLoc DL(Shift);
10212 EVT VT = Shift->getValueType(0);
10213 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10214 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10215 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10216 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10217 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10218 LogicOp->getFlags());
10219}
10220
10221/// Handle transforms common to the three shifts, when the shift amount is a
10222/// constant.
10223/// We are looking for: (shift being one of shl/sra/srl)
10224/// shift (binop X, C0), C1
10225/// And want to transform into:
10226/// binop (shift X, C1), (shift C0, C1)
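/// For example, (shl (or X, 5), 2) can become (or (shl X, 2), 20), subject to
/// the profitability checks below.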
10227SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10228 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10229
10230 // Do not turn a 'not' into a regular xor.
10231 if (isBitwiseNot(N->getOperand(0)))
10232 return SDValue();
10233
10234 // The inner binop must be one-use, since we want to replace it.
10235 SDValue LHS = N->getOperand(0);
10236 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10237 return SDValue();
10238
10239 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10240 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10241 return R;
10242
10243 // We want to pull some binops through shifts, so that we have (and (shift))
10244 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10245 // thing happens with address calculations, so it's important to canonicalize
10246 // it.
10247 switch (LHS.getOpcode()) {
10248 default:
10249 return SDValue();
10250 case ISD::OR:
10251 case ISD::XOR:
10252 case ISD::AND:
10253 break;
10254 case ISD::ADD:
10255 if (N->getOpcode() != ISD::SHL)
10256 return SDValue(); // only shl(add) not sr[al](add).
10257 break;
10258 }
10259
10260 // FIXME: disable this unless the input to the binop is a shift by a constant
10261 // or is a copy/select. Enable this in other cases once we figure out exactly
10262 // when it is profitable.
10263 SDValue BinOpLHSVal = LHS.getOperand(0);
10264 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10265 BinOpLHSVal.getOpcode() == ISD::SRA ||
10266 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10267 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10268 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10269 BinOpLHSVal.getOpcode() == ISD::SELECT;
10270
10271 if (!IsShiftByConstant && !IsCopyOrSelect)
10272 return SDValue();
10273
10274 if (IsCopyOrSelect && N->hasOneUse())
10275 return SDValue();
10276
10277 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10278 SDLoc DL(N);
10279 EVT VT = N->getValueType(0);
10280 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10281 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10282 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10283 N->getOperand(1));
10284 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10285 }
10286
10287 return SDValue();
10288}
10289
10290SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10291 assert(N->getOpcode() == ISD::TRUNCATE);
10292 assert(N->getOperand(0).getOpcode() == ISD::AND);
10293
10294 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10295 EVT TruncVT = N->getValueType(0);
10296 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10297 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10298 SDValue N01 = N->getOperand(0).getOperand(1);
10299 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10300 SDLoc DL(N);
10301 SDValue N00 = N->getOperand(0).getOperand(0);
10302 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10303 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10304 AddToWorklist(Trunc00.getNode());
10305 AddToWorklist(Trunc01.getNode());
10306 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10307 }
10308 }
10309
10310 return SDValue();
10311}
10312
10313SDValue DAGCombiner::visitRotate(SDNode *N) {
10314 SDLoc dl(N);
10315 SDValue N0 = N->getOperand(0);
10316 SDValue N1 = N->getOperand(1);
10317 EVT VT = N->getValueType(0);
10318 unsigned Bitsize = VT.getScalarSizeInBits();
10319
10320 // fold (rot x, 0) -> x
10321 if (isNullOrNullSplat(N1))
10322 return N0;
10323
10324 // fold (rot x, c) -> x iff (c % BitSize) == 0
10325 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10326 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10327 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10328 return N0;
10329 }
10330
10331 // fold (rot x, c) -> (rot x, c % BitSize)
10332 bool OutOfRange = false;
10333 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10334 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10335 return true;
10336 };
10337 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10338 EVT AmtVT = N1.getValueType();
10339 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10340 if (SDValue Amt =
10341 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10342 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10343 }
10344
10345 // rot i16 X, 8 --> bswap X
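// E.g. rotating i16 0xABCD by 8 in either direction gives 0xCDAB, which is
// exactly bswap(0xABCD).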
10346 auto *RotAmtC = isConstOrConstSplat(N1);
10347 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10348 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10349 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10350
10351 // Simplify the operands using demanded-bits information.
10352 if (SimplifyDemandedBits(SDValue(N, 0)))
10353 return SDValue(N, 0);
10354
10355 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10356 if (N1.getOpcode() == ISD::TRUNCATE &&
10357 N1.getOperand(0).getOpcode() == ISD::AND) {
10358 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10359 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10360 }
10361
10362 unsigned NextOp = N0.getOpcode();
10363
10364 // fold (rot* (rot* x, c2), c1)
10365 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
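// E.g. for i8, (rotl (rotl x, 7), 3) -> (rotl x, (3 + 7 + 8) % 8) = (rotl x, 2),
// and (rotl (rotr x, 3), 7) -> (rotl x, (7 - 3 + 8) % 8) = (rotl x, 4).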
10366 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10367 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10368 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10369 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10370 EVT ShiftVT = N1.getValueType();
10371 bool SameSide = (N->getOpcode() == NextOp);
10372 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10373 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10374 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10375 {N1, BitsizeC});
10376 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10377 {N0.getOperand(1), BitsizeC});
10378 if (Norm1 && Norm2)
10379 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10380 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10381 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10382 {CombinedShift, BitsizeC});
10383 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10384 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10385 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10386 CombinedShiftNorm);
10387 }
10388 }
10389 }
10390 return SDValue();
10391}
10392
10393SDValue DAGCombiner::visitSHL(SDNode *N) {
10394 SDValue N0 = N->getOperand(0);
10395 SDValue N1 = N->getOperand(1);
10396 if (SDValue V = DAG.simplifyShift(N0, N1))
10397 return V;
10398
10399 SDLoc DL(N);
10400 EVT VT = N0.getValueType();
10401 EVT ShiftVT = N1.getValueType();
10402 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10403
10404 // fold (shl c1, c2) -> c1<<c2
10405 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10406 return C;
10407
10408 // fold vector ops
10409 if (VT.isVector()) {
10410 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10411 return FoldedVOp;
10412
10413 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10414 // If setcc produces an all-ones true value then:
10415 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10416 if (N1CV && N1CV->isConstant()) {
10417 if (N0.getOpcode() == ISD::AND) {
10418 SDValue N00 = N0->getOperand(0);
10419 SDValue N01 = N0->getOperand(1);
10420 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10421
10422 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10423 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10424 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10425 if (SDValue C =
10426 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10427 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10428 }
10429 }
10430 }
10431 }
10432
10433 if (SDValue NewSel = foldBinOpIntoSelect(N))
10434 return NewSel;
10435
10436 // if (shl x, c) is known to be zero, return 0
10437 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10438 return DAG.getConstant(0, DL, VT);
10439
10440 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10441 if (N1.getOpcode() == ISD::TRUNCATE &&
10442 N1.getOperand(0).getOpcode() == ISD::AND) {
10443 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10444 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10445 }
10446
10447 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10448 if (N0.getOpcode() == ISD::SHL) {
10449 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10450 ConstantSDNode *RHS) {
10451 APInt c1 = LHS->getAPIntValue();
10452 APInt c2 = RHS->getAPIntValue();
10453 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10454 return (c1 + c2).uge(OpSizeInBits);
10455 };
10456 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10457 return DAG.getConstant(0, DL, VT);
10458
10459 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10460 ConstantSDNode *RHS) {
10461 APInt c1 = LHS->getAPIntValue();
10462 APInt c2 = RHS->getAPIntValue();
10463 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10464 return (c1 + c2).ult(OpSizeInBits);
10465 };
10466 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10467 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10468 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10469 }
10470 }
10471
10472 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10473 // For this to be valid, the second form must not preserve any of the bits
10474 // that are shifted out by the inner shift in the first form. This means
10475 // the outer shift size must be >= the number of bits added by the ext.
10476 // As a corollary, we don't care what kind of ext it is.
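// E.g. with i16 extended to i32: (shl (zext (shl X, 12)), 20) discards every
// bit of the inner value and folds to 0, while (shl (zext (shl X, 4)), 16)
// becomes (shl (zext X), 20).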
10477 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10478 N0.getOpcode() == ISD::ANY_EXTEND ||
10479 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10480 N0.getOperand(0).getOpcode() == ISD::SHL) {
10481 SDValue N0Op0 = N0.getOperand(0);
10482 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10483 EVT InnerVT = N0Op0.getValueType();
10484 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10485
10486 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10487 ConstantSDNode *RHS) {
10488 APInt c1 = LHS->getAPIntValue();
10489 APInt c2 = RHS->getAPIntValue();
10490 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10491 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10492 (c1 + c2).uge(OpSizeInBits);
10493 };
10494 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10495 /*AllowUndefs*/ false,
10496 /*AllowTypeMismatch*/ true))
10497 return DAG.getConstant(0, DL, VT);
10498
10499 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10500 ConstantSDNode *RHS) {
10501 APInt c1 = LHS->getAPIntValue();
10502 APInt c2 = RHS->getAPIntValue();
10503 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10504 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10505 (c1 + c2).ult(OpSizeInBits);
10506 };
10507 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10508 /*AllowUndefs*/ false,
10509 /*AllowTypeMismatch*/ true)) {
10510 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10511 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10512 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10513 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10514 }
10515 }
10516
10517 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10518 // Only fold this if the inner zext has no other uses to avoid increasing
10519 // the total number of instructions.
10520 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10521 N0.getOperand(0).getOpcode() == ISD::SRL) {
10522 SDValue N0Op0 = N0.getOperand(0);
10523 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10524
10525 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10526 APInt c1 = LHS->getAPIntValue();
10527 APInt c2 = RHS->getAPIntValue();
10528 zeroExtendToMatch(c1, c2);
10529 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10530 };
10531 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10532 /*AllowUndefs*/ false,
10533 /*AllowTypeMismatch*/ true)) {
10534 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10535 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10536 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10537 AddToWorklist(NewSHL.getNode());
10538 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10539 }
10540 }
10541
10542 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10543 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10544 ConstantSDNode *RHS) {
10545 const APInt &LHSC = LHS->getAPIntValue();
10546 const APInt &RHSC = RHS->getAPIntValue();
10547 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10548 LHSC.getZExtValue() <= RHSC.getZExtValue();
10549 };
10550
10551 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10552 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10553 if (N0->getFlags().hasExact()) {
10554 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10555 /*AllowUndefs*/ false,
10556 /*AllowTypeMismatch*/ true)) {
10557 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10558 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10559 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10560 }
10561 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10562 /*AllowUndefs*/ false,
10563 /*AllowTypeMismatch*/ true)) {
10564 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10565 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10566 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10567 }
10568 }
10569
10570 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10571 // (and (srl x, (sub c1, c2)), MASK)
10572 // Only fold this if the inner shift has no other uses -- if it does,
10573 // folding this will increase the total number of instructions.
10574 if (N0.getOpcode() == ISD::SRL &&
10575 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10576 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10577 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10578 /*AllowUndefs*/ false,
10579 /*AllowTypeMismatch*/ true)) {
10580 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10581 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10582 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10583 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10584 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10585 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10586 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10587 }
10588 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10589 /*AllowUndefs*/ false,
10590 /*AllowTypeMismatch*/ true)) {
10591 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10592 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10593 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10594 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10595 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10596 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10597 }
10598 }
10599 }
10600
10601 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10602 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10603 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10604 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10605 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10606 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10607 }
10608
10609 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10610 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10611 // Variant of version done on multiply, except mul by a power of 2 is turned
10612 // into a shift.
10613 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10614 TLI.isDesirableToCommuteWithShift(N, Level)) {
10615 SDValue N01 = N0.getOperand(1);
10616 if (SDValue Shl1 =
10617 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10618 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10619 AddToWorklist(Shl0.getNode());
10620 SDNodeFlags Flags;
10621 // Preserve the disjoint flag for Or.
10622 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10623 Flags.setDisjoint(true);
10624 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10625 }
10626 }
10627
10628 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10629 // TODO: Add zext/add_nuw variant with suitable test coverage
10630 // TODO: Should we limit this with isLegalAddImmediate?
10631 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10632 N0.getOperand(0).getOpcode() == ISD::ADD &&
10633 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10634 TLI.isDesirableToCommuteWithShift(N, Level)) {
10635 SDValue Add = N0.getOperand(0);
10636 SDLoc DL(N0);
10637 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10638 {Add.getOperand(1)})) {
10639 if (SDValue ShlC =
10640 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10641 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10642 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10643 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10644 }
10645 }
10646 }
10647
10648 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10649 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10650 SDValue N01 = N0.getOperand(1);
10651 if (SDValue Shl =
10652 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10653 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10654 }
10655
10656 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10657 if (N1C && !N1C->isOpaque())
10658 if (SDValue NewSHL = visitShiftByConstant(N))
10659 return NewSHL;
10660
10661 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10662 // target.
10663 if (((N1.getOpcode() == ISD::CTTZ &&
10664 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10665 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10666 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10667 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10668 SDValue Y = N1.getOperand(0);
10669 SDLoc DL(N);
10670 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10671 SDValue And =
10672 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10673 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10674 }
10675
10676 if (SimplifyDemandedBits(SDValue(N, 0)))
10677 return SDValue(N, 0);
10678
10679 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10680 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10681 const APInt &C0 = N0.getConstantOperandAPInt(0);
10682 const APInt &C1 = N1C->getAPIntValue();
10683 return DAG.getVScale(DL, VT, C0 << C1);
10684 }
10685
10686 SDValue X;
10687 APInt VS0;
10688
10689 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10690 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10691 SDNodeFlags Flags;
10692 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10693 N0->getFlags().hasNoUnsignedWrap());
10694
10695 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10696 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10697 }
10698
10699 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10700 APInt ShlVal;
10701 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10702 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10703 const APInt &C0 = N0.getConstantOperandAPInt(0);
10704 if (ShlVal.ult(C0.getBitWidth())) {
10705 APInt NewStep = C0 << ShlVal;
10706 return DAG.getStepVector(DL, VT, NewStep);
10707 }
10708 }
10709
10710 return SDValue();
10711}
10712
10713// Transform a right shift of a multiply into a multiply-high.
10714// Examples:
10715 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10716 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10717 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10718 const TargetLowering &TLI) {
10719 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10720 "SRL or SRA node is required here!");
10721
10722 // Check the shift amount. Proceed with the transformation if the shift
10723 // amount is constant.
10724 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10725 if (!ShiftAmtSrc)
10726 return SDValue();
10727
10728 // The operation feeding into the shift must be a multiply.
10729 SDValue ShiftOperand = N->getOperand(0);
10730 if (ShiftOperand.getOpcode() != ISD::MUL)
10731 return SDValue();
10732
10733 // Both operands must be equivalent extend nodes.
10734 SDValue LeftOp = ShiftOperand.getOperand(0);
10735 SDValue RightOp = ShiftOperand.getOperand(1);
10736
10737 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10738 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10739
10740 if (!IsSignExt && !IsZeroExt)
10741 return SDValue();
10742
10743 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10744 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10745
10746 // return true if U may use the lower bits of its operands
10747 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10748 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10749 return true;
10750 }
10751 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10752 if (!UShiftAmtSrc) {
10753 return true;
10754 }
10755 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10756 return UShiftAmt < NarrowVTSize;
10757 };
10758
10759  // If the lower part of the MUL is also used and MUL_LOHI is supported, do
10760  // not introduce the MULH in favor of MUL_LOHI.
10761 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10762 if (!ShiftOperand.hasOneUse() &&
10763 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10764 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10765 return SDValue();
10766 }
10767
10768 SDValue MulhRightOp;
10769  if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10770    unsigned ActiveBits = IsSignExt
10771 ? Constant->getAPIntValue().getSignificantBits()
10772 : Constant->getAPIntValue().getActiveBits();
10773 if (ActiveBits > NarrowVTSize)
10774 return SDValue();
10775 MulhRightOp = DAG.getConstant(
10776 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10777 NarrowVT);
10778 } else {
10779 if (LeftOp.getOpcode() != RightOp.getOpcode())
10780 return SDValue();
10781 // Check that the two extend nodes are the same type.
10782 if (NarrowVT != RightOp.getOperand(0).getValueType())
10783 return SDValue();
10784 MulhRightOp = RightOp.getOperand(0);
10785 }
10786
10787 EVT WideVT = LeftOp.getValueType();
10788 // Proceed with the transformation if the wide types match.
10789 assert((WideVT == RightOp.getValueType()) &&
10790 "Cannot have a multiply node with two different operand types.");
10791
10792 // Proceed with the transformation if the wide type is twice as large
10793 // as the narrow type.
10794 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10795 return SDValue();
10796
10797 // Check the shift amount with the narrow type size.
10798 // Proceed with the transformation if the shift amount is the width
10799 // of the narrow type.
10800 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10801 if (ShiftAmt != NarrowVTSize)
10802 return SDValue();
10803
10804 // If the operation feeding into the MUL is a sign extend (sext),
10805  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10806 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10807
10808  // Combine to mulh if mulh is legal/custom for the narrow type on the target,
10809  // or, if it is a vector type, if we can transform to an acceptable type and
10810  // rely on legalization to split/combine the result.
10811 if (NarrowVT.isVector()) {
10812 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10813 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10814 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10815 return SDValue();
10816 } else {
10817 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10818 return SDValue();
10819 }
10820
10821 SDValue Result =
10822 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10823 bool IsSigned = N->getOpcode() == ISD::SRA;
10824 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10825}
10826
10827// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10828// This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
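// Worked example (illustrative): the fold is justified by bswap/bitreverse
// distributing over bitwise logic, e.g. bswap(or(a, b)) == or(bswap(a), bswap(b))
// together with bswap(bswap(x)) == x, so bswap(or(bswap(x), y)) == or(x, bswap(y)).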
10829static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10830  unsigned Opcode = N->getOpcode();
10831 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10832 return SDValue();
10833
10834 SDValue N0 = N->getOperand(0);
10835 EVT VT = N->getValueType(0);
10836 SDLoc DL(N);
10837 SDValue X, Y;
10838
10839 // If both operands are bswap/bitreverse, ignore the multiuse
10840  if (sd_match(N0, m_BitwiseLogic(m_UnaryOp(Opcode, m_Value(X)),
10841                      m_UnaryOp(Opcode, m_Value(Y))))))
10842 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10843
10844 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10845  if (sd_match(N0, m_OneUse(m_BitwiseLogic(
10846                       m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10847 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10848 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10849 }
10850
10851 return SDValue();
10852}
10853
10854SDValue DAGCombiner::visitSRA(SDNode *N) {
10855 SDValue N0 = N->getOperand(0);
10856 SDValue N1 = N->getOperand(1);
10857 if (SDValue V = DAG.simplifyShift(N0, N1))
10858 return V;
10859
10860 SDLoc DL(N);
10861 EVT VT = N0.getValueType();
10862 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10863
10864  // fold (sra c1, c2) -> c1 >>s c2
10865 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10866 return C;
10867
10868 // Arithmetic shifting an all-sign-bit value is a no-op.
10869 // fold (sra 0, x) -> 0
10870 // fold (sra -1, x) -> -1
10871 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10872 return N0;
10873
10874 // fold vector ops
10875 if (VT.isVector())
10876 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10877 return FoldedVOp;
10878
10879 if (SDValue NewSel = foldBinOpIntoSelect(N))
10880 return NewSel;
10881
10882 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10883
10884 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10885 // clamp (add c1, c2) to max shift.
10886 if (N0.getOpcode() == ISD::SRA) {
10887 EVT ShiftVT = N1.getValueType();
10888 EVT ShiftSVT = ShiftVT.getScalarType();
10889 SmallVector<SDValue, 16> ShiftValues;
10890
10891 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10892 APInt c1 = LHS->getAPIntValue();
10893 APInt c2 = RHS->getAPIntValue();
10894 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10895 APInt Sum = c1 + c2;
10896 unsigned ShiftSum =
10897 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10898 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10899 return true;
10900 };
10901 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10902 SDValue ShiftValue;
10903 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10904 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10905 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10906 assert(ShiftValues.size() == 1 &&
10907 "Expected matchBinaryPredicate to return one element for "
10908 "SPLAT_VECTORs");
10909 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10910 } else
10911 ShiftValue = ShiftValues[0];
10912 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10913 }
10914 }
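  // Worked example (illustrative): (sra (sra x, 3), 5) becomes (sra x, 8); on
  // i8, (sra (sra x, 6), 6) clamps the summed amount 12 to the maximum defined
  // shift of 7, i.e. (sra x, 7).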
10915
10916 // fold (sra (shl X, m), (sub result_size, n))
10917 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10918 // result_size - n != m.
10919  // If truncate is free for the target, sext(shl) is likely to result in better
10920 // code.
10921 if (N0.getOpcode() == ISD::SHL && N1C) {
10922 // Get the two constants of the shifts, CN0 = m, CN = n.
10923 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10924 if (N01C) {
10925 LLVMContext &Ctx = *DAG.getContext();
10926 // Determine what the truncate's result bitsize and type would be.
10927 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10928
10929 if (VT.isVector())
10930 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10931
10932 // Determine the residual right-shift amount.
10933 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10934
10935 // If the shift is not a no-op (in which case this should be just a sign
10936      // extend already), the truncated-to type is legal, sign_extend is legal
10937 // on that type, and the truncate to that type is both legal and free,
10938 // perform the transform.
10939 if ((ShiftAmt > 0) &&
10940          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10941          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10942          TLI.isTruncateFree(VT, TruncVT)) {
10943 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10944 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10945 N0.getOperand(0), Amt);
10946 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10947 Shift);
10948 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10949 N->getValueType(0), Trunc);
10950 }
10951 }
10952 }
10953
10954 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10955 // sra (add (shl X, N1C), AddC), N1C -->
10956 // sext (add (trunc X to (width - N1C)), AddC')
10957 // sra (sub AddC, (shl X, N1C)), N1C -->
10958 // sext (sub AddC1',(trunc X to (width - N1C)))
10959 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10960 N0.hasOneUse()) {
10961 bool IsAdd = N0.getOpcode() == ISD::ADD;
10962 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10963 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10964 Shl.hasOneUse()) {
10965 // TODO: AddC does not need to be a splat.
10966 if (ConstantSDNode *AddC =
10967 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10968 // Determine what the truncate's type would be and ask the target if
10969 // that is a free operation.
10970 LLVMContext &Ctx = *DAG.getContext();
10971 unsigned ShiftAmt = N1C->getZExtValue();
10972 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10973 if (VT.isVector())
10974 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10975
10976 // TODO: The simple type check probably belongs in the default hook
10977 // implementation and/or target-specific overrides (because
10978 // non-simple types likely require masking when legalized), but
10979 // that restriction may conflict with other transforms.
10980 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10981 TLI.isTruncateFree(VT, TruncVT)) {
10982 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10983 SDValue ShiftC =
10984 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10985 TruncVT.getScalarSizeInBits()),
10986 DL, TruncVT);
10987 SDValue Add;
10988 if (IsAdd)
10989 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10990 else
10991 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10992 return DAG.getSExtOrTrunc(Add, DL, VT);
10993 }
10994 }
10995 }
10996 }
10997
10998 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10999 if (N1.getOpcode() == ISD::TRUNCATE &&
11000 N1.getOperand(0).getOpcode() == ISD::AND) {
11001 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11002 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11003 }
11004
11005 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11006 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11007 // if c1 is equal to the number of bits the trunc removes
11008 // TODO - support non-uniform vector shift amounts.
11009 if (N0.getOpcode() == ISD::TRUNCATE &&
11010 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11011 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11012 N0.getOperand(0).hasOneUse() &&
11013 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11014 SDValue N0Op0 = N0.getOperand(0);
11015 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11016 EVT LargeVT = N0Op0.getValueType();
11017 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11018 if (LargeShift->getAPIntValue() == TruncBits) {
11019 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11020 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11021 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11022 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11023 SDValue SRA =
11024 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11025 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11026 }
11027 }
11028 }
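  // Worked example (illustrative): for i64 x, (sra (trunc (srl x, 32) to i32), c2)
  // keeps bits [32+c2, 63] of x, so it can be rewritten as
  // (trunc (sra x, 32 + c2)); the trunc removes exactly the 32 bits the inner
  // shift already discarded.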
11029
11030 // Simplify, based on bits shifted out of the LHS.
11031  if (SimplifyDemandedBits(SDValue(N, 0)))
11032    return SDValue(N, 0);
11033
11034 // If the sign bit is known to be zero, switch this to a SRL.
11035 if (DAG.SignBitIsZero(N0))
11036 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11037
11038 if (N1C && !N1C->isOpaque())
11039 if (SDValue NewSRA = visitShiftByConstant(N))
11040 return NewSRA;
11041
11042 // Try to transform this shift into a multiply-high if
11043 // it matches the appropriate pattern detected in combineShiftToMULH.
11044 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11045 return MULH;
11046
11047 // Attempt to convert a sra of a load into a narrower sign-extending load.
11048 if (SDValue NarrowLoad = reduceLoadWidth(N))
11049 return NarrowLoad;
11050
11051 if (SDValue AVG = foldShiftToAvg(N, DL))
11052 return AVG;
11053
11054 return SDValue();
11055}
11056
11057SDValue DAGCombiner::visitSRL(SDNode *N) {
11058 SDValue N0 = N->getOperand(0);
11059 SDValue N1 = N->getOperand(1);
11060 if (SDValue V = DAG.simplifyShift(N0, N1))
11061 return V;
11062
11063 SDLoc DL(N);
11064 EVT VT = N0.getValueType();
11065 EVT ShiftVT = N1.getValueType();
11066 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11067
11068 // fold (srl c1, c2) -> c1 >>u c2
11069 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11070 return C;
11071
11072 // fold vector ops
11073 if (VT.isVector())
11074 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11075 return FoldedVOp;
11076
11077 if (SDValue NewSel = foldBinOpIntoSelect(N))
11078 return NewSel;
11079
11080 // if (srl x, c) is known to be zero, return 0
11081 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11082 if (N1C &&
11083 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11084 return DAG.getConstant(0, DL, VT);
11085
11086 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11087 if (N0.getOpcode() == ISD::SRL) {
11088 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11089 ConstantSDNode *RHS) {
11090 APInt c1 = LHS->getAPIntValue();
11091 APInt c2 = RHS->getAPIntValue();
11092 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11093 return (c1 + c2).uge(OpSizeInBits);
11094 };
11095 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11096 return DAG.getConstant(0, DL, VT);
11097
11098 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11099 ConstantSDNode *RHS) {
11100 APInt c1 = LHS->getAPIntValue();
11101 APInt c2 = RHS->getAPIntValue();
11102 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11103 return (c1 + c2).ult(OpSizeInBits);
11104 };
11105 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11106 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11107 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11108 }
11109 }
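  // Worked example (illustrative): (srl (srl x, 3), 5) becomes (srl x, 8),
  // while on i8 (srl (srl x, 5), 4) shifts out every bit and folds to 0.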
11110
11111 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11112 N0.getOperand(0).getOpcode() == ISD::SRL) {
11113 SDValue InnerShift = N0.getOperand(0);
11114 // TODO - support non-uniform vector shift amounts.
11115 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11116 uint64_t c1 = N001C->getZExtValue();
11117 uint64_t c2 = N1C->getZExtValue();
11118 EVT InnerShiftVT = InnerShift.getValueType();
11119 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11120 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11121 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11122 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11123 if (c1 + OpSizeInBits == InnerShiftSize) {
11124 if (c1 + c2 >= InnerShiftSize)
11125 return DAG.getConstant(0, DL, VT);
11126 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11127 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11128 InnerShift.getOperand(0), NewShiftAmt);
11129 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11130 }
11131 // In the more general case, we can clear the high bits after the shift:
11132 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11133 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11134 c1 + c2 < InnerShiftSize) {
11135 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11136 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11137 InnerShift.getOperand(0), NewShiftAmt);
11138 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11139 OpSizeInBits - c2),
11140 DL, InnerShiftVT);
11141 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11142 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11143 }
11144 }
11145 }
11146
11147 if (N0.getOpcode() == ISD::SHL) {
11148 // fold (srl (shl nuw x, c), c) -> x
11149 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11150 return N0.getOperand(0);
11151
11152    // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
11153    //                               (and (srl x, (sub c2, c1)), MASK)
11154    if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11155        TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
11156 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11157 ConstantSDNode *RHS) {
11158 const APInt &LHSC = LHS->getAPIntValue();
11159 const APInt &RHSC = RHS->getAPIntValue();
11160 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11161 LHSC.getZExtValue() <= RHSC.getZExtValue();
11162 };
11163 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11164 /*AllowUndefs*/ false,
11165 /*AllowTypeMismatch*/ true)) {
11166 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11167 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11168 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11169 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11170 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11171 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11172 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11173 }
11174 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11175 /*AllowUndefs*/ false,
11176 /*AllowTypeMismatch*/ true)) {
11177 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11178 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11179 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11180 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11181 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11182 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11183 }
11184 }
11185 }
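  // Worked example (illustrative): on i8, (srl (shl x, 3), 1) becomes
  // (and (shl x, 2), 0x7c): x << 3 keeps the low five bits of x in positions
  // 3..7, and the srl by 1 moves them to 2..6, i.e. (x << 2) & 0b0111'1100.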
11186
11187 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11188 // TODO - support non-uniform vector shift amounts.
11189 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11190 // Shifting in all undef bits?
11191 EVT SmallVT = N0.getOperand(0).getValueType();
11192 unsigned BitSize = SmallVT.getScalarSizeInBits();
11193 if (N1C->getAPIntValue().uge(BitSize))
11194 return DAG.getUNDEF(VT);
11195
11196 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11197 uint64_t ShiftAmt = N1C->getZExtValue();
11198 SDLoc DL0(N0);
11199 SDValue SmallShift =
11200 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11201 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11202 AddToWorklist(SmallShift.getNode());
11203 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11204 return DAG.getNode(ISD::AND, DL, VT,
11205 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11206 DAG.getConstant(Mask, DL, VT));
11207 }
11208 }
11209
11210 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11211 // bit, which is unmodified by sra.
11212 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11213 if (N0.getOpcode() == ISD::SRA)
11214 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11215 }
11216
11217 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11218 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11219 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11220 isPowerOf2_32(OpSizeInBits) &&
11221 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11222 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11223
11224 // If any of the input bits are KnownOne, then the input couldn't be all
11225 // zeros, thus the result of the srl will always be zero.
11226 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11227
11228    // If all of the bits input to the ctlz node are known to be zero, then
11229 // the result of the ctlz is "32" and the result of the shift is one.
11230 APInt UnknownBits = ~Known.Zero;
11231 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11232
11233 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11234 if (UnknownBits.isPowerOf2()) {
11235      // Okay, we know that only the single bit specified by UnknownBits
11236 // could be set on input to the CTLZ node. If this bit is set, the SRL
11237 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11238 // to an SRL/XOR pair, which is likely to simplify more.
11239 unsigned ShAmt = UnknownBits.countr_zero();
11240 SDValue Op = N0.getOperand(0);
11241
11242 if (ShAmt) {
11243 SDLoc DL(N0);
11244 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11245 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11246 AddToWorklist(Op.getNode());
11247 }
11248 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11249 }
11250 }
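  // Worked example (illustrative): if only bit 4 of x can be nonzero on i32,
  // ctlz(x) is 27 when that bit is set and 32 when x is zero, so
  // (srl (ctlz x), 5) yields 0 or 1 respectively; the rewrite computes the same
  // value as ((x >> 4) ^ 1).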
11251
11252 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11253 if (N1.getOpcode() == ISD::TRUNCATE &&
11254 N1.getOperand(0).getOpcode() == ISD::AND) {
11255 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11256 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11257 }
11258
11259 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11260 // -> (logic_op (srl x, c1), (zext y))
11261 // c1 <= leadingzeros(zext(y))
11262 SDValue X, ZExtY;
11263 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11264 m_Value(X),
11265                              m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
11266                                                     m_Opc(ISD::ZERO_EXTEND)),
11267                              m_Specific(N1))))))) {
11268    unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11269                               ZExtY.getOperand(0).getScalarValueSizeInBits();
11270 if (N1C->getZExtValue() <= NumLeadingZeros)
11271 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11272 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11273 }
11274
11275 // fold operands of srl based on knowledge that the low bits are not
11276 // demanded.
11277  if (SimplifyDemandedBits(SDValue(N, 0)))
11278    return SDValue(N, 0);
11279
11280 if (N1C && !N1C->isOpaque())
11281 if (SDValue NewSRL = visitShiftByConstant(N))
11282 return NewSRL;
11283
11284 // Attempt to convert a srl of a load into a narrower zero-extending load.
11285 if (SDValue NarrowLoad = reduceLoadWidth(N))
11286 return NarrowLoad;
11287
11288 // Here is a common situation. We want to optimize:
11289 //
11290 // %a = ...
11291 // %b = and i32 %a, 2
11292 // %c = srl i32 %b, 1
11293 // brcond i32 %c ...
11294 //
11295 // into
11296 //
11297 // %a = ...
11298 // %b = and %a, 2
11299 // %c = setcc eq %b, 0
11300 // brcond %c ...
11301 //
11302  // However, after the source operand of SRL is optimized into AND, the SRL
11303 // itself may not be optimized further. Look for it and add the BRCOND into
11304 // the worklist.
11305 //
11306  // This also tends to happen for binary operations when SimplifyDemandedBits
11307 // is involved.
11308 //
11309  // FIXME: This is unnecessary if we process the DAG in topological order,
11310 // which we plan to do. This workaround can be removed once the DAG is
11311 // processed in topological order.
11312 if (N->hasOneUse()) {
11313 SDNode *User = *N->user_begin();
11314
11315    // Look past the truncate.
11316 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11317 User = *User->user_begin();
11318
11319 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11320 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11321 AddToWorklist(User);
11322 }
11323
11324 // Try to transform this shift into a multiply-high if
11325 // it matches the appropriate pattern detected in combineShiftToMULH.
11326 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11327 return MULH;
11328
11329 if (SDValue AVG = foldShiftToAvg(N, DL))
11330 return AVG;
11331
11332 return SDValue();
11333}
11334
11335SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11336 EVT VT = N->getValueType(0);
11337 SDValue N0 = N->getOperand(0);
11338 SDValue N1 = N->getOperand(1);
11339 SDValue N2 = N->getOperand(2);
11340 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11341 unsigned BitWidth = VT.getScalarSizeInBits();
11342 SDLoc DL(N);
11343
11344 // fold (fshl/fshr C0, C1, C2) -> C3
11345 if (SDValue C =
11346 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11347 return C;
11348
11349 // fold (fshl N0, N1, 0) -> N0
11350 // fold (fshr N0, N1, 0) -> N1
11351  if (isPowerOf2_32(BitWidth))
11352  if (DAG.MaskedValueIsZero(
11353 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11354 return IsFSHL ? N0 : N1;
11355
11356 auto IsUndefOrZero = [](SDValue V) {
11357 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11358 };
11359
11360 // TODO - support non-uniform vector shift amounts.
11361 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11362 EVT ShAmtTy = N2.getValueType();
11363
11364 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11365 if (Cst->getAPIntValue().uge(BitWidth)) {
11366 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11367 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11368 DAG.getConstant(RotAmt, DL, ShAmtTy));
11369 }
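    // Worked example (illustrative): on i8, fshl(a, b, 11) selects the same
    // bits as fshl(a, b, 3) because the shift amount of a funnel shift is
    // interpreted modulo the bit width, so 11 is canonicalized to 11 % 8 = 3.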
11370
11371 unsigned ShAmt = Cst->getZExtValue();
11372 if (ShAmt == 0)
11373 return IsFSHL ? N0 : N1;
11374
11375 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11376 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11377 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11378 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11379 if (IsUndefOrZero(N0))
11380 return DAG.getNode(
11381 ISD::SRL, DL, VT, N1,
11382 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11383 if (IsUndefOrZero(N1))
11384 return DAG.getNode(
11385 ISD::SHL, DL, VT, N0,
11386 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11387
11388 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11389 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11390 // TODO - bigendian support once we have test coverage.
11391      // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11392 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11393 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11394 !DAG.getDataLayout().isBigEndian()) {
11395 auto *LHS = dyn_cast<LoadSDNode>(N0);
11396 auto *RHS = dyn_cast<LoadSDNode>(N1);
11397 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11398 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11399          (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11400          ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
11401 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11402 SDLoc DL(RHS);
11403 uint64_t PtrOff =
11404 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11405 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11406 unsigned Fast = 0;
11407 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11408 RHS->getAddressSpace(), NewAlign,
11409 RHS->getMemOperand()->getFlags(), &Fast) &&
11410 Fast) {
11411 SDValue NewPtr = DAG.getMemBasePlusOffset(
11412 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11413 AddToWorklist(NewPtr.getNode());
11414 SDValue Load = DAG.getLoad(
11415 VT, DL, RHS->getChain(), NewPtr,
11416 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11417 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11418 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11419 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11420 return Load;
11421 }
11422 }
11423 }
11424 }
11425 }
11426
11427 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11428 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11429  // iff we know the shift amount is in range.
11430 // TODO: when is it worth doing SUB(BW, N2) as well?
11431 if (isPowerOf2_32(BitWidth)) {
11432 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11433 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11434 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11435 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11436 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11437 }
11438
11439 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11440 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11441 // TODO: Investigate flipping this rotate if only one is legal.
11442 // If funnel shift is legal as well we might be better off avoiding
11443 // non-constant (BW - N2).
11444 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11445 if (N0 == N1 && hasOperation(RotOpc, VT))
11446 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11447
11448 // Simplify, based on bits shifted out of N0/N1.
11449  if (SimplifyDemandedBits(SDValue(N, 0)))
11450    return SDValue(N, 0);
11451
11452 return SDValue();
11453}
11454
11455SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11456 SDValue N0 = N->getOperand(0);
11457 SDValue N1 = N->getOperand(1);
11458 if (SDValue V = DAG.simplifyShift(N0, N1))
11459 return V;
11460
11461 SDLoc DL(N);
11462 EVT VT = N0.getValueType();
11463
11464 // fold (*shlsat c1, c2) -> c1<<c2
11465 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11466 return C;
11467
11468 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11469
11470 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11471 // fold (sshlsat x, c) -> (shl x, c)
11472 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11473 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11474 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11475
11476 // fold (ushlsat x, c) -> (shl x, c)
11477 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11478        N1C->getAPIntValue().ule(
11479            DAG.computeKnownBits(N0).countMinLeadingZeros()))
11480 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11481 }
11482
11483 return SDValue();
11484}
11485
11486// Given an ABS node, detect the following patterns:
11487// (ABS (SUB (EXTEND a), (EXTEND b))).
11488// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11489// Generates UABD/SABD instruction.
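// Worked example (illustrative): for i8 a = -100 and b = 27, sext to i32 gives
// a difference of -127, whose absolute value 127 equals abds(a, b) computed
// directly on i8 and zero-extended back.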
11490SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11491 EVT SrcVT = N->getValueType(0);
11492
11493 if (N->getOpcode() == ISD::TRUNCATE)
11494 N = N->getOperand(0).getNode();
11495
11496 EVT VT = N->getValueType(0);
11497 SDValue Op0, Op1;
11498
11499 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11500 return SDValue();
11501
11502 SDValue AbsOp0 = N->getOperand(0);
11503 unsigned Opc0 = Op0.getOpcode();
11504
11505 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11506 // fallback to ValueTracking.
11507 if (Opc0 != Op1.getOpcode() ||
11508 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11509 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11510 // fold (abs (sub nsw x, y)) -> abds(x, y)
11511 // Don't fold this for unsupported types as we lose the NSW handling.
11512 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11513 (AbsOp0->getFlags().hasNoSignedWrap() ||
11514 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11515 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11516 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11517 }
11518 // fold (abs (sub x, y)) -> abdu(x, y)
11519 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11520 DAG.SignBitIsZero(Op1)) {
11521 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11522 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11523 }
11524 return SDValue();
11525 }
11526
11527 EVT VT0, VT1;
11528 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11529 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11530 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11531 } else {
11532 VT0 = Op0.getOperand(0).getValueType();
11533 VT1 = Op1.getOperand(0).getValueType();
11534 }
11535 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11536
11537 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11538 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11539 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11540 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11541 (VT1 == MaxVT || Op1->hasOneUse()) &&
11542 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11543 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11544 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11545 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11546 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11547 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11548 }
11549
11550 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11551 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11552 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11553 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11554 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11555 }
11556
11557 return SDValue();
11558}
11559
11560SDValue DAGCombiner::visitABS(SDNode *N) {
11561 SDValue N0 = N->getOperand(0);
11562 EVT VT = N->getValueType(0);
11563 SDLoc DL(N);
11564
11565 // fold (abs c1) -> c2
11566 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11567 return C;
11568 // fold (abs (abs x)) -> (abs x)
11569 if (N0.getOpcode() == ISD::ABS)
11570 return N0;
11571 // fold (abs x) -> x iff not-negative
11572 if (DAG.SignBitIsZero(N0))
11573 return N0;
11574
11575 if (SDValue ABD = foldABSToABD(N, DL))
11576 return ABD;
11577
11578 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11579 // iff zero_extend/truncate are free.
11580 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11581 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11582 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11583 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11584 hasOperation(ISD::ABS, ExtVT)) {
11585 return DAG.getNode(
11586 ISD::ZERO_EXTEND, DL, VT,
11587 DAG.getNode(ISD::ABS, DL, ExtVT,
11588 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11589 }
11590 }
11591
11592 return SDValue();
11593}
11594
11595SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11596 SDValue N0 = N->getOperand(0);
11597 EVT VT = N->getValueType(0);
11598 SDLoc DL(N);
11599
11600 // fold (bswap c1) -> c2
11601 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11602 return C;
11603 // fold (bswap (bswap x)) -> x
11604 if (N0.getOpcode() == ISD::BSWAP)
11605 return N0.getOperand(0);
11606
11607 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11608 // isn't supported, it will be expanded to bswap followed by a manual reversal
11609 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11610 // the two bswaps if the bitreverse gets expanded.
11611 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11612 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11613 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11614 }
11615
11616 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11617  // iff c >= bw/2 (i.e. lower half is known zero)
11618 unsigned BW = VT.getScalarSizeInBits();
11619 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11620 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11621 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11622 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11623 ShAmt->getZExtValue() >= (BW / 2) &&
11624 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11625 TLI.isTruncateFree(VT, HalfVT) &&
11626 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11627 SDValue Res = N0.getOperand(0);
11628 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11629 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11630 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11631 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11632 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11633 return DAG.getZExtOrTrunc(Res, DL, VT);
11634 }
11635 }
11636
11637 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11638 // inverse-shift-of-bswap:
11639 // bswap (X u<< C) --> (bswap X) u>> C
11640 // bswap (X u>> C) --> (bswap X) u<< C
11641 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11642 N0.hasOneUse()) {
11643 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11644 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11645 ShAmt->getZExtValue() % 8 == 0) {
11646 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11647 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11648 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11649 }
11650 }
11651
11652 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11653 return V;
11654
11655 return SDValue();
11656}
11657
11658SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11659 SDValue N0 = N->getOperand(0);
11660 EVT VT = N->getValueType(0);
11661 SDLoc DL(N);
11662
11663 // fold (bitreverse c1) -> c2
11664 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11665 return C;
11666
11667 // fold (bitreverse (bitreverse x)) -> x
11668 if (N0.getOpcode() == ISD::BITREVERSE)
11669 return N0.getOperand(0);
11670
11671 SDValue X, Y;
11672
11673 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11674 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11675      sd_match(N0, m_Srl(m_BitReverse(m_Value(X)), m_Value(Y))))
11676    return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11677
11677
11678 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11679 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11681 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11682
11683 return SDValue();
11684}
11685
11686SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11687 SDValue N0 = N->getOperand(0);
11688 EVT VT = N->getValueType(0);
11689 SDLoc DL(N);
11690
11691 // fold (ctlz c1) -> c2
11692 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11693 return C;
11694
11695 // If the value is known never to be zero, switch to the undef version.
11696 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11697 if (DAG.isKnownNeverZero(N0))
11698 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11699
11700 return SDValue();
11701}
11702
11703SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11704 SDValue N0 = N->getOperand(0);
11705 EVT VT = N->getValueType(0);
11706 SDLoc DL(N);
11707
11708 // fold (ctlz_zero_undef c1) -> c2
11709 if (SDValue C =
11710          DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11711    return C;
11712 return SDValue();
11713}
11714
11715SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11716 SDValue N0 = N->getOperand(0);
11717 EVT VT = N->getValueType(0);
11718 SDLoc DL(N);
11719
11720 // fold (cttz c1) -> c2
11721 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11722 return C;
11723
11724 // If the value is known never to be zero, switch to the undef version.
11725 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11726 if (DAG.isKnownNeverZero(N0))
11727 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11728
11729 return SDValue();
11730}
11731
11732SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11733 SDValue N0 = N->getOperand(0);
11734 EVT VT = N->getValueType(0);
11735 SDLoc DL(N);
11736
11737 // fold (cttz_zero_undef c1) -> c2
11738 if (SDValue C =
11739          DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11740    return C;
11741 return SDValue();
11742}
11743
11744SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11745 SDValue N0 = N->getOperand(0);
11746 EVT VT = N->getValueType(0);
11747 unsigned NumBits = VT.getScalarSizeInBits();
11748 SDLoc DL(N);
11749
11750 // fold (ctpop c1) -> c2
11751 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11752 return C;
11753
11754 // If the source is being shifted, but doesn't affect any active bits,
11755 // then we can call CTPOP on the shift source directly.
11756 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11757 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11758 const APInt &Amt = AmtC->getAPIntValue();
11759 if (Amt.ult(NumBits)) {
11760 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11761 if ((N0.getOpcode() == ISD::SRL &&
11762 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11763 (N0.getOpcode() == ISD::SHL &&
11764 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11765 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11766 }
11767 }
11768 }
11769 }
11770
11771  // If the upper bits are known to be zero, then see if it's profitable to
11772 // only count the lower bits.
11773 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11774 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11775 if (hasOperation(ISD::CTPOP, HalfVT) &&
11776 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11777 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11778 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11779 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11780 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11781 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11782 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11783 }
11784 }
11785 }
11786
11787 return SDValue();
11788}
11789
11790static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11791                             SDValue RHS, const SDNodeFlags Flags,
11792 const TargetLowering &TLI) {
11793 EVT VT = LHS.getValueType();
11794 if (!VT.isFloatingPoint())
11795 return false;
11796
11797 const TargetOptions &Options = DAG.getTarget().Options;
11798
11799 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11800         TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11801         (Flags.hasNoNaNs() ||
11802 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11803}
11804
11805static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11806                                      SDValue RHS, SDValue True, SDValue False,
11807 ISD::CondCode CC,
11808 const TargetLowering &TLI,
11809 SelectionDAG &DAG) {
11810 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11811 switch (CC) {
11812 case ISD::SETOLT:
11813 case ISD::SETOLE:
11814 case ISD::SETLT:
11815 case ISD::SETLE:
11816 case ISD::SETULT:
11817 case ISD::SETULE: {
11818 // Since it's known never nan to get here already, either fminnum or
11819    // fminnum_ieee are OK. Try the ieee version first, since fminnum is
11820 // expanded in terms of it.
11821 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11822 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11823 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11824
11825 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11826 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11827 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11828 return SDValue();
11829 }
11830 case ISD::SETOGT:
11831 case ISD::SETOGE:
11832 case ISD::SETGT:
11833 case ISD::SETGE:
11834 case ISD::SETUGT:
11835 case ISD::SETUGE: {
11836 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11837 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11838 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11839
11840 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11841 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11842 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11843 return SDValue();
11844 }
11845 default:
11846 return SDValue();
11847 }
11848}
11849
11850// Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
11851SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11852 const unsigned Opcode = N->getOpcode();
11853 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11854 return SDValue();
11855
11856 EVT VT = N->getValueType(0);
11857 bool IsUnsigned = Opcode == ISD::SRL;
11858
11859 // Captured values.
11860 SDValue A, B, Add;
11861
11862 // Match floor average as it is common to both floor/ceil avgs.
11863 if (sd_match(N, m_BinOp(Opcode,
11864                          m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11865                  m_One()))) {
11866 // Decide whether signed or unsigned.
11867 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11868 if (!hasOperation(FloorISD, VT))
11869 return SDValue();
11870
11871 // Can't optimize adds that may wrap.
11872 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11873 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11874 return SDValue();
11875
11876 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11877 }
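  // Worked example (illustrative): with x = 5, y = 8 and an nsw add,
  // (sra (add x, y), 1) computes 13 >> 1 = 6, matching
  // avgfloors(5, 8) = floor((5 + 8) / 2) = 6.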
11878
11879 return SDValue();
11880}
11881
11882SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
11883 unsigned Opc = N->getOpcode();
11884 SDValue X, Y, Z;
11885 if (sd_match(
11886          N, m_BitwiseLogic(m_Value(X), m_Add(m_Not(m_Value(Y)), m_Value(Z)))))
11887    return DAG.getNode(Opc, DL, VT, X,
11888 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11889
11890  if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Sub(m_Not(m_Value(Y)),
11891                  m_Value(Z)))))
11892 return DAG.getNode(Opc, DL, VT, X,
11893 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11894
11895 return SDValue();
11896}
11897
11898/// Generate Min/Max node
11899SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11900 SDValue RHS, SDValue True,
11901 SDValue False, ISD::CondCode CC) {
11902 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11903 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11904
11905 // If we can't directly match this, try to see if we can pull an fneg out of
11906 // the select.
11907  SDValue NegTrue = TLI.getCheaperNegatedExpression(
11908      True, DAG, LegalOperations, ForCodeSize);
11909 if (!NegTrue)
11910 return SDValue();
11911
11912 HandleSDNode NegTrueHandle(NegTrue);
11913
11914 // Try to unfold an fneg from the select if we are comparing the negated
11915 // constant.
11916 //
11917 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11918 //
11919 // TODO: Handle fabs
11920 if (LHS == NegTrue) {
11921 // If we can't directly match this, try to see if we can pull an fneg out of
11922 // the select.
11923    SDValue NegRHS = TLI.getCheaperNegatedExpression(
11924        RHS, DAG, LegalOperations, ForCodeSize);
11925 if (NegRHS) {
11926 HandleSDNode NegRHSHandle(NegRHS);
11927 if (NegRHS == False) {
11928 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11929 False, CC, TLI, DAG);
11930 if (Combined)
11931 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11932 }
11933 }
11934 }
11935
11936 return SDValue();
11937}
11938
11939/// If a (v)select has a condition value that is a sign-bit test, try to smear
11940/// the condition operand sign-bit across the value width and use it as a mask.
11941static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11942                                   SelectionDAG &DAG) {
11943 SDValue Cond = N->getOperand(0);
11944 SDValue C1 = N->getOperand(1);
11945 SDValue C2 = N->getOperand(2);
11946  if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11947    return SDValue();
11948
11949 EVT VT = N->getValueType(0);
11950 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11951 VT != Cond.getOperand(0).getValueType())
11952 return SDValue();
11953
11954 // The inverted-condition + commuted-select variants of these patterns are
11955 // canonicalized to these forms in IR.
11956 SDValue X = Cond.getOperand(0);
11957 SDValue CondC = Cond.getOperand(1);
11958 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11959 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11960      isAllOnesOrAllOnesSplat(C2)) {
11961    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11962 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11963 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11964 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11965 }
11966 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11967 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11968 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11969 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11970 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11971 }
11972 return SDValue();
11973}
11974
11975static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11976                                       const TargetLowering &TLI) {
11977 if (!TLI.convertSelectOfConstantsToMath(VT))
11978 return false;
11979
11980 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11981 return true;
11982  if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11983    return true;
11984
11985 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11986 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11987 return true;
11988 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11989 return true;
11990
11991 return false;
11992}
11993
11994SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11995 SDValue Cond = N->getOperand(0);
11996 SDValue N1 = N->getOperand(1);
11997 SDValue N2 = N->getOperand(2);
11998 EVT VT = N->getValueType(0);
11999 EVT CondVT = Cond.getValueType();
12000 SDLoc DL(N);
12001
12002 if (!VT.isInteger())
12003 return SDValue();
12004
12005 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12006 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12007 if (!C1 || !C2)
12008 return SDValue();
12009
12010 if (CondVT != MVT::i1 || LegalOperations) {
12011 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12012 // We can't do this reliably if integer based booleans have different contents
12013 // to floating point based booleans. This is because we can't tell whether we
12014 // have an integer-based boolean or a floating-point-based boolean unless we
12015 // can find the SETCC that produced it and inspect its operands. This is
12016 // fairly easy if C is the SETCC node, but it can potentially be
12017 // undiscoverable (or not reasonably discoverable). For example, it could be
12018 // in another basic block or it could require searching a complicated
12019 // expression.
12020 if (CondVT.isInteger() &&
12021        TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12022            TargetLowering::ZeroOrOneBooleanContent &&
12023        TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12024            TargetLowering::ZeroOrOneBooleanContent &&
12025        C1->isZero() && C2->isOne()) {
12026 SDValue NotCond =
12027 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12028 if (VT.bitsEq(CondVT))
12029 return NotCond;
12030 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12031 }
12032
12033 return SDValue();
12034 }
12035
12036 // Only do this before legalization to avoid conflicting with target-specific
12037 // transforms in the other direction (create a select from a zext/sext). There
12038 // is also a target-independent combine here in DAGCombiner in the other
12039 // direction for (select Cond, -1, 0) when the condition is not i1.
12040 assert(CondVT == MVT::i1 && !LegalOperations);
12041
12042 // select Cond, 1, 0 --> zext (Cond)
12043 if (C1->isOne() && C2->isZero())
12044 return DAG.getZExtOrTrunc(Cond, DL, VT);
12045
12046 // select Cond, -1, 0 --> sext (Cond)
12047 if (C1->isAllOnes() && C2->isZero())
12048 return DAG.getSExtOrTrunc(Cond, DL, VT);
12049
12050 // select Cond, 0, 1 --> zext (!Cond)
12051 if (C1->isZero() && C2->isOne()) {
12052 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12053 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12054 return NotCond;
12055 }
12056
12057 // select Cond, 0, -1 --> sext (!Cond)
12058 if (C1->isZero() && C2->isAllOnes()) {
12059 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12060 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12061 return NotCond;
12062 }
12063
12064 // Use a target hook because some targets may prefer to transform in the
12065 // other direction.
12066  if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12067    return SDValue();
12068
12069 // For any constants that differ by 1, we can transform the select into
12070 // an extend and add.
12071 const APInt &C1Val = C1->getAPIntValue();
12072 const APInt &C2Val = C2->getAPIntValue();
12073
12074 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12075 if (C1Val - 1 == C2Val) {
12076 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12077 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12078 }
12079
12080 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12081 if (C1Val + 1 == C2Val) {
12082 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12083 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12084 }
12085
12086 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12087 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12088 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12089 SDValue ShAmtC =
12090 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12091 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12092 }
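  // Worked example (illustrative): select Cond, 8, 0 becomes (zext Cond) << 3,
  // since zext Cond is 0 or 1 and shifting by log2(8) = 3 reproduces exactly
  // 0 or 8.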
12093
12094 // select Cond, -1, C --> or (sext Cond), C
12095 if (C1->isAllOnes()) {
12096 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12097 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12098 }
12099
12100 // select Cond, C, -1 --> or (sext (not Cond)), C
12101 if (C2->isAllOnes()) {
12102 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12103 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12104 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12105 }
12106
12107  if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12108    return V;
12109
12110 return SDValue();
12111}
12112
12113template <class MatchContextClass>
12114static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
12115                                         SelectionDAG &DAG) {
12116 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12117 N->getOpcode() == ISD::VP_SELECT) &&
12118 "Expected a (v)(vp.)select");
12119 SDValue Cond = N->getOperand(0);
12120 SDValue T = N->getOperand(1), F = N->getOperand(2);
12121 EVT VT = N->getValueType(0);
12122 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12123 MatchContextClass matcher(DAG, TLI, N);
12124
12125 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12126 return SDValue();
12127
12128 // select Cond, Cond, F --> or Cond, freeze(F)
12129 // select Cond, 1, F --> or Cond, freeze(F)
12130 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12131 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12132
12133 // select Cond, T, Cond --> and Cond, freeze(T)
12134 // select Cond, T, 0 --> and Cond, freeze(T)
12135 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12136 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12137
12138 // select Cond, T, 1 --> or (not Cond), freeze(T)
12139 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12140 SDValue NotCond =
12141 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12142 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12143 }
12144
12145 // select Cond, 0, F --> and (not Cond), freeze(F)
12146 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12147 SDValue NotCond =
12148 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12149 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12150 }
12151
12152 return SDValue();
12153}
12154
12155static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
12156  SDValue N0 = N->getOperand(0);
12157 SDValue N1 = N->getOperand(1);
12158 SDValue N2 = N->getOperand(2);
12159 EVT VT = N->getValueType(0);
12160 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12161
12162 SDValue Cond0, Cond1;
12163 ISD::CondCode CC;
12164 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12165 m_CondCode(CC)))) ||
12166 VT != Cond0.getValueType())
12167 return SDValue();
12168
12169 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12170 // compare is inverted from that pattern ("Cond0 s> -1").
12171 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12172 ; // This is the pattern we are looking for.
12173 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12174 std::swap(N1, N2);
12175 else
12176 return SDValue();
12177
12178 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12179 if (isNullOrNullSplat(N2)) {
12180 SDLoc DL(N);
12181 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12182 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12183 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12184 }
12185
12186 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12187 if (isAllOnesOrAllOnesSplat(N1)) {
12188 SDLoc DL(N);
12189 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12190 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12191 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12192 }
12193
12194 // If we have to invert the sign bit mask, only do that transform if the
12195 // target has a bitwise 'and not' instruction (the invert is free).
12196  // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12197 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12198 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12199 SDLoc DL(N);
12200 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12201 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12202 SDValue Not = DAG.getNOT(DL, Sra, VT);
12203 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12204 }
12205
12206 // TODO: There's another pattern in this family, but it may require
12207 // implementing hasOrNot() to check for profitability:
12208 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12209
12210 return SDValue();
12211}
12212
12213// Match SELECTs with absolute difference patterns.
12214// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12215// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12216// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12217// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12218SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12219 SDValue False, ISD::CondCode CC,
12220 const SDLoc &DL) {
12221 bool IsSigned = isSignedIntSetCC(CC);
12222 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12223 EVT VT = LHS.getValueType();
12224
12225 if (LegalOperations && !hasOperation(ABDOpc, VT))
12226 return SDValue();
12227
12228 switch (CC) {
12229 case ISD::SETGT:
12230 case ISD::SETGE:
12231 case ISD::SETUGT:
12232 case ISD::SETUGE:
12233 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12234        sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
12235      return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12236 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12237 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12238 hasOperation(ABDOpc, VT))
12239 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12240 break;
12241 case ISD::SETLT:
12242 case ISD::SETLE:
12243 case ISD::SETULT:
12244 case ISD::SETULE:
12245     if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12246         sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
12247       return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12248 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12249 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12250 hasOperation(ABDOpc, VT))
12251 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12252 break;
12253 default:
12254 break;
12255 }
12256
12257 return SDValue();
12258}
12259
12260// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12261// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
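// Illustrative example (i8, C == 200, so ~C == 55):
//   (select (ugt x, 200), (add x, 55), x) == (umin (add x, 55), x)
// because the add only wraps to a value below x when x > 200.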
12262SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12263 SDValue False, ISD::CondCode CC,
12264 const SDLoc &DL) {
12265 APInt C;
12266 EVT VT = True.getValueType();
12267 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12268 if (CC == ISD::SETUGT && LHS == False &&
12269 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12270 SDValue AddC = DAG.getConstant(~C, DL, VT);
12271 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12272 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12273 }
12274 if (CC == ISD::SETULT && LHS == True &&
12275 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12276 SDValue AddC = DAG.getConstant(-C, DL, VT);
12277 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12278 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12279 }
12280 }
12281 return SDValue();
12282}
12283
12284SDValue DAGCombiner::visitSELECT(SDNode *N) {
12285 SDValue N0 = N->getOperand(0);
12286 SDValue N1 = N->getOperand(1);
12287 SDValue N2 = N->getOperand(2);
12288 EVT VT = N->getValueType(0);
12289 EVT VT0 = N0.getValueType();
12290 SDLoc DL(N);
12291 SDNodeFlags Flags = N->getFlags();
12292
12293 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12294 return V;
12295
12297 return V;
12298
12299 // select (not Cond), N1, N2 -> select Cond, N2, N1
12300 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12301 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12302
12303 if (SDValue V = foldSelectOfConstants(N))
12304 return V;
12305
12306 // If we can fold this based on the true/false value, do so.
12307 if (SimplifySelectOps(N, N1, N2))
12308 return SDValue(N, 0); // Don't revisit N.
12309
12310 if (VT0 == MVT::i1) {
12311 // The code in this block deals with the following 2 equivalences:
12312 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12313 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12314 // The target can specify its preferred form with the
12315     // shouldNormalizeToSelectSequence() callback. However, we always transform
12316     // to the right-hand form if the inner select already exists in the DAG,
12317     // and we always transform to the left-hand form if we know that we can
12318     // further optimize the combination of the conditions.
12319     bool normalizeToSequence =
12320         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12321     // select (and Cond0, Cond1), X, Y
12322 // -> select Cond0, (select Cond1, X, Y), Y
12323 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12324 SDValue Cond0 = N0->getOperand(0);
12325 SDValue Cond1 = N0->getOperand(1);
12326 SDValue InnerSelect =
12327 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12328 if (normalizeToSequence || !InnerSelect.use_empty())
12329 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12330 InnerSelect, N2, Flags);
12331 // Cleanup on failure.
12332 if (InnerSelect.use_empty())
12333 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12334 }
12335 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12336 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12337 SDValue Cond0 = N0->getOperand(0);
12338 SDValue Cond1 = N0->getOperand(1);
12339 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12340 Cond1, N1, N2, Flags);
12341 if (normalizeToSequence || !InnerSelect.use_empty())
12342 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12343 InnerSelect, Flags);
12344 // Cleanup on failure.
12345 if (InnerSelect.use_empty())
12346 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12347 }
12348
12349 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12350 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12351 SDValue N1_0 = N1->getOperand(0);
12352 SDValue N1_1 = N1->getOperand(1);
12353 SDValue N1_2 = N1->getOperand(2);
12354 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12355 // Create the actual and node if we can generate good code for it.
12356 if (!normalizeToSequence) {
12357 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12358 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12359 N2, Flags);
12360 }
12361 // Otherwise see if we can optimize the "and" to a better pattern.
12362 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12363 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12364 N2, Flags);
12365 }
12366 }
12367 }
12368 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12369 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12370 SDValue N2_0 = N2->getOperand(0);
12371 SDValue N2_1 = N2->getOperand(1);
12372 SDValue N2_2 = N2->getOperand(2);
12373 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12374 // Create the actual or node if we can generate good code for it.
12375 if (!normalizeToSequence) {
12376 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12377 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12378 N2_2, Flags);
12379 }
12380 // Otherwise see if we can optimize to a better pattern.
12381 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12382 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12383 N2_2, Flags);
12384 }
12385 }
12386
12387 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12388 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12389 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12390 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12391 N2.getOperand(1) == N1.getOperand(0) &&
12392 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12393 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12394
12395 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12396 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12397 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12398 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12399 N2.getOperand(1) == N1.getOperand(0) &&
12400 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12401 return DAG.getNegative(
12402 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12403 DL, VT);
12404 }
12405
12406 // Fold selects based on a setcc into other things, such as min/max/abs.
12407 if (N0.getOpcode() == ISD::SETCC) {
12408     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12409     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12410
12411 // select (fcmp lt x, y), x, y -> fminnum x, y
12412 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12413 //
12414 // This is OK if we don't care what happens if either operand is a NaN.
12415 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12416 if (SDValue FMinMax =
12417 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12418 return FMinMax;
12419
12420 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12421 // This is conservatively limited to pre-legal-operations to give targets
12422 // a chance to reverse the transform if they want to do that. Also, it is
12423 // unlikely that the pattern would be formed late, so it's probably not
12424 // worth going through the other checks.
12425 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12426 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12427 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12428 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12429 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12430 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12431 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12432 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12433 //
12434 // The IR equivalent of this transform would have this form:
12435 // %a = add %x, C
12436 // %c = icmp ugt %x, ~C
12437 // %r = select %c, -1, %a
12438 // =>
12439 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12440 // %u0 = extractvalue %u, 0
12441 // %u1 = extractvalue %u, 1
12442 // %r = select %u1, -1, %u0
12443 SDVTList VTs = DAG.getVTList(VT, VT0);
12444 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12445 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12446 }
12447 }
12448
12449 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12450         (!LegalOperations &&
12451          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12452 // Any flags available in a select/setcc fold will be on the setcc as they
12453 // migrated from fcmp
12454 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12455 N0.getOperand(2), N0->getFlags());
12456 }
12457
12458 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12459 return ABD;
12460
12461 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12462 return NewSel;
12463
12464 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12465 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12466 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12467 return UMin;
12468 }
12469
12470 if (!VT.isVector())
12471 if (SDValue BinOp = foldSelectOfBinops(N))
12472 return BinOp;
12473
12474 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12475 return R;
12476
12477 return SDValue();
12478}
12479
12480// This function assumes all the vselect's arguments are CONCAT_VECTOR
12481// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
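// For example, with Cond == <1,1,0,0>:
//   vselect <1,1,0,0>, (concat_vectors A, B), (concat_vectors C, D)
//     --> concat_vectors A, D
// since the bottom half of the condition picks from the LHS and the top half
// picks from the RHS.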
12482static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12483  SDLoc DL(N);
12484 SDValue Cond = N->getOperand(0);
12485 SDValue LHS = N->getOperand(1);
12486 SDValue RHS = N->getOperand(2);
12487 EVT VT = N->getValueType(0);
12488 int NumElems = VT.getVectorNumElements();
12489 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12490 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12491 Cond.getOpcode() == ISD::BUILD_VECTOR);
12492
12493 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12494 // binary ones here.
12495 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12496 return SDValue();
12497
12498 // We're sure we have an even number of elements due to the
12499 // concat_vectors we have as arguments to vselect.
12500   // Skip BV elements until we find one that's not an UNDEF.
12501   // After we find a non-UNDEF element, keep looping until we get to half the
12502   // length of the BV and see if all the non-undef nodes are the same.
12503 ConstantSDNode *BottomHalf = nullptr;
12504 for (int i = 0; i < NumElems / 2; ++i) {
12505 if (Cond->getOperand(i)->isUndef())
12506 continue;
12507
12508 if (BottomHalf == nullptr)
12509 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12510 else if (Cond->getOperand(i).getNode() != BottomHalf)
12511 return SDValue();
12512 }
12513
12514 // Do the same for the second half of the BuildVector
12515 ConstantSDNode *TopHalf = nullptr;
12516 for (int i = NumElems / 2; i < NumElems; ++i) {
12517 if (Cond->getOperand(i)->isUndef())
12518 continue;
12519
12520 if (TopHalf == nullptr)
12521 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12522 else if (Cond->getOperand(i).getNode() != TopHalf)
12523 return SDValue();
12524 }
12525
12526 assert(TopHalf && BottomHalf &&
12527 "One half of the selector was all UNDEFs and the other was all the "
12528 "same value. This should have been addressed before this function.");
12529   return DAG.getNode(
12530       ISD::CONCAT_VECTORS, DL, VT,
12531       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12532 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12533}
12534
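// Try to move a uniform (splat) component of a gather/scatter index into the
// base pointer: either the whole index is a splat, or the index is an ADD with
// a splat operand. This leaves a simpler (often zero) vector index behind.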
12535bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12536 SelectionDAG &DAG, const SDLoc &DL) {
12537
12538 // Only perform the transformation when existing operands can be reused.
12539 if (IndexIsScaled)
12540 return false;
12541
12542 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12543 return false;
12544
12545 EVT VT = BasePtr.getValueType();
12546
12547 if (SDValue SplatVal = DAG.getSplatValue(Index);
12548 SplatVal && !isNullConstant(SplatVal) &&
12549 SplatVal.getValueType() == VT) {
12550 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12551 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12552 return true;
12553 }
12554
12555 if (Index.getOpcode() != ISD::ADD)
12556 return false;
12557
12558 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12559 SplatVal && SplatVal.getValueType() == VT) {
12560 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12561 Index = Index.getOperand(1);
12562 return true;
12563 }
12564 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12565 SplatVal && SplatVal.getValueType() == VT) {
12566 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12567 Index = Index.getOperand(0);
12568 return true;
12569 }
12570 return false;
12571}
12572
12573// Fold sext/zext of index into index type.
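// For example, an index of (zext nxv4i32 to nxv4i64) can be replaced by the
// narrower i32 index with an unsigned index type when the target reports that
// the extend is redundant for this data type.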
12574bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12575 SelectionDAG &DAG) {
12576 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12577
12578 // It's always safe to look through zero extends.
12579 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12580 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12581 IndexType = ISD::UNSIGNED_SCALED;
12582 Index = Index.getOperand(0);
12583 return true;
12584 }
12585 if (ISD::isIndexTypeSigned(IndexType)) {
12586 IndexType = ISD::UNSIGNED_SCALED;
12587 return true;
12588 }
12589 }
12590
12591 // It's only safe to look through sign extends when Index is signed.
12592 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12593 ISD::isIndexTypeSigned(IndexType) &&
12594 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12595 Index = Index.getOperand(0);
12596 return true;
12597 }
12598
12599 return false;
12600}
12601
12602SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12603 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12604 SDValue Mask = MSC->getMask();
12605 SDValue Chain = MSC->getChain();
12606 SDValue Index = MSC->getIndex();
12607 SDValue Scale = MSC->getScale();
12608 SDValue StoreVal = MSC->getValue();
12609 SDValue BasePtr = MSC->getBasePtr();
12610 SDValue VL = MSC->getVectorLength();
12611 ISD::MemIndexType IndexType = MSC->getIndexType();
12612 SDLoc DL(N);
12613
12614   // Zap scatters with a zero mask.
12615   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12616     return Chain;
12617
12618 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12619 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12620 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12621 DL, Ops, MSC->getMemOperand(), IndexType);
12622 }
12623
12624 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12625 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12626 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12627 DL, Ops, MSC->getMemOperand(), IndexType);
12628 }
12629
12630 return SDValue();
12631}
12632
12633SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12634 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12635 SDValue Mask = MSC->getMask();
12636 SDValue Chain = MSC->getChain();
12637 SDValue Index = MSC->getIndex();
12638 SDValue Scale = MSC->getScale();
12639 SDValue StoreVal = MSC->getValue();
12640 SDValue BasePtr = MSC->getBasePtr();
12641 ISD::MemIndexType IndexType = MSC->getIndexType();
12642 SDLoc DL(N);
12643
12644   // Zap scatters with a zero mask.
12645   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12646     return Chain;
12647
12648 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12649 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12650 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12651 DL, Ops, MSC->getMemOperand(), IndexType,
12652 MSC->isTruncatingStore());
12653 }
12654
12655 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12656 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12657 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12658 DL, Ops, MSC->getMemOperand(), IndexType,
12659 MSC->isTruncatingStore());
12660 }
12661
12662 return SDValue();
12663}
12664
12665SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12666 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12667 SDValue Mask = MST->getMask();
12668 SDValue Chain = MST->getChain();
12669 SDValue Value = MST->getValue();
12670 SDValue Ptr = MST->getBasePtr();
12671
12672   // Zap masked stores with a zero mask.
12673   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12674     return Chain;
12675
12676 // Remove a masked store if base pointers and masks are equal.
12677 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12678 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12679 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12680 !MST->getBasePtr().isUndef() &&
12681 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12682                                          MST1->getMemoryVT().getStoreSize()) ||
12683          ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12684         TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12685 MST->getMemoryVT().getStoreSize())) {
12686 CombineTo(MST1, MST1->getChain());
12687 if (N->getOpcode() != ISD::DELETED_NODE)
12688 AddToWorklist(N);
12689 return SDValue(N, 0);
12690 }
12691 }
12692
12693   // If this is a masked store with an all-ones mask, we can use an unmasked store.
12694 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12695 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12696 !MST->isCompressingStore() && !MST->isTruncatingStore())
12697 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12698 MST->getBasePtr(), MST->getPointerInfo(),
12699 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12700 MST->getAAInfo());
12701
12702 // Try transforming N to an indexed store.
12703 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12704 return SDValue(N, 0);
12705
12706 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12707       Value.getValueType().isInteger() &&
12708       (!isa<ConstantSDNode>(Value) ||
12709        !cast<ConstantSDNode>(Value)->isOpaque())) {
12710 APInt TruncDemandedBits =
12711         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12712                              MST->getMemoryVT().getScalarSizeInBits());
12713
12714 // See if we can simplify the operation with
12715 // SimplifyDemandedBits, which only works if the value has a single use.
12716 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12717 // Re-visit the store if anything changed and the store hasn't been merged
12718 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12719 // node back to the worklist if necessary, but we also need to re-visit
12720 // the Store node itself.
12721 if (N->getOpcode() != ISD::DELETED_NODE)
12722 AddToWorklist(N);
12723 return SDValue(N, 0);
12724 }
12725 }
12726
12727 // If this is a TRUNC followed by a masked store, fold this into a masked
12728 // truncating store. We can do this even if this is already a masked
12729 // truncstore.
12730   // TODO: Try to combine this into a masked compress store if possible.
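  // For example, (masked_store (trunc v4i32 %v to v4i16), ptr, mask) becomes a
  // truncating masked store of %v with a v4i16 memory type.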
12731 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12732 MST->isUnindexed() && !MST->isCompressingStore() &&
12733 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12734 MST->getMemoryVT(), LegalOperations)) {
12735 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12736 Value.getOperand(0).getValueType());
12737 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12738 MST->getOffset(), Mask, MST->getMemoryVT(),
12739 MST->getMemOperand(), MST->getAddressingMode(),
12740 /*IsTruncating=*/true);
12741 }
12742
12743 return SDValue();
12744}
12745
12746SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12747 auto *SST = cast<VPStridedStoreSDNode>(N);
12748 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12749 // Combine strided stores with unit-stride to a regular VP store.
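  // For example, a strided store of v4f32 elements whose stride equals the
  // element store size (4 bytes) writes a contiguous block, so a plain VP
  // store with the same mask and EVL is equivalent.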
12750 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12751 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12752 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12753 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12754 SST->getVectorLength(), SST->getMemoryVT(),
12755 SST->getMemOperand(), SST->getAddressingMode(),
12756 SST->isTruncatingStore(), SST->isCompressingStore());
12757 }
12758 return SDValue();
12759}
12760
12761SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12762 SDLoc DL(N);
12763 SDValue Vec = N->getOperand(0);
12764 SDValue Mask = N->getOperand(1);
12765 SDValue Passthru = N->getOperand(2);
12766 EVT VecVT = Vec.getValueType();
12767
12768 bool HasPassthru = !Passthru.isUndef();
12769
12770 APInt SplatVal;
12771 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12772 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12773
12774 if (Vec.isUndef() || Mask.isUndef())
12775 return Passthru;
12776
12777   // No need for potentially expensive compress if the mask is constant.
12778   if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12779     SmallVector<SDValue, 16> Ops;
12780     EVT ScalarVT = VecVT.getVectorElementType();
12781 unsigned NumSelected = 0;
12782 unsigned NumElmts = VecVT.getVectorNumElements();
12783 for (unsigned I = 0; I < NumElmts; ++I) {
12784 SDValue MaskI = Mask.getOperand(I);
12785 // We treat undef mask entries as "false".
12786 if (MaskI.isUndef())
12787 continue;
12788
12789 if (TLI.isConstTrueVal(MaskI)) {
12790 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12791 DAG.getVectorIdxConstant(I, DL));
12792 Ops.push_back(VecI);
12793 NumSelected++;
12794 }
12795 }
12796 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12797 SDValue Val =
12798 HasPassthru
12799 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12800 DAG.getVectorIdxConstant(Rest, DL))
12801 : DAG.getUNDEF(ScalarVT);
12802 Ops.push_back(Val);
12803 }
12804 return DAG.getBuildVector(VecVT, DL, Ops);
12805 }
12806
12807 return SDValue();
12808}
12809
12810SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12811 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12812 SDValue Mask = MGT->getMask();
12813 SDValue Chain = MGT->getChain();
12814 SDValue Index = MGT->getIndex();
12815 SDValue Scale = MGT->getScale();
12816 SDValue BasePtr = MGT->getBasePtr();
12817 SDValue VL = MGT->getVectorLength();
12818 ISD::MemIndexType IndexType = MGT->getIndexType();
12819 SDLoc DL(N);
12820
12821 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12822 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12823 return DAG.getGatherVP(
12824 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12825 Ops, MGT->getMemOperand(), IndexType);
12826 }
12827
12828 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12829 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12830 return DAG.getGatherVP(
12831 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12832 Ops, MGT->getMemOperand(), IndexType);
12833 }
12834
12835 return SDValue();
12836}
12837
12838SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12839 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12840 SDValue Mask = MGT->getMask();
12841 SDValue Chain = MGT->getChain();
12842 SDValue Index = MGT->getIndex();
12843 SDValue Scale = MGT->getScale();
12844 SDValue PassThru = MGT->getPassThru();
12845 SDValue BasePtr = MGT->getBasePtr();
12846 ISD::MemIndexType IndexType = MGT->getIndexType();
12847 SDLoc DL(N);
12848
12849   // Zap gathers with a zero mask.
12850   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12851     return CombineTo(N, PassThru, MGT->getChain());
12852
12853 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12854 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12855 return DAG.getMaskedGather(
12856 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12857 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12858 }
12859
12860 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12861 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12862 return DAG.getMaskedGather(
12863 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12864 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12865 }
12866
12867 return SDValue();
12868}
12869
12870SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12871 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12872 SDValue Mask = MLD->getMask();
12873
12874   // Zap masked loads with a zero mask.
12875   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12876     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12877
12878   // If this is a masked load with an all-ones mask, we can use an unmasked load.
12879 // FIXME: Can we do this for indexed, expanding, or extending loads?
12880 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12881 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12882 SDValue NewLd = DAG.getLoad(
12883 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12884 MLD->getPointerInfo(), MLD->getBaseAlign(),
12885 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12886 return CombineTo(N, NewLd, NewLd.getValue(1));
12887 }
12888
12889 // Try transforming N to an indexed load.
12890 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12891 return SDValue(N, 0);
12892
12893 return SDValue();
12894}
12895
12896SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12897 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12898 SDValue Chain = HG->getChain();
12899 SDValue Inc = HG->getInc();
12900 SDValue Mask = HG->getMask();
12901 SDValue BasePtr = HG->getBasePtr();
12902 SDValue Index = HG->getIndex();
12903 SDLoc DL(HG);
12904
12905 EVT MemVT = HG->getMemoryVT();
12906 EVT DataVT = Index.getValueType();
12907 MachineMemOperand *MMO = HG->getMemOperand();
12908 ISD::MemIndexType IndexType = HG->getIndexType();
12909
12910   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12911     return Chain;
12912
12913 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12914 refineIndexType(Index, IndexType, DataVT, DAG)) {
12915 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12916 HG->getScale(), HG->getIntID()};
12917 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12918 MMO, IndexType);
12919 }
12920
12921 return SDValue();
12922}
12923
12924SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12925 if (SDValue Res = foldPartialReduceMLAMulOp(N))
12926 return Res;
12927 if (SDValue Res = foldPartialReduceAdd(N))
12928 return Res;
12929 return SDValue();
12930}
12931
12932// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
12933// -> partial_reduce_*mla(acc, a, b)
12934//
12935// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12936// -> partial_reduce_*mla(acc, x, C)
12937SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
12938 SDLoc DL(N);
12939 auto *Context = DAG.getContext();
12940 SDValue Acc = N->getOperand(0);
12941 SDValue Op1 = N->getOperand(1);
12942 SDValue Op2 = N->getOperand(2);
12943
12944 APInt C;
12945 if (Op1->getOpcode() != ISD::MUL ||
12946 !ISD::isConstantSplatVector(Op2.getNode(), C) || !C.isOne())
12947 return SDValue();
12948
12949 SDValue LHS = Op1->getOperand(0);
12950 SDValue RHS = Op1->getOperand(1);
12951 unsigned LHSOpcode = LHS->getOpcode();
12952 if (!ISD::isExtOpcode(LHSOpcode))
12953 return SDValue();
12954
12955 SDValue LHSExtOp = LHS->getOperand(0);
12956 EVT LHSExtOpVT = LHSExtOp.getValueType();
12957
12958 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12959 // -> partial_reduce_*mla(acc, x, C)
12960 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
12961 // TODO: Make use of partial_reduce_sumla here
12962 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
12963 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
12964 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
12965 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
12966 return SDValue();
12967
12968 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
12969 ? ISD::PARTIAL_REDUCE_SMLA
12970 : ISD::PARTIAL_REDUCE_UMLA;
12971
12972 // Only perform these combines if the target supports folding
12973     // the extends into the operation.
12974     if (!TLI.isPartialReduceMLALegalOrCustom(
12975             NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
12976 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
12977 return SDValue();
12978
12979 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
12980 DAG.getConstant(CTrunc, DL, LHSExtOpVT));
12981 }
12982
12983 unsigned RHSOpcode = RHS->getOpcode();
12984 if (!ISD::isExtOpcode(RHSOpcode))
12985 return SDValue();
12986
12987 SDValue RHSExtOp = RHS->getOperand(0);
12988 if (LHSExtOpVT != RHSExtOp.getValueType())
12989 return SDValue();
12990
12991 unsigned NewOpc;
12992 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
12993 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
12994 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
12995 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
12996 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
12997 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
12998 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
12999 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13000 std::swap(LHSExtOp, RHSExtOp);
13001 } else
13002 return SDValue();
13003 // For a 2-stage extend the signedness of both of the extends must match
13004 // If the mul has the same type, there is no outer extend, and thus we
13005 // can simply use the inner extends to pick the result node.
13006 // TODO: extend to handle nonneg zext as sext
13007 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13008 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13009 NewOpc != N->getOpcode())
13010 return SDValue();
13011
13012 // Only perform these combines if the target supports folding
13013   // the extends into the operation.
13014   if (!TLI.isPartialReduceMLALegalOrCustom(
13015           NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13016 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13017 return SDValue();
13018
13019 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
13020}
13021
13022// partial.reduce.umla(acc, zext(op), splat(1))
13023// -> partial.reduce.umla(acc, op, splat(trunc(1)))
13024// partial.reduce.smla(acc, sext(op), splat(1))
13025// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13026// partial.reduce.sumla(acc, sext(op), splat(1))
13027// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13028SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13029 SDLoc DL(N);
13030 SDValue Acc = N->getOperand(0);
13031 SDValue Op1 = N->getOperand(1);
13032 SDValue Op2 = N->getOperand(2);
13033
13034 APInt ConstantOne;
13035 if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
13036 !ConstantOne.isOne())
13037 return SDValue();
13038
13039 unsigned Op1Opcode = Op1.getOpcode();
13040 if (!ISD::isExtOpcode(Op1Opcode))
13041 return SDValue();
13042
13043 bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
13044 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13045 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13046 if (Op1IsSigned != NodeIsSigned &&
13047 Op1.getValueType().getVectorElementType() != AccElemVT)
13048 return SDValue();
13049
13050 unsigned NewOpcode =
13051 Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
13052
13053 SDValue UnextOp1 = Op1.getOperand(0);
13054 EVT UnextOp1VT = UnextOp1.getValueType();
13055   auto *Context = DAG.getContext();
13056   if (!TLI.isPartialReduceMLALegalOrCustom(
13057           NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13058 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13059 return SDValue();
13060
13061 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13062 DAG.getConstant(1, DL, UnextOp1VT));
13063}
13064
13065SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13066 auto *SLD = cast<VPStridedLoadSDNode>(N);
13067 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13068 // Combine strided loads with unit-stride to a regular VP load.
13069 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13070 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13071 SDValue NewLd = DAG.getLoadVP(
13072 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13073 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13074 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13075 SLD->getMemOperand(), SLD->isExpandingLoad());
13076 return CombineTo(N, NewLd, NewLd.getValue(1));
13077 }
13078 return SDValue();
13079}
13080
13081/// A vector select of 2 constant vectors can be simplified to math/logic to
13082/// avoid a variable select instruction and possibly avoid constant loads.
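/// For example, vselect Cond, <5,5,5,5>, <4,4,4,4> becomes
/// add (zext Cond), <4,4,4,4>, and vselect Cond, <3,3,3,3>, <4,4,4,4> becomes
/// add (sext Cond), <4,4,4,4>, avoiding the variable select.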
13083SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13084 SDValue Cond = N->getOperand(0);
13085 SDValue N1 = N->getOperand(1);
13086 SDValue N2 = N->getOperand(2);
13087 EVT VT = N->getValueType(0);
13088   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13089       !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
13090       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
13091       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
13092     return SDValue();
13093
13094 // Check if we can use the condition value to increment/decrement a single
13095 // constant value. This simplifies a select to an add and removes a constant
13096 // load/materialization from the general case.
13097 bool AllAddOne = true;
13098 bool AllSubOne = true;
13099 unsigned Elts = VT.getVectorNumElements();
13100 for (unsigned i = 0; i != Elts; ++i) {
13101 SDValue N1Elt = N1.getOperand(i);
13102 SDValue N2Elt = N2.getOperand(i);
13103 if (N1Elt.isUndef())
13104 continue;
13105 // N2 should not contain undef values since it will be reused in the fold.
13106 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13107 AllAddOne = false;
13108 AllSubOne = false;
13109 break;
13110 }
13111
13112 const APInt &C1 = N1Elt->getAsAPIntVal();
13113 const APInt &C2 = N2Elt->getAsAPIntVal();
13114 if (C1 != C2 + 1)
13115 AllAddOne = false;
13116 if (C1 != C2 - 1)
13117 AllSubOne = false;
13118 }
13119
13120 // Further simplifications for the extra-special cases where the constants are
13121 // all 0 or all -1 should be implemented as folds of these patterns.
13122 SDLoc DL(N);
13123 if (AllAddOne || AllSubOne) {
13124 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13125 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13126 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13127 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13128 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13129 }
13130
13131 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
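  // For example, vselect Cond, <8,8,8,8>, <0,0,0,0> --> shl (zext Cond), 3.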
13132 APInt Pow2C;
13133 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13134 isNullOrNullSplat(N2)) {
13135 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13136 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13137 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13138 }
13139
13141 return V;
13142
13143 // The general case for select-of-constants:
13144 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13145 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13146 // leave that to a machine-specific pass.
13147 return SDValue();
13148}
13149
13150SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13151 SDValue N0 = N->getOperand(0);
13152 SDValue N1 = N->getOperand(1);
13153 SDValue N2 = N->getOperand(2);
13154 SDLoc DL(N);
13155
13156 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13157 return V;
13158
13160 return V;
13161
13162 return SDValue();
13163}
13164
13165static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13166                                                SDValue FVal,
13167 const TargetLowering &TLI,
13168 SelectionDAG &DAG,
13169 const SDLoc &DL) {
13170 EVT VT = TVal.getValueType();
13171 if (!TLI.isTypeLegal(VT))
13172 return SDValue();
13173
13174 EVT CondVT = Cond.getValueType();
13175 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13176
13177 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13178 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13179 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13180 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13181
13182   // Not a vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1) pattern; nothing to fold.
13183 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13184 return SDValue();
13185
13186 // select Cond, 0, 0 → 0
13187 if (IsTAllZero && IsFAllZero) {
13188 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13189 : DAG.getConstant(0, DL, VT);
13190 }
13191
13192 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13193 APInt TValAPInt;
13194 if (Cond.getOpcode() == ISD::SETCC &&
13195 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13196 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13197 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13198 TValAPInt.isOne() &&
13199       ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13200       ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
13201     return SDValue();
13202 }
13203
13204 // To use the condition operand as a bitwise mask, it must have elements that
13205   // are the same size as the select elements, i.e., the condition operand must
13206 // have already been promoted from the IR select condition type <N x i1>.
13207 // Don't check if the types themselves are equal because that excludes
13208 // vector floating-point selects.
13209 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13210 return SDValue();
13211
13212 // Cond value must be 'sign splat' to be converted to a logical op.
13213 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13214 return SDValue();
13215
13216 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13217 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13218 Cond.getOpcode() == ISD::SETCC &&
13219 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13220 CondVT) {
13221 if (IsTAllZero || IsFAllOne) {
13222       SDValue CC = Cond.getOperand(2);
13223       ISD::CondCode InverseCC = ISD::getSetCCInverse(
13224           cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13225 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13226 InverseCC);
13227 std::swap(TVal, FVal);
13228 std::swap(IsTAllOne, IsFAllOne);
13229 std::swap(IsTAllZero, IsFAllZero);
13230 }
13231 }
13232
13233   assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
13234          "Select condition no longer all-sign bits");
13235
13236 // select Cond, -1, 0 → bitcast Cond
13237 if (IsTAllOne && IsFAllZero)
13238 return DAG.getBitcast(VT, Cond);
13239
13240 // select Cond, -1, x → or Cond, x
13241 if (IsTAllOne) {
13242 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13243 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13244 return DAG.getBitcast(VT, Or);
13245 }
13246
13247 // select Cond, x, 0 → and Cond, x
13248 if (IsFAllZero) {
13249 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13250 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13251 return DAG.getBitcast(VT, And);
13252 }
13253
13254 // select Cond, 0, x -> and not(Cond), x
13255 if (IsTAllZero &&
13257 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13258 SDValue And =
13259 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13260 return DAG.getBitcast(VT, And);
13261 }
13262
13263 return SDValue();
13264}
13265
13266SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13267 SDValue N0 = N->getOperand(0);
13268 SDValue N1 = N->getOperand(1);
13269 SDValue N2 = N->getOperand(2);
13270 EVT VT = N->getValueType(0);
13271 SDLoc DL(N);
13272
13273 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13274 return V;
13275
13277 return V;
13278
13279 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13280 if (!TLI.isTargetCanonicalSelect(N))
13281 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13282 return DAG.getSelect(DL, VT, F, N2, N1);
13283
13284 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
13285 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13288 TLI.getBooleanContents(N0.getValueType()) ==
13290 return DAG.getNode(
13291 ISD::ADD, DL, N1.getValueType(), N2,
13292 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13293 }
13294
13295 // Canonicalize integer abs.
13296 // vselect (setg[te] X, 0), X, -X ->
13297 // vselect (setgt X, -1), X, -X ->
13298 // vselect (setl[te] X, 0), -X, X ->
13299 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
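  // For example, for i32 elements this computes abs(X) as
  //   (X + (X s>> 31)) ^ (X s>> 31)
  // when a native ISD::ABS is not used.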
13300 if (N0.getOpcode() == ISD::SETCC) {
13301     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13302     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13303     bool isAbs = false;
13304 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13305
13306 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13307 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13308         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13309       isAbs = true;
13310     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13311              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13312       isAbs = true;
13313
13314     if (isAbs) {
13315       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
13316         return DAG.getNode(ISD::ABS, DL, VT, LHS);
13317
13318 SDValue Shift = DAG.getNode(
13319 ISD::SRA, DL, VT, LHS,
13320 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13321 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13322 AddToWorklist(Shift.getNode());
13323 AddToWorklist(Add.getNode());
13324 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13325 }
13326
13327 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13328 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13329 //
13330 // This is OK if we don't care about what happens if either operand is a
13331 // NaN.
13332 //
13333 if (N0.hasOneUse() &&
13334 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13335 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13336 return FMinMax;
13337 }
13338
13339 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13340 return S;
13341 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13342 return S;
13343
13344 // If this select has a condition (setcc) with narrower operands than the
13345 // select, try to widen the compare to match the select width.
13346 // TODO: This should be extended to handle any constant.
13347 // TODO: This could be extended to handle non-loading patterns, but that
13348 // requires thorough testing to avoid regressions.
13349 if (isNullOrNullSplat(RHS)) {
13350       EVT NarrowVT = LHS.getValueType();
13351       EVT WideVT = N->getValueType(0);
13352       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13353 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13354 unsigned WideWidth = WideVT.getScalarSizeInBits();
13355 bool IsSigned = isSignedIntSetCC(CC);
13356 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13357 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13358 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13359 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13360 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13361 // Both compare operands can be widened for free. The LHS can use an
13362 // extended load, and the RHS is a constant:
13363 // vselect (ext (setcc load(X), C)), N1, N2 -->
13364 // vselect (setcc extload(X), C'), N1, N2
13365 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13366 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13367 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13368 EVT WideSetCCVT = getSetCCResultType(WideVT);
13369 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13370 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13371 }
13372 }
13373
13374 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13375 return ABD;
13376
13377 // Match VSELECTs into add with unsigned saturation.
13378 if (hasOperation(ISD::UADDSAT, VT)) {
13379 // Check if one of the arms of the VSELECT is vector with all bits set.
13380 // If it's on the left side invert the predicate to simplify logic below.
13381 SDValue Other;
13382       ISD::CondCode SatCC = CC;
13383       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
13384         Other = N2;
13385 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13386 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13387 Other = N1;
13388 }
13389
13390 if (Other && Other.getOpcode() == ISD::ADD) {
13391 SDValue CondLHS = LHS, CondRHS = RHS;
13392 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13393
13394 // Canonicalize condition operands.
13395 if (SatCC == ISD::SETUGE) {
13396 std::swap(CondLHS, CondRHS);
13397 SatCC = ISD::SETULE;
13398 }
13399
13400 // We can test against either of the addition operands.
13401 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13402 // x+y >= x ? x+y : ~0 --> uaddsat x, y
13403 if (SatCC == ISD::SETULE && Other == CondRHS &&
13404 (OpLHS == CondLHS || OpRHS == CondLHS))
13405 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13406
13407 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13408 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13409 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13410 CondLHS == OpLHS) {
13411 // If the RHS is a constant we have to reverse the const
13412 // canonicalization.
13413 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13414 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13415 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13416 };
13417 if (SatCC == ISD::SETULE &&
13418 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13419 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13420 }
13421 }
13422 }
13423
13424 // Match VSELECTs into sub with unsigned saturation.
13425 if (hasOperation(ISD::USUBSAT, VT)) {
13426 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13427 // the left side invert the predicate to simplify logic below.
13428 SDValue Other;
13429       ISD::CondCode SatCC = CC;
13430       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
13431         Other = N2;
13432         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13433       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
13434         Other = N1;
13435 }
13436
13437 // zext(x) >= y ? trunc(zext(x) - y) : 0
13438 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13439 // zext(x) > y ? trunc(zext(x) - y) : 0
13440 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13441 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13442 Other.getOperand(0).getOpcode() == ISD::SUB &&
13443 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13444 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13445 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13446 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13447 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13448 DAG, DL))
13449 return R;
13450 }
13451
13452 if (Other && Other.getNumOperands() == 2) {
13453 SDValue CondRHS = RHS;
13454 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13455
13456 if (OpLHS == LHS) {
13457 // Look for a general sub with unsigned saturation first.
13458 // x >= y ? x-y : 0 --> usubsat x, y
13459 // x > y ? x-y : 0 --> usubsat x, y
13460 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13461 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13462 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13463
13464 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13465 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13466 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13467 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13468 // If the RHS is a constant we have to reverse the const
13469 // canonicalization.
13470 // x > C-1 ? x+-C : 0 --> usubsat x, C
13471 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13472 return (!Op && !Cond) ||
13473 (Op && Cond &&
13474 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13475 };
13476 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13477 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13478 /*AllowUndefs*/ true)) {
13479 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13480 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13481 }
13482
13483 // Another special case: If C was a sign bit, the sub has been
13484 // canonicalized into a xor.
13485 // FIXME: Would it be better to use computeKnownBits to
13486 // determine whether it's safe to decanonicalize the xor?
13487 // x s< 0 ? x^C : 0 --> usubsat x, C
13488 APInt SplatValue;
13489 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13490                 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13491                 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
13492                 SplatValue.isSignMask()) {
13493 // Note that we have to rebuild the RHS constant here to
13494 // ensure we don't rely on particular values of undef lanes.
13495 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13496 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13497 }
13498 }
13499 }
13500 }
13501 }
13502 }
13503
13504 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13505 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13506 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13507 return UMin;
13508 }
13509
13510 if (SimplifySelectOps(N, N1, N2))
13511 return SDValue(N, 0); // Don't revisit N.
13512
13513   // Fold (vselect all_ones, N1, N2) -> N1
13514   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13515     return N1;
13516   // Fold (vselect all_zeros, N1, N2) -> N2
13517   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13518     return N2;
13519
13520   // The ConvertSelectToConcatVector function assumes both of the above
13521   // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
13522   // and addressed.
13523   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13524       N2.getOpcode() == ISD::CONCAT_VECTORS &&
13525       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13526     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13527 return CV;
13528 }
13529
13530 if (SDValue V = foldVSelectOfConstants(N))
13531 return V;
13532
13533   if (hasOperation(ISD::SRA, VT))
13534     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13535       return V;
13536
13537   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
13538     return SDValue(N, 0);
13539
13540 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13541 return V;
13542
13543 return SDValue();
13544}
13545
13546SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13547 SDValue N0 = N->getOperand(0);
13548 SDValue N1 = N->getOperand(1);
13549 SDValue N2 = N->getOperand(2);
13550 SDValue N3 = N->getOperand(3);
13551 SDValue N4 = N->getOperand(4);
13552 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13553 SDLoc DL(N);
13554
13555 // fold select_cc lhs, rhs, x, x, cc -> x
13556 if (N2 == N3)
13557 return N2;
13558
13559 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13560 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13561 isNullConstant(N1))
13562 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13563
13564 // Determine if the condition we're dealing with is constant
13565 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13566 CC, DL, false)) {
13567 AddToWorklist(SCC.getNode());
13568
13569 // cond always true -> true val
13570 // cond always false -> false val
13571 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13572 return SCCC->isZero() ? N3 : N2;
13573
13574     // When the condition is UNDEF, just return the first operand. This is
13575     // coherent with DAG creation; no setcc node is created in this case.
13576 if (SCC->isUndef())
13577 return N2;
13578
13579 // Fold to a simpler select_cc
13580 if (SCC.getOpcode() == ISD::SETCC) {
13581 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13582 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13583 SCC.getOperand(2), SCC->getFlags());
13584 }
13585 }
13586
13587 // If we can fold this based on the true/false value, do so.
13588 if (SimplifySelectOps(N, N2, N3))
13589 return SDValue(N, 0); // Don't revisit N.
13590
13591 // fold select_cc into other things, such as min/max/abs
13592 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13593}
13594
13595SDValue DAGCombiner::visitSETCC(SDNode *N) {
13596   // setcc is very commonly used as an argument to brcond. This pattern
13597   // also lends itself to numerous combines and, as a result, it is desirable
13598   // to keep the argument to a brcond as a setcc as much as possible.
13599 bool PreferSetCC =
13600 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13601
13602 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13603 EVT VT = N->getValueType(0);
13604 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13605 SDLoc DL(N);
13606
13607 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13608 // If we prefer to have a setcc, and we don't, we'll try our best to
13609 // recreate one using rebuildSetCC.
13610 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13611 SDValue NewSetCC = rebuildSetCC(Combined);
13612
13613 // We don't have anything interesting to combine to.
13614 if (NewSetCC.getNode() == N)
13615 return SDValue();
13616
13617 if (NewSetCC)
13618 return NewSetCC;
13619 }
13620 return Combined;
13621 }
13622
13623 // Optimize
13624 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13625 // or
13626 // 2) (icmp eq/ne X, (rotate X, C1))
13627 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13628 // remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
13629 // Then:
13630   // If C1 is a power of 2, then the rotate and shift+and versions are
13631   // equivalent, so we can interchange them depending on target preference.
13632   // Otherwise, if we have the shift+and version we can interchange srl/shl,
13633   // which in turn affects the constant C0. We can use this to get better
13634   // constants, again determined by target preference.
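  // For example, on a target that prefers rotates,
  //   (x64 & UINT32_MAX) == (x64 >> 32)
  // can be rewritten as x64 == (rotr x64, 32); both compare the low and high
  // halves of x64 for equality.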
13635 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13636 auto IsAndWithShift = [](SDValue A, SDValue B) {
13637 return A.getOpcode() == ISD::AND &&
13638 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13639 A.getOperand(0) == B.getOperand(0);
13640 };
13641 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13642 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13643 B.getOperand(0) == A;
13644 };
13645 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13646 bool IsRotate = false;
13647
13648 // Find either shift+and or rotate pattern.
13649 if (IsAndWithShift(N0, N1)) {
13650 AndOrOp = N0;
13651 ShiftOrRotate = N1;
13652 } else if (IsAndWithShift(N1, N0)) {
13653 AndOrOp = N1;
13654 ShiftOrRotate = N0;
13655 } else if (IsRotateWithOp(N0, N1)) {
13656 IsRotate = true;
13657 AndOrOp = N0;
13658 ShiftOrRotate = N1;
13659 } else if (IsRotateWithOp(N1, N0)) {
13660 IsRotate = true;
13661 AndOrOp = N1;
13662 ShiftOrRotate = N0;
13663 }
13664
13665 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13666 (IsRotate || AndOrOp.hasOneUse())) {
13667 EVT OpVT = N0.getValueType();
13668 // Get constant shift/rotate amount and possibly mask (if its shift+and
13669 // variant).
13670 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13671 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13672 /*AllowTrunc*/ false);
13673 if (CNode == nullptr)
13674 return std::nullopt;
13675 return CNode->getAPIntValue();
13676 };
13677 std::optional<APInt> AndCMask =
13678 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13679 std::optional<APInt> ShiftCAmt =
13680 GetAPIntValue(ShiftOrRotate.getOperand(1));
13681 unsigned NumBits = OpVT.getScalarSizeInBits();
13682
13683 // We found constants.
13684 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13685 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13686 // Check that the constants meet the constraints.
13687 bool CanTransform = IsRotate;
13688 if (!CanTransform) {
13689           // Check that the mask and shift complement each other
13690 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13691 // Check that we are comparing all bits
13692 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13693 // Check that the and mask is correct for the shift
13694 CanTransform &=
13695 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13696 }
13697
13698 // See if target prefers another shift/rotate opcode.
13699 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13700 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13701 // Transform is valid and we have a new preference.
13702 if (CanTransform && NewShiftOpc != ShiftOpc) {
13703 SDValue NewShiftOrRotate =
13704 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13705 ShiftOrRotate.getOperand(1));
13706 SDValue NewAndOrOp = SDValue();
13707
13708 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13709 APInt NewMask =
13710 NewShiftOpc == ISD::SHL
13711 ? APInt::getHighBitsSet(NumBits,
13712 NumBits - ShiftCAmt->getZExtValue())
13713 : APInt::getLowBitsSet(NumBits,
13714 NumBits - ShiftCAmt->getZExtValue());
13715 NewAndOrOp =
13716 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13717 DAG.getConstant(NewMask, DL, OpVT));
13718 } else {
13719 NewAndOrOp = ShiftOrRotate.getOperand(0);
13720 }
13721
13722 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13723 }
13724 }
13725 }
13726 }
13727 return SDValue();
13728}
13729
13730SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13731 SDValue LHS = N->getOperand(0);
13732 SDValue RHS = N->getOperand(1);
13733 SDValue Carry = N->getOperand(2);
13734 SDValue Cond = N->getOperand(3);
13735
13736 // If Carry is false, fold to a regular SETCC.
13737 if (isNullConstant(Carry))
13738 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13739
13740 return SDValue();
13741}
13742
13743 /// Check that N satisfies all of the following:
13744 /// N is used once.
13745 /// N is a load.
13746 /// The load is compatible with ExtOpcode, meaning:
13747 /// if the load has an explicit zero/sign extension, ExtOpcode must perform
13748 /// the same extension;
13749 /// otherwise any extension opcode is compatible.
13750static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13751 if (!N.hasOneUse())
13752 return false;
13753
13754 if (!isa<LoadSDNode>(N))
13755 return false;
13756
13757 LoadSDNode *Load = cast<LoadSDNode>(N);
13758 ISD::LoadExtType LoadExt = Load->getExtensionType();
13759 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13760 return true;
13761
13762 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13763 // extension.
13764 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13765 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13766 return false;
13767
13768 return true;
13769}
13770
13771/// Fold
13772/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13773/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13774/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13775/// This function is called by the DAGCombiner when visiting sext/zext/aext
13776/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13777 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13778 SelectionDAG &DAG, const SDLoc &DL,
13779 CombineLevel Level) {
13780 unsigned Opcode = N->getOpcode();
13781 SDValue N0 = N->getOperand(0);
13782 EVT VT = N->getValueType(0);
13783 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13784 Opcode == ISD::ANY_EXTEND) &&
13785 "Expected EXTEND dag node in input!");
13786
13787 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13788 !N0.hasOneUse())
13789 return SDValue();
13790
13791 SDValue Op1 = N0->getOperand(1);
13792 SDValue Op2 = N0->getOperand(2);
13793 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13794 return SDValue();
13795
13796 auto ExtLoadOpcode = ISD::EXTLOAD;
13797 if (Opcode == ISD::SIGN_EXTEND)
13798 ExtLoadOpcode = ISD::SEXTLOAD;
13799 else if (Opcode == ISD::ZERO_EXTEND)
13800 ExtLoadOpcode = ISD::ZEXTLOAD;
13801
13802 // An illegal VSELECT may make ISel fail if it appears after legalization
13803 // (DAG Combine 2), so conservatively check the OperationAction.
13804 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13805 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13806 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13807 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13808 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13809 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13810 return SDValue();
13811
13812 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13813 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13814 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13815}
13816
13817/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13818/// a build_vector of constants.
13819/// This function is called by the DAGCombiner when visiting sext/zext/aext
13820/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13821/// Vector extends are not folded if operations are legal; this is to
13822/// avoid introducing illegal build_vector dag nodes.
13823 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13824 const TargetLowering &TLI,
13825 SelectionDAG &DAG, bool LegalTypes) {
13826 unsigned Opcode = N->getOpcode();
13827 SDValue N0 = N->getOperand(0);
13828 EVT VT = N->getValueType(0);
13829
13830 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13831 "Expected EXTEND dag node in input!");
13832
13833 // fold (sext c1) -> c1
13834 // fold (zext c1) -> c1
13835 // fold (aext c1) -> c1
13836 if (isa<ConstantSDNode>(N0))
13837 return DAG.getNode(Opcode, DL, VT, N0);
13838
13839 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13840 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13841 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13842 if (N0->getOpcode() == ISD::SELECT) {
13843 SDValue Op1 = N0->getOperand(1);
13844 SDValue Op2 = N0->getOperand(2);
13845 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13846 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13847 // For any_extend, choose sign extension of the constants to allow a
13848 // possible further transform to sign_extend_inreg, i.e.
13849 //
13850 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13851 // t2: i64 = any_extend t1
13852 // -->
13853 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13854 // -->
13855 // t4: i64 = sign_extend_inreg t3
13856 unsigned FoldOpc = Opcode;
13857 if (FoldOpc == ISD::ANY_EXTEND)
13858 FoldOpc = ISD::SIGN_EXTEND;
13859 return DAG.getSelect(DL, VT, N0->getOperand(0),
13860 DAG.getNode(FoldOpc, DL, VT, Op1),
13861 DAG.getNode(FoldOpc, DL, VT, Op2));
13862 }
13863 }
13864
13865 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
13866 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
13867 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
13868 EVT SVT = VT.getScalarType();
13869 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13870 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13871 return SDValue();
13872
13873 // We can fold this node into a build_vector.
13874 unsigned VTBits = SVT.getSizeInBits();
13875 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13876 SmallVector<SDValue, 8> Elts;
13877 unsigned NumElts = VT.getVectorNumElements();
13878
13879 for (unsigned i = 0; i != NumElts; ++i) {
13880 SDValue Op = N0.getOperand(i);
13881 if (Op.isUndef()) {
13882 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13883 Elts.push_back(DAG.getUNDEF(SVT));
13884 else
13885 Elts.push_back(DAG.getConstant(0, DL, SVT));
13886 continue;
13887 }
13888
13889 SDLoc DL(Op);
13890 // Get the constant value and if needed trunc it to the size of the type.
13891 // Nodes like build_vector might have constants wider than the scalar type.
13892 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13893 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13894 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13895 else
13896 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13897 }
13898
13899 return DAG.getBuildVector(VT, DL, Elts);
13900}
13901
13902// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13903// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13904 // transformation. Returns true if the extensions are possible and the
13905 // above-mentioned transformation is profitable.
13906 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13907 unsigned ExtOpc,
13908 SmallVectorImpl<SDNode *> &ExtendNodes,
13909 const TargetLowering &TLI) {
13910 bool HasCopyToRegUses = false;
13911 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13912 for (SDUse &Use : N0->uses()) {
13913 SDNode *User = Use.getUser();
13914 if (User == N)
13915 continue;
13916 if (Use.getResNo() != N0.getResNo())
13917 continue;
13918 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13919 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13920 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13921 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13922 // Sign bits will be lost after a zext.
13923 return false;
13924 bool Add = false;
13925 for (unsigned i = 0; i != 2; ++i) {
13926 SDValue UseOp = User->getOperand(i);
13927 if (UseOp == N0)
13928 continue;
13929 if (!isa<ConstantSDNode>(UseOp))
13930 return false;
13931 Add = true;
13932 }
13933 if (Add)
13934 ExtendNodes.push_back(User);
13935 continue;
13936 }
13937 // If truncates aren't free and there are users we can't
13938 // extend, it isn't worthwhile.
13939 if (!isTruncFree)
13940 return false;
13941 // Remember if this value is live-out.
13942 if (User->getOpcode() == ISD::CopyToReg)
13943 HasCopyToRegUses = true;
13944 }
13945
13946 if (HasCopyToRegUses) {
13947 bool BothLiveOut = false;
13948 for (SDUse &Use : N->uses()) {
13949 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13950 BothLiveOut = true;
13951 break;
13952 }
13953 }
13954 if (BothLiveOut)
13955 // Both unextended and extended values are live out. There had better be
13956 // a good reason for the transformation.
13957 return !ExtendNodes.empty();
13958 }
13959 return true;
13960}
13961
13962void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13963 SDValue OrigLoad, SDValue ExtLoad,
13964 ISD::NodeType ExtType) {
13965 // Extend SetCC uses if necessary.
13966 SDLoc DL(ExtLoad);
13967 for (SDNode *SetCC : SetCCs) {
13968 SmallVector<SDValue, 4> Ops;
13969
13970 for (unsigned j = 0; j != 2; ++j) {
13971 SDValue SOp = SetCC->getOperand(j);
13972 if (SOp == OrigLoad)
13973 Ops.push_back(ExtLoad);
13974 else
13975 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13976 }
13977
13978 Ops.push_back(SetCC->getOperand(2));
13979 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13980 }
13981}
13982
13983// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13984SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13985 SDValue N0 = N->getOperand(0);
13986 EVT DstVT = N->getValueType(0);
13987 EVT SrcVT = N0.getValueType();
13988
13989 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13990 N->getOpcode() == ISD::ZERO_EXTEND) &&
13991 "Unexpected node type (not an extend)!");
13992
13993 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13994 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13995 // (v8i32 (sext (v8i16 (load x))))
13996 // into:
13997 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13998 // (v4i32 (sextload (x + 16)))))
13999 // Where uses of the original load, i.e.:
14000 // (v8i16 (load x))
14001 // are replaced with:
14002 // (v8i16 (truncate
14003 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14004 // (v4i32 (sextload (x + 16)))))))
14005 //
14006 // This combine is only applicable to illegal, but splittable, vectors.
14007 // All legal types, and illegal non-vector types, are handled elsewhere.
14008 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14009 //
14010 if (N0->getOpcode() != ISD::LOAD)
14011 return SDValue();
14012
14013 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14014
14015 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14016 !N0.hasOneUse() || !LN0->isSimple() ||
14017 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14018 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14019 return SDValue();
14020
14021 SmallVector<SDNode *, 4> SetCCs;
14022 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14023 return SDValue();
14024
14025 ISD::LoadExtType ExtType =
14026 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14027
14028 // Try to split the vector types to get down to legal types.
14029 EVT SplitSrcVT = SrcVT;
14030 EVT SplitDstVT = DstVT;
14031 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
14032 SplitSrcVT.getVectorNumElements() > 1) {
14033 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14034 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14035 }
14036
14037 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
14038 return SDValue();
14039
14040 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14041
14042 SDLoc DL(N);
14043 const unsigned NumSplits =
14044 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14045 const unsigned Stride = SplitSrcVT.getStoreSize();
14046 SmallVector<SDValue, 4> Loads;
14047 SmallVector<SDValue, 4> Chains;
14048
14049 SDValue BasePtr = LN0->getBasePtr();
14050 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14051 const unsigned Offset = Idx * Stride;
14052
14053 SDValue SplitLoad =
14054 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14055 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14056 SplitSrcVT, LN0->getBaseAlign(),
14057 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14058
14059 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14060
14061 Loads.push_back(SplitLoad.getValue(0));
14062 Chains.push_back(SplitLoad.getValue(1));
14063 }
14064
14065 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14066 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14067
14068 // Simplify TF.
14069 AddToWorklist(NewChain.getNode());
14070
14071 CombineTo(N, NewValue);
14072
14073 // Replace uses of the original load (before extension)
14074 // with a truncate of the concatenated sextloaded vectors.
14075 SDValue Trunc =
14076 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14077 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14078 CombineTo(N0.getNode(), Trunc, NewChain);
14079 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14080}
14081
14082// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14083// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14084SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14085 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14086 EVT VT = N->getValueType(0);
14087 EVT OrigVT = N->getOperand(0).getValueType();
14088 if (TLI.isZExtFree(OrigVT, VT))
14089 return SDValue();
14090
14091 // and/or/xor
14092 SDValue N0 = N->getOperand(0);
14093 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14094 N0.getOperand(1).getOpcode() != ISD::Constant ||
14095 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14096 return SDValue();
14097
14098 // shl/shr
14099 SDValue N1 = N0->getOperand(0);
14100 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14101 N1.getOperand(1).getOpcode() != ISD::Constant ||
14102 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14103 return SDValue();
14104
14105 // load
14106 if (!isa<LoadSDNode>(N1.getOperand(0)))
14107 return SDValue();
14108 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14109 EVT MemVT = Load->getMemoryVT();
14110 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14111 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14112 return SDValue();
14113
14114
14115 // If the shift op is SHL, the logic op must be AND, otherwise the result
14116 // will be wrong.
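// For example (an illustrative i8 -> i16 case with cst = 1): for x = 0x80,
// (or (shl x, 1), 1) computed in i8 is 0x01, so the zext result is 0x0001;
// but (or (shl (zextload x), 1), (zext 1)) computed in i16 is 0x0101. An AND
// with the zero-extended mask clears every bit shifted past the original
// width, which is why only AND is safe with SHL here.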
14117 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14118 return SDValue();
14119
14120 if (!N0.hasOneUse() || !N1.hasOneUse())
14121 return SDValue();
14122
14123 SmallVector<SDNode *, 4> SetCCs;
14124 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14125 ISD::ZERO_EXTEND, SetCCs, TLI))
14126 return SDValue();
14127
14128 // Actually do the transformation.
14129 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14130 Load->getChain(), Load->getBasePtr(),
14131 Load->getMemoryVT(), Load->getMemOperand());
14132
14133 SDLoc DL1(N1);
14134 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14135 N1.getOperand(1));
14136
14137 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14138 SDLoc DL0(N0);
14139 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14140 DAG.getConstant(Mask, DL0, VT));
14141
14142 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14143 CombineTo(N, And);
14144 if (SDValue(Load, 0).hasOneUse()) {
14145 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14146 } else {
14147 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14148 Load->getValueType(0), ExtLoad);
14149 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14150 }
14151
14152 // N0 is dead at this point.
14153 recursivelyDeleteUnusedNodes(N0.getNode());
14154
14155 return SDValue(N,0); // Return N so it doesn't get rechecked!
14156}
14157
14158/// If we're narrowing or widening the result of a vector select and the final
14159/// size is the same size as a setcc (compare) feeding the select, then try to
14160/// apply the cast operation to the select's operands because matching vector
14161/// sizes for a select condition and other operands should be more efficient.
14162SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14163 unsigned CastOpcode = Cast->getOpcode();
14164 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14165 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14166 CastOpcode == ISD::FP_ROUND) &&
14167 "Unexpected opcode for vector select narrowing/widening");
14168
14169 // We only do this transform before legal ops because the pattern may be
14170 // obfuscated by target-specific operations after legalization. Do not create
14171 // an illegal select op, however, because that may be difficult to lower.
14172 EVT VT = Cast->getValueType(0);
14173 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14174 return SDValue();
14175
14176 SDValue VSel = Cast->getOperand(0);
14177 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14178 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14179 return SDValue();
14180
14181 // Does the setcc have the same vector size as the casted select?
14182 SDValue SetCC = VSel.getOperand(0);
14183 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14184 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14185 return SDValue();
14186
14187 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14188 SDValue A = VSel.getOperand(1);
14189 SDValue B = VSel.getOperand(2);
14190 SDValue CastA, CastB;
14191 SDLoc DL(Cast);
14192 if (CastOpcode == ISD::FP_ROUND) {
14193 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14194 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14195 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14196 } else {
14197 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14198 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14199 }
14200 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14201}
14202
14203// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14204// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14205 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14206 const TargetLowering &TLI, EVT VT,
14207 bool LegalOperations, SDNode *N,
14208 SDValue N0, ISD::LoadExtType ExtLoadType) {
14209 SDNode *N0Node = N0.getNode();
14210 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14211 : ISD::isZEXTLoad(N0Node);
14212 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14213 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14214 return SDValue();
14215
14216 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14217 EVT MemVT = LN0->getMemoryVT();
14218 if ((LegalOperations || !LN0->isSimple() ||
14219 VT.isVector()) &&
14220 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14221 return SDValue();
14222
14223 SDValue ExtLoad =
14224 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14225 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14226 Combiner.CombineTo(N, ExtLoad);
14227 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14228 if (LN0->use_empty())
14229 Combiner.recursivelyDeleteUnusedNodes(LN0);
14230 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14231}
14232
14233// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14234// Only generate vector extloads when 1) they're legal, and 2) they are
14235// deemed desirable by the target. NonNegZExt can be set to true if a zero
14236// extend has the nonneg flag to allow use of sextload if profitable.
14237 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14238 const TargetLowering &TLI, EVT VT,
14239 bool LegalOperations, SDNode *N, SDValue N0,
14240 ISD::LoadExtType ExtLoadType,
14241 ISD::NodeType ExtOpc,
14242 bool NonNegZExt = false) {
14243 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14244 return {};
14245
14246 // If this is zext nneg, see if it would make sense to treat it as a sext.
14247 if (NonNegZExt) {
14248 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14249 "Unexpected load type or opcode");
14250 for (SDNode *User : N0->users()) {
14251 if (User->getOpcode() == ISD::SETCC) {
14252 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14253 if (ISD::isSignedIntSetCC(CC)) {
14254 ExtLoadType = ISD::SEXTLOAD;
14255 ExtOpc = ISD::SIGN_EXTEND;
14256 break;
14257 }
14258 }
14259 }
14260 }
14261
14262 // TODO: isFixedLengthVector() should be removed; any negative effects on
14263 // code generation should instead be handled by the target's implementation
14264 // of isVectorLoadExtDesirable().
14265 if ((LegalOperations || VT.isFixedLengthVector() ||
14266 !cast<LoadSDNode>(N0)->isSimple()) &&
14267 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14268 return {};
14269
14270 bool DoXform = true;
14271 SmallVector<SDNode *, 4> SetCCs;
14272 if (!N0.hasOneUse())
14273 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14274 if (VT.isVector())
14275 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14276 if (!DoXform)
14277 return {};
14278
14279 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14280 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14281 LN0->getBasePtr(), N0.getValueType(),
14282 LN0->getMemOperand());
14283 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14284 // If the load value is used only by N, replace it via CombineTo N.
14285 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14286 Combiner.CombineTo(N, ExtLoad);
14287 if (NoReplaceTrunc) {
14288 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14289 Combiner.recursivelyDeleteUnusedNodes(LN0);
14290 } else {
14291 SDValue Trunc =
14292 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14293 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14294 }
14295 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14296}
14297
14298static SDValue
14299 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14300 bool LegalOperations, SDNode *N, SDValue N0,
14301 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14302 if (!N0.hasOneUse())
14303 return SDValue();
14304
14305 auto *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14306 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14307 return SDValue();
14308
14309 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14310 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14311 return SDValue();
14312
14313 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14314 return SDValue();
14315
14316 SDLoc dl(Ld);
14317 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14318 SDValue NewLoad = DAG.getMaskedLoad(
14319 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14320 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14321 ExtLoadType, Ld->isExpandingLoad());
14322 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14323 return NewLoad;
14324}
14325
14326// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14327 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14328 const TargetLowering &TLI, EVT VT,
14329 SDValue N0,
14330 ISD::LoadExtType ExtLoadType) {
14331 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14332 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14333 return {};
14334 EVT MemoryVT = ALoad->getMemoryVT();
14335 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14336 return {};
14337 // Can't fold into ALoad if it is already extending differently.
14338 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14339 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14340 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14341 return {};
14342
14343 EVT OrigVT = ALoad->getValueType(0);
14344 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14345 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14346 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14347 ALoad->getBasePtr(), ALoad->getMemOperand()));
14348 DAG.ReplaceAllUsesOfValueWith(
14349 SDValue(ALoad, 0),
14350 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14351 // Update the chain uses.
14352 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14353 return SDValue(NewALoad, 0);
14354}
14355
14356 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14357 bool LegalOperations) {
14358 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14359 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14360
14361 SDValue SetCC = N->getOperand(0);
14362 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14363 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14364 return SDValue();
14365
14366 SDValue X = SetCC.getOperand(0);
14367 SDValue Ones = SetCC.getOperand(1);
14368 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14369 EVT VT = N->getValueType(0);
14370 EVT XVT = X.getValueType();
14371 // setge X, C is canonicalized to setgt, so we do not need to match that
14372 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14373 // not require the 'not' op.
14374 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14375 // Invert and smear/shift the sign bit:
14376 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14377 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
14378 SDLoc DL(N);
14379 unsigned ShCt = VT.getSizeInBits() - 1;
14380 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14381 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14382 SDValue NotX = DAG.getNOT(DL, X, VT);
14383 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14384 auto ShiftOpcode =
14385 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14386 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14387 }
14388 }
14389 return SDValue();
14390}
14391
14392SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14393 SDValue N0 = N->getOperand(0);
14394 if (N0.getOpcode() != ISD::SETCC)
14395 return SDValue();
14396
14397 SDValue N00 = N0.getOperand(0);
14398 SDValue N01 = N0.getOperand(1);
14399 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14400 EVT VT = N->getValueType(0);
14401 EVT N00VT = N00.getValueType();
14402 SDLoc DL(N);
14403
14404 // Propagate fast-math-flags.
14405 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14406
14407 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14408 // the same size as the compared operands. Try to optimize sext(setcc())
14409 // if this is the case.
14410 if (VT.isVector() && !LegalOperations &&
14411 TLI.getBooleanContents(N00VT) ==
14412 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14413 EVT SVT = getSetCCResultType(N00VT);
14414
14415 // If we already have the desired type, don't change it.
14416 if (SVT != N0.getValueType()) {
14417 // We know that the # elements of the results is the same as the
14418 // # elements of the compare (and the # elements of the compare result
14419 // for that matter). Check to see that they are the same size. If so,
14420 // we know that the element size of the sext'd result matches the
14421 // element size of the compare operands.
14422 if (VT.getSizeInBits() == SVT.getSizeInBits())
14423 return DAG.getSetCC(DL, VT, N00, N01, CC);
14424
14425 // If the desired elements are smaller or larger than the source
14426 // elements, we can use a matching integer vector type and then
14427 // truncate/sign extend.
14428 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14429 if (SVT == MatchingVecType) {
14430 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14431 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14432 }
14433 }
14434
14435 // Try to eliminate the sext of a setcc by zexting the compare operands.
14436 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14437 !TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {
14438 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14439 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14440 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14441
14442 // We have an unsupported narrow vector compare op that would be legal
14443 // if extended to the destination type. See if the compare operands
14444 // can be freely extended to the destination type.
14445 auto IsFreeToExtend = [&](SDValue V) {
14446 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14447 return true;
14448 // Match a simple, non-extended load that can be converted to a
14449 // legal {z/s}ext-load.
14450 // TODO: Allow widening of an existing {z/s}ext-load?
14451 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14452 ISD::isUNINDEXEDLoad(V.getNode()) &&
14453 cast<LoadSDNode>(V)->isSimple() &&
14454 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14455 return false;
14456
14457 // Non-chain users of this value must either be the setcc in this
14458 // sequence or extends that can be folded into the new {z/s}ext-load.
14459 for (SDUse &Use : V->uses()) {
14460 // Skip uses of the chain and the setcc.
14461 SDNode *User = Use.getUser();
14462 if (Use.getResNo() != 0 || User == N0.getNode())
14463 continue;
14464 // Extra users must have exactly the same cast we are about to create.
14465 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14466 // is enhanced similarly.
14467 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14468 return false;
14469 }
14470 return true;
14471 };
14472
14473 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14474 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14475 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14476 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14477 }
14478 }
14479 }
14480
14481 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14482 // Here, T can be 1 or -1, depending on the type of the setcc and
14483 // getBooleanContents().
14484 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14485
14486 // To determine the "true" side of the select, we need to know the high bit
14487 // of the value returned by the setcc if it evaluates to true.
14488 // If the type of the setcc is i1, then the true case of the select is just
14489 // sext(i1 1), that is, -1.
14490 // If the type of the setcc is larger (say, i8) then the value of the high
14491 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14492 // of the appropriate width.
14493 SDValue ExtTrueVal = (SetCCWidth == 1)
14494 ? DAG.getAllOnesConstant(DL, VT)
14495 : DAG.getBoolConstant(true, DL, VT, N00VT);
14496 SDValue Zero = DAG.getConstant(0, DL, VT);
14497 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14498 return SCC;
14499
14500 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14501 EVT SetCCVT = getSetCCResultType(N00VT);
14502 // Don't do this transform for i1 because there's a select transform
14503 // that would reverse it.
14504 // TODO: We should not do this transform at all without a target hook
14505 // because a sext is likely cheaper than a select?
14506 if (SetCCVT.getScalarSizeInBits() != 1 &&
14507 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14508 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14509 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14510 }
14511 }
14512
14513 return SDValue();
14514}
14515
14516SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14517 SDValue N0 = N->getOperand(0);
14518 EVT VT = N->getValueType(0);
14519 SDLoc DL(N);
14520
14521 if (VT.isVector())
14522 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14523 return FoldedVOp;
14524
14525 // sext(undef) = 0 because the top bits will all be the same.
14526 if (N0.isUndef())
14527 return DAG.getConstant(0, DL, VT);
14528
14529 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14530 return Res;
14531
14532 // fold (sext (sext x)) -> (sext x)
14533 // fold (sext (aext x)) -> (sext x)
14534 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14535 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14536
14537 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14538 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14539 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
14540 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14541 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14542 N0.getOperand(0));
14543
14544 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14545 SDValue N00 = N0.getOperand(0);
14546 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14547 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14548 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14549 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14550 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14551 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14552 }
14553
14554 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14555 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14556 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14557 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14558 N0->getOperand(1));
14559 }
14560 }
14561 }
14562
14563 if (N0.getOpcode() == ISD::TRUNCATE) {
14564 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14565 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14566 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14567 SDNode *oye = N0.getOperand(0).getNode();
14568 if (NarrowLoad.getNode() != N0.getNode()) {
14569 CombineTo(N0.getNode(), NarrowLoad);
14570 // CombineTo deleted the truncate, if needed, but not what's under it.
14571 AddToWorklist(oye);
14572 }
14573 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14574 }
14575
14576 // See if the value being truncated is already sign extended. If so, just
14577 // eliminate the trunc/sext pair.
14578 SDValue Op = N0.getOperand(0);
14579 unsigned OpBits = Op.getScalarValueSizeInBits();
14580 unsigned MidBits = N0.getScalarValueSizeInBits();
14581 unsigned DestBits = VT.getScalarSizeInBits();
14582
14583 if (N0->getFlags().hasNoSignedWrap() ||
14584 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14585 if (OpBits == DestBits) {
14586 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14587 // bits, it is already in the required form.
14588 return Op;
14589 }
14590
14591 if (OpBits < DestBits) {
14592 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14593 // bits, just sext from i32.
14594 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14595 }
14596
14597 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14598 // bits, just truncate to i32.
14599 SDNodeFlags Flags;
14600 Flags.setNoSignedWrap(true);
14601 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14602 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14603 }
14604
14605 // fold (sext (truncate x)) -> (sextinreg x).
14606 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14607 N0.getValueType())) {
14608 if (OpBits < DestBits)
14609 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14610 else if (OpBits > DestBits)
14611 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14612 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14613 DAG.getValueType(N0.getValueType()));
14614 }
14615 }
14616
14617 // Try to simplify (sext (load x)).
14618 if (SDValue foldedExt =
14619 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14620 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14621 return foldedExt;
14622
14623 if (SDValue foldedExt =
14624 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14625 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14626 return foldedExt;
14627
14628 // fold (sext (load x)) to multiple smaller sextloads.
14629 // Only on illegal but splittable vectors.
14630 if (SDValue ExtLoad = CombineExtLoad(N))
14631 return ExtLoad;
14632
14633 // Try to simplify (sext (sextload x)).
14634 if (SDValue foldedExt = tryToFoldExtOfExtload(
14635 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14636 return foldedExt;
14637
14638 // Try to simplify (sext (atomic_load x)).
14639 if (SDValue foldedExt =
14640 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14641 return foldedExt;
14642
14643 // fold (sext (and/or/xor (load x), cst)) ->
14644 // (and/or/xor (sextload x), (sext cst))
14645 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14646 isa<LoadSDNode>(N0.getOperand(0)) &&
14647 N0.getOperand(1).getOpcode() == ISD::Constant &&
14648 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14649 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14650 EVT MemVT = LN00->getMemoryVT();
14651 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14652 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14653 SmallVector<SDNode *, 4> SetCCs;
14654 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14655 ISD::SIGN_EXTEND, SetCCs, TLI);
14656 if (DoXform) {
14657 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14658 LN00->getChain(), LN00->getBasePtr(),
14659 LN00->getMemoryVT(),
14660 LN00->getMemOperand());
14661 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14662 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14663 ExtLoad, DAG.getConstant(Mask, DL, VT));
14664 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14665 bool NoReplaceTruncAnd = !N0.hasOneUse();
14666 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14667 CombineTo(N, And);
14668 // If N0 has multiple uses, change other uses as well.
14669 if (NoReplaceTruncAnd) {
14670 SDValue TruncAnd =
14671 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14672 CombineTo(N0.getNode(), TruncAnd);
14673 }
14674 if (NoReplaceTrunc) {
14675 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14676 } else {
14677 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14678 LN00->getValueType(0), ExtLoad);
14679 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14680 }
14681 return SDValue(N,0); // Return N so it doesn't get rechecked!
14682 }
14683 }
14684 }
14685
14686 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14687 return V;
14688
14689 if (SDValue V = foldSextSetcc(N))
14690 return V;
14691
14692 // fold (sext x) -> (zext x) if the sign bit is known zero.
14693 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14694 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14695 DAG.SignBitIsZero(N0))
14696 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14697
14698 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14699 return NewVSel;
14700
14701 // Eliminate this sign extend by doing a negation in the destination type:
14702 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14703 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14704 isNullOrNullSplat(N0.getOperand(0)) &&
14705 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14706 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14707 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14708 return DAG.getNegative(Zext, DL, VT);
14709 }
14710 // Eliminate this sign extend by doing a decrement in the destination type:
14711 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14712 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14713 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14714 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14715 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14716 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14717 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14718 }
14719
14720 // fold sext (not i1 X) -> add (zext i1 X), -1
14721 // TODO: This could be extended to handle bool vectors.
14722 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14723 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14724 TLI.isOperationLegal(ISD::ADD, VT)))) {
14725 // If we can eliminate the 'not', the sext form should be better
14726 if (SDValue NewXor = visitXOR(N0.getNode())) {
14727 // Returning N0 is a form of in-visit replacement that may have
14728 // invalidated N0.
14729 if (NewXor.getNode() == N0.getNode()) {
14730 // Return SDValue here as the xor should have already been replaced in
14731 // this sext.
14732 return SDValue();
14733 }
14734
14735 // Return a new sext with the new xor.
14736 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14737 }
14738
14739 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14740 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14741 }
14742
14743 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14744 return Res;
14745
14746 return SDValue();
14747}
14748
14749/// Given an extending node with a pop-count operand, if the target does not
14750/// support a pop-count in the narrow source type but does support it in the
14751/// destination type, widen the pop-count to the destination type.
14752static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14753 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14754 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14755 "Expected extend op");
14756
14757 SDValue CtPop = Extend->getOperand(0);
14758 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14759 return SDValue();
14760
14761 EVT VT = Extend->getValueType(0);
14762 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14763 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14764 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14765 return SDValue();
14766
14767 // zext (ctpop X) --> ctpop (zext X)
14768 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14769 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14770}
14771
14772// If we have (zext (abs X)) where X is a type that will be promoted by type
14773// legalization, convert to (abs (sext X)). But don't extend past a legal type.
14774static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14775 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14776
14777 EVT VT = Extend->getValueType(0);
14778 if (VT.isVector())
14779 return SDValue();
14780
14781 SDValue Abs = Extend->getOperand(0);
14782 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14783 return SDValue();
14784
14785 EVT AbsVT = Abs.getValueType();
14786 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14787 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14788 TargetLowering::TypePromoteInteger)
14789 return SDValue();
14790
14791 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14792
14793 SDValue SExt =
14794 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14795 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14796 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14797}
14798
14799SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14800 SDValue N0 = N->getOperand(0);
14801 EVT VT = N->getValueType(0);
14802 SDLoc DL(N);
14803
14804 if (VT.isVector())
14805 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14806 return FoldedVOp;
14807
14808 // zext(undef) = 0
14809 if (N0.isUndef())
14810 return DAG.getConstant(0, DL, VT);
14811
14812 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14813 return Res;
14814
14815 // fold (zext (zext x)) -> (zext x)
14816 // fold (zext (aext x)) -> (zext x)
14817 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14818 SDNodeFlags Flags;
14819 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14820 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14821 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14822 }
14823
14824 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14825 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14826 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14827 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14828 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14829
14830 // fold (zext (truncate x)) -> (zext x) or
14831 // (zext (truncate x)) -> (truncate x)
14832 // This is valid when the truncated bits of x are already zero.
14833 SDValue Op;
14834 KnownBits Known;
14835 if (isTruncateOf(DAG, N0, Op, Known)) {
14836 APInt TruncatedBits =
14837 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14838 APInt(Op.getScalarValueSizeInBits(), 0) :
14839 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14840 N0.getScalarValueSizeInBits(),
14841 std::min(Op.getScalarValueSizeInBits(),
14842 VT.getScalarSizeInBits()));
14843 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14844 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14845 DAG.salvageDebugInfo(*N0.getNode());
14846
14847 return ZExtOrTrunc;
14848 }
14849 }
14850
14851 // fold (zext (truncate x)) -> (and x, mask)
14852 if (N0.getOpcode() == ISD::TRUNCATE) {
14853 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14854 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14855 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14856 SDNode *oye = N0.getOperand(0).getNode();
14857 if (NarrowLoad.getNode() != N0.getNode()) {
14858 CombineTo(N0.getNode(), NarrowLoad);
14859 // CombineTo deleted the truncate, if needed, but not what's under it.
14860 AddToWorklist(oye);
14861 }
14862 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14863 }
14864
14865 EVT SrcVT = N0.getOperand(0).getValueType();
14866 EVT MinVT = N0.getValueType();
14867
14868 if (N->getFlags().hasNonNeg()) {
14869 SDValue Op = N0.getOperand(0);
14870 unsigned OpBits = SrcVT.getScalarSizeInBits();
14871 unsigned MidBits = MinVT.getScalarSizeInBits();
14872 unsigned DestBits = VT.getScalarSizeInBits();
14873
14874 if (N0->getFlags().hasNoSignedWrap() ||
14875 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14876 if (OpBits == DestBits) {
14877 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14878 // bits, it is already in the required form.
14879 return Op;
14880 }
14881
14882 if (OpBits < DestBits) {
14883 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14884 // bits, just sext from i32.
14885 // FIXME: This can probably be ZERO_EXTEND nneg?
14886 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14887 }
14888
14889 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14890 // bits, just truncate to i32.
14891 SDNodeFlags Flags;
14892 Flags.setNoSignedWrap(true);
14893 Flags.setNoUnsignedWrap(true);
14894 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14895 }
14896 }
14897
14898 // Try to mask before the extension to avoid having to generate a larger mask,
14899 // possibly over several sub-vectors.
14900 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14901 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14903 SDValue Op = N0.getOperand(0);
14904 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14905 AddToWorklist(Op.getNode());
14906 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14907 // Transfer the debug info; the new node is equivalent to N0.
14908 DAG.transferDbgValues(N0, ZExtOrTrunc);
14909 return ZExtOrTrunc;
14910 }
14911 }
14912
14913 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14914 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14915 AddToWorklist(Op.getNode());
14916 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14917 // We may safely transfer the debug info describing the truncate node over
14918 // to the equivalent and operation.
14919 DAG.transferDbgValues(N0, And);
14920 return And;
14921 }
14922 }
14923
14924 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14925 // if either of the casts is not free.
14926 if (N0.getOpcode() == ISD::AND &&
14927 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14928 N0.getOperand(1).getOpcode() == ISD::Constant &&
14929 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14930 !TLI.isZExtFree(N0.getValueType(), VT))) {
14931 SDValue X = N0.getOperand(0).getOperand(0);
14932 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14933 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14934 return DAG.getNode(ISD::AND, DL, VT,
14935 X, DAG.getConstant(Mask, DL, VT));
14936 }
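// For example (illustrative): with a 32-bit x,
//   (zext i8 (and (trunc i32 x to i8), 0x0F) to i32)
// is simply (and x, 0x0F): the zero-extended mask already clears every bit
// above the truncated width, so neither cast is needed.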
14937
14938 // Try to simplify (zext (load x)).
14939 if (SDValue foldedExt = tryToFoldExtOfLoad(
14940 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14941 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14942 return foldedExt;
14943
14944 if (SDValue foldedExt =
14945 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14946 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14947 return foldedExt;
14948
14949 // fold (zext (load x)) to multiple smaller zextloads.
14950 // Only on illegal but splittable vectors.
14951 if (SDValue ExtLoad = CombineExtLoad(N))
14952 return ExtLoad;
14953
14954 // Try to simplify (zext (atomic_load x)).
14955 if (SDValue foldedExt =
14956 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14957 return foldedExt;
14958
14959 // fold (zext (and/or/xor (load x), cst)) ->
14960 // (and/or/xor (zextload x), (zext cst))
14961 // Unless (and (load x) cst) will match as a zextload already and has
14962 // additional users, or the zext is already free.
14963 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14964 isa<LoadSDNode>(N0.getOperand(0)) &&
14965 N0.getOperand(1).getOpcode() == ISD::Constant &&
14966 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14967 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14968 EVT MemVT = LN00->getMemoryVT();
14969 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14970 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14971 bool DoXform = true;
14972 SmallVector<SDNode *, 4> SetCCs;
14973 if (!N0.hasOneUse()) {
14974 if (N0.getOpcode() == ISD::AND) {
14975 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14976 EVT LoadResultTy = AndC->getValueType(0);
14977 EVT ExtVT;
14978 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14979 DoXform = false;
14980 }
14981 }
14982 if (DoXform)
14983 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14984 ISD::ZERO_EXTEND, SetCCs, TLI);
14985 if (DoXform) {
14986 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14987 LN00->getChain(), LN00->getBasePtr(),
14988 LN00->getMemoryVT(),
14989 LN00->getMemOperand());
14990 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14991 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14992 ExtLoad, DAG.getConstant(Mask, DL, VT));
14993 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14994 bool NoReplaceTruncAnd = !N0.hasOneUse();
14995 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14996 CombineTo(N, And);
14997 // If N0 has multiple uses, change other uses as well.
14998 if (NoReplaceTruncAnd) {
14999 SDValue TruncAnd =
15000 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
15001 CombineTo(N0.getNode(), TruncAnd);
15002 }
15003 if (NoReplaceTrunc) {
15004 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15005 } else {
15006 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15007 LN00->getValueType(0), ExtLoad);
15008 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15009 }
15010 return SDValue(N,0); // Return N so it doesn't get rechecked!
15011 }
15012 }
15013 }
15014
15015 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15016 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15017 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15018 return ZExtLoad;
15019
15020 // Try to simplify (zext (zextload x)).
15021 if (SDValue foldedExt = tryToFoldExtOfExtload(
15022 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15023 return foldedExt;
15024
15025 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15026 return V;
15027
15028 if (N0.getOpcode() == ISD::SETCC) {
15029 // Propagate fast-math-flags.
15030 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15031
15032 // Only do this before legalize for now.
15033 if (!LegalOperations && VT.isVector() &&
15034 N0.getValueType().getVectorElementType() == MVT::i1) {
15035 EVT N00VT = N0.getOperand(0).getValueType();
15036 if (getSetCCResultType(N00VT) == N0.getValueType())
15037 return SDValue();
15038
15039 // We know that the # elements of the results is the same as the #
15040 // elements of the compare (and the # elements of the compare result for
15041 // that matter). Check to see that they are the same size. If so, we know
15042 // that the element size of the sext'd result matches the element size of
15043 // the compare operands.
15044 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15045 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15046 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15047 N0.getOperand(1), N0.getOperand(2));
15048 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15049 }
15050
15051 // If the desired elements are smaller or larger than the source
15052 // elements we can use a matching integer vector type and then
15053 // truncate/any extend followed by zext_in_reg.
15054 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15055 SDValue VsetCC =
15056 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15057 N0.getOperand(1), N0.getOperand(2));
15058 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15059 N0.getValueType());
15060 }
15061
15062 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15063 EVT N0VT = N0.getValueType();
15064 EVT N00VT = N0.getOperand(0).getValueType();
15065 if (SDValue SCC = SimplifySelectCC(
15066 DL, N0.getOperand(0), N0.getOperand(1),
15067 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15068 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15069 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15070 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15071 }
15072
15073 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15074 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15075 !TLI.isZExtFree(N0, VT)) {
15076 SDValue ShVal = N0.getOperand(0);
15077 SDValue ShAmt = N0.getOperand(1);
15078 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15079 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15080 if (N0.getOpcode() == ISD::SHL) {
15081 // If the original shl may be shifting out bits, do not perform this
15082 // transformation.
15083 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15084 ShVal.getOperand(0).getValueSizeInBits();
15085 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15086 // If the shift is too large, then see if we can deduce that the
15087 // shift is safe anyway.
15088
15089 // Check if the bits being shifted out are known to be zero.
15090 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15091 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15092 return SDValue();
15093 }
15094 }
15095
15096 // Ensure that the shift amount is wide enough for the shifted value.
15097 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15098 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15099
15100 return DAG.getNode(N0.getOpcode(), DL, VT,
15101 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15102 }
15103 }
15104 }
15105
15106 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15107 return NewVSel;
15108
15109 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15110 return NewCtPop;
15111
15112 if (SDValue V = widenAbs(N, DAG))
15113 return V;
15114
15115 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15116 return Res;
15117
15118 // CSE zext nneg with sext if the zext is not free.
15119 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15120 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15121 if (CSENode)
15122 return SDValue(CSENode, 0);
15123 }
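// For example (illustrative): if (sext i8 x to i32) already exists in the DAG,
// a (zext nneg i8 x to i32) of the same operand computes an identical value
// (nneg asserts the sign bit is clear), so the existing sign_extend node can
// be reused instead of emitting a second extend.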
15124
15125 return SDValue();
15126}
15127
15128SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15129 SDValue N0 = N->getOperand(0);
15130 EVT VT = N->getValueType(0);
15131 SDLoc DL(N);
15132
15133 // aext(undef) = undef
15134 if (N0.isUndef())
15135 return DAG.getUNDEF(VT);
15136
15137 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15138 return Res;
15139
15140 // fold (aext (aext x)) -> (aext x)
15141 // fold (aext (zext x)) -> (zext x)
15142 // fold (aext (sext x)) -> (sext x)
15143 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15144 N0.getOpcode() == ISD::SIGN_EXTEND) {
15145 SDNodeFlags Flags;
15146 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15147 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15148 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15149 }
15150
15151 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15152 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15153 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15157 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15158
15159 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15160 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15161 if (N0.getOpcode() == ISD::TRUNCATE) {
15162 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15163 SDNode *oye = N0.getOperand(0).getNode();
15164 if (NarrowLoad.getNode() != N0.getNode()) {
15165 CombineTo(N0.getNode(), NarrowLoad);
15166 // CombineTo deleted the truncate, if needed, but not what's under it.
15167 AddToWorklist(oye);
15168 }
15169 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15170 }
15171 }
15172
15173 // fold (aext (truncate x))
15174 if (N0.getOpcode() == ISD::TRUNCATE)
15175 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15176
15177 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15178 // if the trunc is not free.
15179 if (N0.getOpcode() == ISD::AND &&
15180 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15181 N0.getOperand(1).getOpcode() == ISD::Constant &&
15182 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15183 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15184 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15185 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15186 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15187 }
15188
15189 // fold (aext (load x)) -> (aext (truncate (extload x)))
15190 // None of the supported targets knows how to perform load and any_ext
15191 // on vectors in one instruction, so attempt to fold to zext instead.
15192 if (VT.isVector()) {
15193 // Try to simplify (zext (load x)).
15194 if (SDValue foldedExt =
15195 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15197 return foldedExt;
15198 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15201 bool DoXform = true;
15203 if (!N0.hasOneUse())
15204 DoXform =
15205 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15206 if (DoXform) {
15207 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15208 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15209 LN0->getBasePtr(), N0.getValueType(),
15210 LN0->getMemOperand());
15211 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15212 // If the load value is used only by N, replace it via CombineTo N.
15213 bool NoReplaceTrunc = N0.hasOneUse();
15214 CombineTo(N, ExtLoad);
15215 if (NoReplaceTrunc) {
15216 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15217 recursivelyDeleteUnusedNodes(LN0);
15218 } else {
15219 SDValue Trunc =
15220 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15221 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15222 }
15223 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15224 }
15225 }
15226
15227 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15228 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15229 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15230 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15231 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15232 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15233 ISD::LoadExtType ExtType = LN0->getExtensionType();
15234 EVT MemVT = LN0->getMemoryVT();
15235 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15236 SDValue ExtLoad =
15237 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15238 MemVT, LN0->getMemOperand());
15239 CombineTo(N, ExtLoad);
15240 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15241 recursivelyDeleteUnusedNodes(LN0);
15242 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15243 }
15244 }
15245
15246 if (N0.getOpcode() == ISD::SETCC) {
15247 // Propagate fast-math-flags.
15248 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15249
15250 // For vectors:
15251 // aext(setcc) -> vsetcc
15252 // aext(setcc) -> truncate(vsetcc)
15253 // aext(setcc) -> aext(vsetcc)
15254 // Only do this before legalize for now.
15255 if (VT.isVector() && !LegalOperations) {
15256 EVT N00VT = N0.getOperand(0).getValueType();
15257 if (getSetCCResultType(N00VT) == N0.getValueType())
15258 return SDValue();
15259
15260 // We know that the number of elements of the result is the same as the
15261 // number of elements of the compare (and the number of elements of the
15262 // compare result, for that matter). Check that they are the same size. If
15263 // so, we know that the element size of the extended result matches the
15264 // element size of the compare operands.
15265 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15266 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15267 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15268
15269 // If the desired elements are smaller or larger than the source
15270 // elements, we can use a matching integer vector type and then
15271 // truncate/any-extend.
15272 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15273 SDValue VsetCC = DAG.getSetCC(
15274 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15275 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15276 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15277 }
15278
15279 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15280 if (SDValue SCC = SimplifySelectCC(
15281 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15282 DAG.getConstant(0, DL, VT),
15283 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15284 return SCC;
15285 }
15286
15287 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15288 return NewCtPop;
15289
15290 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15291 return Res;
15292
15293 return SDValue();
15294}
15295
15296SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15297 unsigned Opcode = N->getOpcode();
15298 SDValue N0 = N->getOperand(0);
15299 SDValue N1 = N->getOperand(1);
15300 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15301
15302 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15303 if (N0.getOpcode() == Opcode &&
15304 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15305 return N0;
15306
15307 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15308 N0.getOperand(0).getOpcode() == Opcode) {
15309 // We have an assert, truncate, assert sandwich. Make one stronger assert
15310 // by asserting the smallest asserted type on the larger source value.
15311 // This eliminates the later assert:
15312 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15313 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15314 SDLoc DL(N);
15315 SDValue BigA = N0.getOperand(0);
15316 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15317 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15318 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15319 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15320 BigA.getOperand(0), MinAssertVTVal);
15321 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15322 }
15323
15324 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15325 // than X, just move the AssertZext in front of the truncate and drop the
15326 // AssertSext.
15327 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15329 Opcode == ISD::AssertZext) {
15330 SDValue BigA = N0.getOperand(0);
15331 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15332 if (AssertVT.bitsLT(BigA_AssertVT)) {
15333 SDLoc DL(N);
15334 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15335 BigA.getOperand(0), N1);
15336 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15337 }
15338 }
15339
15340 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15342 const APInt &Mask = N0.getConstantOperandAPInt(1);
15343
15344 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15345 // than X, and the And doesn't change the lower iX bits, we can move the
15346 // AssertZext in front of the And and drop the AssertSext.
15347 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15348 SDValue BigA = N0.getOperand(0);
15349 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15350 if (AssertVT.bitsLT(BigA_AssertVT) &&
15351 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15352 SDLoc DL(N);
15353 SDValue NewAssert =
15354 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15355 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15356 N0.getOperand(1));
15357 }
15358 }
15359
15360 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15361 // fail.
15362 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
15363 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15364 return N0;
15365 }
15366
15367 return SDValue();
15368}
15369
15370SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15371 SDLoc DL(N);
15372
15373 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15374 SDValue N0 = N->getOperand(0);
15375
15376 // Fold (assertalign (assertalign x, AL0), AL1) ->
15377 // (assertalign x, max(AL0, AL1))
15378 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15379 return DAG.getAssertAlign(DL, N0.getOperand(0),
15380 std::max(AL, AAN->getAlign()));
15381
15382 // In rare cases, there are trivial arithmetic ops in the source operands.
15383 // Sink this assert down to the source operands so that those arithmetic ops
15384 // can be exposed to DAG combining.
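// For instance (illustrative): (assertalign (ptradd P, 16), align 8) can become
// (ptradd (assertalign P, align 8), 16), because the constant offset 16 is
// already known to be 8-byte aligned.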
15385 switch (N0.getOpcode()) {
15386 default:
15387 break;
15388 case ISD::ADD:
15389 case ISD::PTRADD:
15390 case ISD::SUB: {
15391 unsigned AlignShift = Log2(AL);
15392 SDValue LHS = N0.getOperand(0);
15393 SDValue RHS = N0.getOperand(1);
15394 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15395 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15396 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15397 if (LHSAlignShift < AlignShift)
15398 LHS = DAG.getAssertAlign(DL, LHS, AL);
15399 if (RHSAlignShift < AlignShift)
15400 RHS = DAG.getAssertAlign(DL, RHS, AL);
15401 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15402 }
15403 break;
15404 }
15405 }
15406
15407 return SDValue();
15408}
15409
15410/// If the result of a load is shifted/masked/truncated to an effectively
15411/// narrower type, try to transform the load to a narrower type and/or
15412/// use an extending load.
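/// For example (illustrative, little-endian): (i32 (truncate (srl (i64 (load p)),
/// 32))) can be rewritten as an i32 load from p+4, avoiding both the wide load
/// and the shift.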
15413SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15414 unsigned Opc = N->getOpcode();
15415
15416 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15417 SDValue N0 = N->getOperand(0);
15418 EVT VT = N->getValueType(0);
15419 EVT ExtVT = VT;
15420
15421 // This transformation isn't valid for vector loads.
15422 if (VT.isVector())
15423 return SDValue();
15424
15425 // The ShAmt variable is used to indicate that we've consumed a right
15426 // shift, i.e. we want to narrow the width of the load by not loading the
15427 // ShAmt least significant bits.
15428 unsigned ShAmt = 0;
15429 // A special case is when the least significant bits from the load are masked
15430 // away, but using an AND rather than a right shift. ShiftedOffset is used to
15431 // indicate that the narrowed load should be left-shifted ShiftedOffset bits
15432 // to get the result.
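// For example (illustrative, little-endian): (and (i32 (load p)), 0xFF00) can be
// narrowed to an i8 zextload from p+1 followed by a shl by 8 that puts the byte
// back into bits 8-15.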
15433 unsigned ShiftedOffset = 0;
15434 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15435 // sign extending to VT.
15436 if (Opc == ISD::SIGN_EXTEND_INREG) {
15437 ExtType = ISD::SEXTLOAD;
15438 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15439 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15440 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15441 // value, or it may be shifting a higher subword, half or byte into the
15442 // lowest bits.
15443
15444 // Only handle shift with constant shift amount, and the shiftee must be a
15445 // load.
15446 auto *LN = dyn_cast<LoadSDNode>(N0);
15447 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15448 if (!N1C || !LN)
15449 return SDValue();
15450 // If the shift amount is at least as large as the memory width then we're
15451 // not accessing any of the loaded bytes.
15452 ShAmt = N1C->getZExtValue();
15453 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15454 if (MemoryWidth <= ShAmt)
15455 return SDValue();
15456 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15457 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15458 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15459 // If the original load is a SEXTLOAD then we can't simply replace it by a
15460 // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD followed
15461 // by a ZEXT, but that is not handled at the moment). Similarly if the
15462 // original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15463 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15464 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15465 LN->getExtensionType() != ExtType)
15466 return SDValue();
15467 } else if (Opc == ISD::AND) {
15468 // An AND with a constant mask is the same as a truncate + zero-extend.
15469 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15470 if (!AndC)
15471 return SDValue();
15472
15473 const APInt &Mask = AndC->getAPIntValue();
15474 unsigned ActiveBits = 0;
15475 if (Mask.isMask()) {
15476 ActiveBits = Mask.countr_one();
15477 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15478 ShiftedOffset = ShAmt;
15479 } else {
15480 return SDValue();
15481 }
15482
15483 ExtType = ISD::ZEXTLOAD;
15484 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15485 }
15486
15487 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15488 // a right shift. Here we redo some of those checks, to possibly adjust the
15489 // ExtVT even further based on "a masking AND". We could also end up here for
15490 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15491 // need to be done here as well.
15492 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15493 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15494 // Bail out when the SRL has more than one use. This is done for historical
15495 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15496 // check below? And maybe it is non-profitable to do the transform in case
15497 // the SRL has multiple uses and we get here with Opc != ISD::SRL?
15498 // FIXME: Can't we just skip this check for the Opc == ISD::SRL case?
15499 if (!SRL.hasOneUse())
15500 return SDValue();
15501
15502 // Only handle shift with constant shift amount, and the shiftee must be a
15503 // load.
15504 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15505 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15506 if (!SRL1C || !LN)
15507 return SDValue();
15508
15509 // If the shift amount is larger than the input type then we're not
15510 // accessing any of the loaded bytes. If the load was a zextload/extload
15511 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15512 ShAmt = SRL1C->getZExtValue();
15513 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15514 if (ShAmt >= MemoryWidth)
15515 return SDValue();
15516
15517 // Because a SRL must be assumed to *need* to zero-extend the high bits
15518 // (as opposed to anyext the high bits), we can't combine the zextload
15519 // lowering of SRL and an sextload.
15520 if (LN->getExtensionType() == ISD::SEXTLOAD)
15521 return SDValue();
15522
15523 // Avoid reading outside the memory accessed by the original load (which
15524 // could happen if we only adjusted the load base pointer by ShAmt). Instead
15525 // we try to narrow the load even further. The typical scenario here is:
15526 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15527 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15528 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15529 // Don't replace sextload by zextload.
15530 if (ExtType == ISD::SEXTLOAD)
15531 return SDValue();
15532 // Narrow the load.
15533 ExtType = ISD::ZEXTLOAD;
15534 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15535 }
15536
15537 // If the SRL is only used by a masking AND, we may be able to adjust
15538 // the ExtVT to make the AND redundant.
15539 SDNode *Mask = *(SRL->user_begin());
15540 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15541 isa<ConstantSDNode>(Mask->getOperand(1))) {
15542 unsigned Offset, ActiveBits;
15543 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15544 if (ShiftMask.isMask()) {
15545 EVT MaskedVT =
15546 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15547 // If the mask is smaller, recompute the type.
15548 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15549 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15550 ExtVT = MaskedVT;
15551 } else if (ExtType == ISD::ZEXTLOAD &&
15552 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15553 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15554 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15555 // If the mask is shifted we can use a narrower load and a shl to insert
15556 // the trailing zeros.
15557 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15558 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15559 ExtVT = MaskedVT;
15560 ShAmt = Offset + ShAmt;
15561 ShiftedOffset = Offset;
15562 }
15563 }
15564 }
15565
15566 N0 = SRL.getOperand(0);
15567 }
15568
15569 // If the load is shifted left (and the result isn't shifted back right), we
15570 // can fold a truncate through the shift. The typical scenario is that N
15571 // points at a TRUNCATE here so the attempted fold is:
15572 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
15573 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15574 unsigned ShLeftAmt = 0;
15575 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15576 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15577 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15578 ShLeftAmt = N01->getZExtValue();
15579 N0 = N0.getOperand(0);
15580 }
15581 }
15582
15583 // If we haven't found a load, we can't narrow it.
15584 if (!isa<LoadSDNode>(N0))
15585 return SDValue();
15586
15587 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15588 // Reducing the width of a volatile load is illegal. For atomics, we may be
15589 // able to reduce the width provided we never widen again. (see D66309)
15590 if (!LN0->isSimple() ||
15591 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15592 return SDValue();
15593
15594 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15595 unsigned LVTStoreBits =
15596 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15597 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15598 return LVTStoreBits - EVTStoreBits - ShAmt;
15599 };
15600
15601 // We need to adjust the pointer to the load by ShAmt bits in order to load
15602 // the correct bytes.
15603 unsigned PtrAdjustmentInBits =
15604 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
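// For example (illustrative): narrowing an i32 load to its low i16 half uses a
// byte offset of 0 on a little-endian target but 2 on a big-endian target,
// because the least significant bits live at the highest address there.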
15605
15606 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15607 SDLoc DL(LN0);
15608 // The original load itself didn't wrap, so an offset within it doesn't.
15609 SDValue NewPtr =
15612 AddToWorklist(NewPtr.getNode());
15613
15614 SDValue Load;
15615 if (ExtType == ISD::NON_EXTLOAD) {
15616 const MDNode *OldRanges = LN0->getRanges();
15617 const MDNode *NewRanges = nullptr;
15618 // If the LSBs are loaded and the truncated ConstantRange for the OldRanges
15619 // metadata is not the full set for the new width, then create NewRanges
15620 // metadata for the truncated load.
15621 if (ShAmt == 0 && OldRanges) {
15622 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15623 unsigned BitSize = VT.getScalarSizeInBits();
15624
15625 // It is possible for an 8-bit extending load with 8-bit range
15626 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15627 // ensure that the truncation is strictly narrowing.
15628 if (CR.getBitWidth() > BitSize) {
15629 ConstantRange TruncatedCR = CR.truncate(BitSize);
15630 if (!TruncatedCR.isFullSet()) {
15631 Metadata *Bounds[2] = {
15633 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15635 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15636 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15637 }
15638 } else if (CR.getBitWidth() == BitSize)
15639 NewRanges = OldRanges;
15640 }
15641 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15642 LN0->getPointerInfo().getWithOffset(PtrOff),
15643 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15644 LN0->getAAInfo(), NewRanges);
15645 } else
15646 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15647 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15648 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15649 LN0->getAAInfo());
15650
15651 // Replace the old load's chain with the new load's chain.
15652 WorklistRemover DeadNodes(*this);
15653 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15654
15655 // Shift the result left, if we've swallowed a left shift.
15656 SDValue Result = Load;
15657 if (ShLeftAmt != 0) {
15658 // If the shift amount is as large as the result size (but, presumably,
15659 // no larger than the source) then the useful bits of the result are
15660 // zero; we can't simply return the shortened shift, because the result
15661 // of that operation is undefined.
15662 if (ShLeftAmt >= VT.getScalarSizeInBits())
15663 Result = DAG.getConstant(0, DL, VT);
15664 else
15665 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15666 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15667 }
15668
15669 if (ShiftedOffset != 0) {
15670 // We're using a shifted mask, so the load now has an offset. This means
15671 // the data has been loaded into lower bits than it would have been
15672 // otherwise, so we need to shl the loaded data into the correct position
15673 // in the register.
15674 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15675 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15676 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15677 }
15678
15679 // Return the new loaded value.
15680 return Result;
15681}
15682
15683SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15684 SDValue N0 = N->getOperand(0);
15685 SDValue N1 = N->getOperand(1);
15686 EVT VT = N->getValueType(0);
15687 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15688 unsigned VTBits = VT.getScalarSizeInBits();
15689 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15690 SDLoc DL(N);
15691
15692 // sext_in_reg(undef) = 0 because the top bits will all be the same.
15693 if (N0.isUndef())
15694 return DAG.getConstant(0, DL, VT);
15695
15696 // fold (sext_in_reg c1) -> c1
15697 if (SDValue C =
15698 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15699 return C;
15700
15701 // If the input is already sign extended, just drop the extension.
15702 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15703 return N0;
15704
15705 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15706 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15707 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15708 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15709
15710 // fold (sext_in_reg (sext x)) -> (sext x)
15711 // fold (sext_in_reg (aext x)) -> (sext x)
15712 // if x is small enough or if we know that x has more than 1 sign bit and the
15713 // sign_extend_inreg is extending from one of them.
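// For example (illustrative): (sext_in_reg (aext i16 X to i64), i16) is simply
// (sext i16 X to i64), since only the low 16 bits of the any-extended value are
// significant after the in-register sign extension.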
15714 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15715 SDValue N00 = N0.getOperand(0);
15716 unsigned N00Bits = N00.getScalarValueSizeInBits();
15717 if ((N00Bits <= ExtVTBits ||
15718 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15719 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15720 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15721 }
15722
15723 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15724 // if x is small enough or if we know that x has more than 1 sign bit and the
15725 // sign_extend_inreg is extending from one of them.
15727 SDValue N00 = N0.getOperand(0);
15728 unsigned N00Bits = N00.getScalarValueSizeInBits();
15729 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15730 if ((N00Bits == ExtVTBits ||
15731 (!IsZext && (N00Bits < ExtVTBits ||
15732 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15733 (!LegalOperations ||
15735 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15736 }
15737
15738 // fold (sext_in_reg (zext x)) -> (sext x)
15739 // iff we are extending the source sign bit.
15740 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15741 SDValue N00 = N0.getOperand(0);
15742 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15743 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15744 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15745 }
15746
15747 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15748 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15749 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15750
15751 // fold operands of sext_in_reg based on knowledge that the top bits are not
15752 // demanded.
15753 if (SimplifyDemandedBits(SDValue(N, 0)))
15754 return SDValue(N, 0);
15755
15756 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15757 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15758 if (SDValue NarrowLoad = reduceLoadWidth(N))
15759 return NarrowLoad;
15760
15761 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15762 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15763 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15764 if (N0.getOpcode() == ISD::SRL) {
15765 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15766 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15767 // We can turn this into an SRA iff the input to the SRL is already sign
15768 // extended enough.
15769 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15770 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15771 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15772 N0.getOperand(1));
15773 }
15774 }
15775
15776 // fold (sext_inreg (extload x)) -> (sextload x)
15777 // If sextload is not supported by the target, we can only do the combine
15778 // when the load has one use. Doing otherwise can block folding the extload
15779 // with other extends that the target does support.
15781 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15782 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15783 N0.hasOneUse()) ||
15784 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15785 auto *LN0 = cast<LoadSDNode>(N0);
15786 SDValue ExtLoad =
15787 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15788 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15789 CombineTo(N, ExtLoad);
15790 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15791 AddToWorklist(ExtLoad.getNode());
15792 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15793 }
15794
15795 // fold (sext_inreg (zextload x)) -> (sextload x) iff the load has one use
15797 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15798 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15799 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15800 auto *LN0 = cast<LoadSDNode>(N0);
15801 SDValue ExtLoad =
15802 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15803 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15804 CombineTo(N, ExtLoad);
15805 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15806 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15807 }
15808
15809 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15810 // ignore it if the masked load is already sign extended
15811 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15812 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15813 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15814 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15815 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15816 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15817 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15818 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15819 CombineTo(N, ExtMaskedLoad);
15820 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15821 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15822 }
15823 }
15824
15825 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15826 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15827 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15829 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15830 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15831
15832 SDValue ExtLoad = DAG.getMaskedGather(
15833 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15834 GN0->getIndexType(), ISD::SEXTLOAD);
15835
15836 CombineTo(N, ExtLoad);
15837 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15838 AddToWorklist(ExtLoad.getNode());
15839 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15840 }
15841 }
15842
15843 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15844 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15845 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15846 N0.getOperand(1), false))
15847 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15848 }
15849
15850 // Fold (iM_signext_inreg
15851 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15852 // from iN)
15853 // -> (extract_subvector (signext iN_v to iM))
15854 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15856 SDValue InnerExt = N0.getOperand(0);
15857 EVT InnerExtVT = InnerExt->getValueType(0);
15858 SDValue Extendee = InnerExt->getOperand(0);
15859
15860 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15861 (!LegalOperations ||
15862 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15863 SDValue SignExtExtendee =
15864 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15865 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15866 N0.getOperand(1));
15867 }
15868 }
15869
15870 return SDValue();
15871}
15872
15874 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15875 bool LegalOperations) {
15876 unsigned InregOpcode = N->getOpcode();
15877 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15878
15879 SDValue Src = N->getOperand(0);
15880 EVT VT = N->getValueType(0);
15881 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15882 Src.getValueType().getVectorElementType(),
15884
15885 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15886 "Expected EXTEND_VECTOR_INREG dag node in input!");
15887
15888 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15889 // FIXME: the one-use check may be overly restrictive.
15890 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15891 return SDValue();
15892
15893 // Profitability check: we must be extending exactly one of its operands.
15894 // FIXME: this is probably overly restrictive.
15895 Src = Src.getOperand(0);
15896 if (Src.getValueType() != SrcVT)
15897 return SDValue();
15898
15899 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15900 return SDValue();
15901
15902 return DAG.getNode(Opcode, DL, VT, Src);
15903}
15904
15905SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15906 SDValue N0 = N->getOperand(0);
15907 EVT VT = N->getValueType(0);
15908 SDLoc DL(N);
15909
15910 if (N0.isUndef()) {
15911 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15912 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15913 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15914 ? DAG.getUNDEF(VT)
15915 : DAG.getConstant(0, DL, VT);
15916 }
15917
15918 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15919 return Res;
15920
15922 return SDValue(N, 0);
15923
15925 LegalOperations))
15926 return R;
15927
15928 return SDValue();
15929}
15930
15931SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15932 EVT VT = N->getValueType(0);
15933 SDValue N0 = N->getOperand(0);
15934
15935 SDValue FPVal;
15936 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15938 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15939 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15940 DAG.getValueType(VT.getScalarType()));
15941
15942 return SDValue();
15943}
15944
15945/// Detect patterns of truncation with unsigned saturation:
15946///
15947/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15948/// Return the source value x to be truncated or SDValue() if the pattern was
15949/// not matched.
15950///
15951 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15952 unsigned NumDstBits = VT.getScalarSizeInBits();
15953 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15954 // Saturation with truncation. We truncate from InVT to VT.
15955 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15956
15957 SDValue Min;
15958 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15959 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15960 return Min;
15961
15962 return SDValue();
15963}
15964
15965/// Detect patterns of truncation with signed saturation:
15966/// (truncate (smin (smax (x, signed_min_of_dest_type),
15967/// signed_max_of_dest_type)) to dest_type)
15968/// or:
15969/// (truncate (smax (smin (x, signed_max_of_dest_type),
15970/// signed_min_of_dest_type)) to dest_type).
15971///
15972/// Return the source value to be truncated or SDValue() if the pattern was not
15973/// matched.
15974 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15975 unsigned NumDstBits = VT.getScalarSizeInBits();
15976 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15977 // Saturation with truncation. We truncate from InVT to VT.
15978 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15979
15980 SDValue Val;
15981 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15982 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15983
15984 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15985 m_SpecificInt(SignedMax))))
15986 return Val;
15987
15988 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15989 m_SpecificInt(SignedMin))))
15990 return Val;
15991
15992 return SDValue();
15993}
15994
15995/// Detect patterns of truncation with unsigned saturation:
15996 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15997 const SDLoc &DL) {
15998 unsigned NumDstBits = VT.getScalarSizeInBits();
15999 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16000 // Saturation with truncation. We truncate from InVT to VT.
16001 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16002
16003 SDValue Val;
16004 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16005 // Min == 0, Max is unsigned max of destination type.
16006 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16007 m_Zero())))
16008 return Val;
16009
16010 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16011 m_SpecificInt(UnsignedMax))))
16012 return Val;
16013
16014 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16015 m_SpecificInt(UnsignedMax))))
16016 return Val;
16017
16018 return SDValue();
16019}
16020
16021static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16022 SDLoc &DL, const TargetLowering &TLI,
16023 SelectionDAG &DAG) {
16024 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16025 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16026 TLI.isTypeDesirableForOp(Opc, VT));
16027 };
16028
16029 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16030 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16031 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16032 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16033 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16034 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16035 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16036 } else if (Src.getOpcode() == ISD::UMIN) {
16037 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16038 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16039 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16040 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16041 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16042 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16043 }
16044
16045 return SDValue();
16046}
16047
16048SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16049 SDValue N0 = N->getOperand(0);
16050 EVT VT = N->getValueType(0);
16051 EVT SrcVT = N0.getValueType();
16052 bool isLE = DAG.getDataLayout().isLittleEndian();
16053 SDLoc DL(N);
16054
16055 // trunc(undef) = undef
16056 if (N0.isUndef())
16057 return DAG.getUNDEF(VT);
16058
16059 // fold (truncate (truncate x)) -> (truncate x)
16060 if (N0.getOpcode() == ISD::TRUNCATE)
16061 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16062
16063 // fold saturated truncate
16064 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16065 return SaturatedTR;
16066
16067 // fold (truncate c1) -> c1
16068 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16069 return C;
16070
16071 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16072 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16073 N0.getOpcode() == ISD::SIGN_EXTEND ||
16074 N0.getOpcode() == ISD::ANY_EXTEND) {
16075 // if the source is smaller than the dest, we still need an extend.
16076 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16077 SDNodeFlags Flags;
16078 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16079 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16080 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16081 }
16082 // if the source is larger than the dest, then we just need the truncate.
16083 if (N0.getOperand(0).getValueType().bitsGT(VT))
16084 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16085 // if the source and dest are the same type, we can drop both the extend
16086 // and the truncate.
16087 return N0.getOperand(0);
16088 }
16089
16090 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16091 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16092 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16093 N0.hasOneUse()) {
16094 SDValue X = N0.getOperand(0);
16095 SDValue ExtVal = N0.getOperand(1);
16096 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16097 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16098 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16099 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16100 }
16101 }
16102
16103 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16104 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16105 return SDValue();
16106
16107 // Fold extract-and-trunc into a narrow extract. For example:
16108 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16109 // i32 y = TRUNCATE(i64 x)
16110 // -- becomes --
16111 // v16i8 b = BITCAST (v2i64 val)
16112 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16113 //
16114 // Note: We only run this optimization after type legalization (which often
16115 // creates this pattern) and before operation legalization after which
16116 // we need to be more careful about the vector instructions that we generate.
16117 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16118 N0->hasOneUse()) {
16119 EVT TrTy = N->getValueType(0);
16120 SDValue Src = N0;
16121
16122 // Check for cases where we shift down an upper element before truncation.
16123 int EltOffset = 0;
16124 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16125 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16126 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16127 Src = Src.getOperand(0);
16128 EltOffset = *ShAmt / TrTy.getSizeInBits();
16129 }
16130 }
16131 }
16132
16133 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16134 EVT VecTy = Src.getOperand(0).getValueType();
16135 EVT ExTy = Src.getValueType();
16136
16137 auto EltCnt = VecTy.getVectorElementCount();
16138 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16139 auto NewEltCnt = EltCnt * SizeRatio;
16140
16141 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16142 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16143
16144 SDValue EltNo = Src->getOperand(1);
16145 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16146 int Elt = EltNo->getAsZExtVal();
16147 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16148 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16149 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16150 DAG.getBitcast(NVT, Src.getOperand(0)),
16151 DAG.getVectorIdxConstant(Index, DL));
16152 }
16153 }
16154 }
16155
16156 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16157 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16158 TLI.isTruncateFree(SrcVT, VT)) {
16159 if (!LegalOperations ||
16160 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16161 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16162 SDLoc SL(N0);
16163 SDValue Cond = N0.getOperand(0);
16164 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16165 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16166 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16167 }
16168 }
16169
16170 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
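// For example (illustrative): (i32 (trunc (shl i64 X, 5))) can become
// (shl (i32 (trunc X)), 5) when the shift amount is known to be less than 32,
// because the truncated result only demands the low 32 bits of the wide shift.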
16171 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16172 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16173 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16174 SDValue Amt = N0.getOperand(1);
16175 KnownBits Known = DAG.computeKnownBits(Amt);
16176 unsigned Size = VT.getScalarSizeInBits();
16177 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16178 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16179 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16180 if (AmtVT != Amt.getValueType()) {
16181 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16182 AddToWorklist(Amt.getNode());
16183 }
16184 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16185 }
16186 }
16187
16188 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16189 return V;
16190
16191 if (SDValue ABD = foldABSToABD(N, DL))
16192 return ABD;
16193
16194 // Attempt to pre-truncate BUILD_VECTOR sources.
16195 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16196 N0.hasOneUse() &&
16197 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16198 // Avoid creating illegal types if running after type legalizer.
16199 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16200 EVT SVT = VT.getScalarType();
16201 SmallVector<SDValue, 8> TruncOps;
16202 for (const SDValue &Op : N0->op_values()) {
16203 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16204 TruncOps.push_back(TruncOp);
16205 }
16206 return DAG.getBuildVector(VT, DL, TruncOps);
16207 }
16208
16209 // trunc (splat_vector x) -> splat_vector (trunc x)
16210 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16211 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16212 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16213 EVT SVT = VT.getScalarType();
16214 return DAG.getSplatVector(
16215 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16216 }
16217
16218 // Fold a series of buildvector, bitcast, and truncate if possible.
16219 // For example fold
16220 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16221 // (2xi32 (buildvector x, y)).
16222 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16223 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16225 N0.getOperand(0).hasOneUse()) {
16226 SDValue BuildVect = N0.getOperand(0);
16227 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16228 EVT TruncVecEltTy = VT.getVectorElementType();
16229
16230 // Check that the element types match.
16231 if (BuildVectEltTy == TruncVecEltTy) {
16232 // Now we only need to compute the offset of the truncated elements.
16233 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16234 unsigned TruncVecNumElts = VT.getVectorNumElements();
16235 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16236 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16237
16238 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16239 "Invalid number of elements");
16240
16242 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16243 i += TruncEltOffset)
16244 Opnds.push_back(BuildVect.getOperand(i));
16245
16246 return DAG.getBuildVector(VT, DL, Opnds);
16247 }
16248 }
16249
16250 // fold (truncate (load x)) -> (smaller load x)
16251 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16252 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16253 if (SDValue Reduced = reduceLoadWidth(N))
16254 return Reduced;
16255
16256 // Handle the case where the truncated result is at least as wide as the
16257 // loaded type.
16258 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16259 auto *LN0 = cast<LoadSDNode>(N0);
16260 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16261 SDValue NewLoad = DAG.getExtLoad(
16262 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16263 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16264 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16265 return NewLoad;
16266 }
16267 }
16268 }
16269
16270 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
16271 // where ... are all 'undef'.
16272 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16274 SDValue V;
16275 unsigned Idx = 0;
16276 unsigned NumDefs = 0;
16277
16278 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16279 SDValue X = N0.getOperand(i);
16280 if (!X.isUndef()) {
16281 V = X;
16282 Idx = i;
16283 NumDefs++;
16284 }
16285 // Stop if more than one member is non-undef.
16286 if (NumDefs > 1)
16287 break;
16288
16291 X.getValueType().getVectorElementCount()));
16292 }
16293
16294 if (NumDefs == 0)
16295 return DAG.getUNDEF(VT);
16296
16297 if (NumDefs == 1) {
16298 assert(V.getNode() && "The single defined operand is empty!");
16300 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16301 if (i != Idx) {
16302 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16303 continue;
16304 }
16305 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16306 AddToWorklist(NV.getNode());
16307 Opnds.push_back(NV);
16308 }
16309 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16310 }
16311 }
16312
16313 // Fold truncate of a bitcast of a vector to an extract of the low vector
16314 // element.
16315 //
16316 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16317 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16318 SDValue VecSrc = N0.getOperand(0);
16319 EVT VecSrcVT = VecSrc.getValueType();
16320 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16321 (!LegalOperations ||
16322 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16323 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16324 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16325 DAG.getVectorIdxConstant(Idx, DL));
16326 }
16327 }
16328
16329 // Simplify the operands using demanded-bits information.
16330 if (SimplifyDemandedBits(SDValue(N, 0)))
16331 return SDValue(N, 0);
16332
16333 // fold (truncate (extract_subvector(ext x))) ->
16334 // (extract_subvector x)
16335 // TODO: This can be generalized to cover cases where the truncate and extract
16336 // do not fully cancel each other out.
16337 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16338 SDValue N00 = N0.getOperand(0);
16339 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16340 N00.getOpcode() == ISD::ZERO_EXTEND ||
16341 N00.getOpcode() == ISD::ANY_EXTEND) {
16342 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16344 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16345 N00.getOperand(0), N0.getOperand(1));
16346 }
16347 }
16348
16349 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16350 return NewVSel;
16351
16352 // Narrow a suitable binary operation with a non-opaque constant operand by
16353 // moving it ahead of the truncate. This is limited to pre-legalization
16354 // because targets may prefer a wider type during later combines and invert
16355 // this transform.
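// For example (illustrative): (i16 (trunc (add i32 X, 42))) can become
// (add (i16 (trunc X)), 42), which lets the truncate combine further with X.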
16356 switch (N0.getOpcode()) {
16357 case ISD::ADD:
16358 case ISD::SUB:
16359 case ISD::MUL:
16360 case ISD::AND:
16361 case ISD::OR:
16362 case ISD::XOR:
16363 if (!LegalOperations && N0.hasOneUse() &&
16364 (isConstantOrConstantVector(N0.getOperand(0), true) ||
16365 isConstantOrConstantVector(N0.getOperand(1), true))) {
16366 // TODO: We already restricted this to pre-legalization, but for vectors
16367 // we are extra cautious to not create an unsupported operation.
16368 // Target-specific changes are likely needed to avoid regressions here.
16369 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16370 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16371 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16372 SDNodeFlags Flags;
16373 // Propagate nuw for sub.
16374 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16376 N0->getOperand(0),
16378 VT.getScalarSizeInBits())))
16379 Flags.setNoUnsignedWrap(true);
16380 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16381 }
16382 }
16383 break;
16384 case ISD::ADDE:
16385 case ISD::UADDO_CARRY:
16386 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16387 // (trunc uaddo_carry(X, Y, Carry)) ->
16388 // (uaddo_carry trunc(X), trunc(Y), Carry)
16389 // When the adde's carry is not used.
16390 // We only do this for uaddo_carry before operation legalization.
16391 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16392 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16393 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16394 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16395 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16396 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16397 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16398 }
16399 break;
16400 case ISD::USUBSAT:
16401 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16402 // enough to know that the upper bits are zero, we must ensure that we
16403 // don't introduce an extra truncate.
16404 if (!LegalOperations && N0.hasOneUse() &&
16407 VT.getScalarSizeInBits() &&
16408 hasOperation(N0.getOpcode(), VT)) {
16409 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16410 DAG, DL);
16411 }
16412 break;
16413 case ISD::AVGFLOORS:
16414 case ISD::AVGFLOORU:
16415 case ISD::AVGCEILS:
16416 case ISD::AVGCEILU:
16417 case ISD::ABDS:
16418 case ISD::ABDU:
16419 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16420 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
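// For example (illustrative): (i8 (trunc (avgflooru (zext i8 A to i32),
// (zext i8 B to i32)))) can become (avgflooru A, B), because the upper bits of
// both operands are known to be zero.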
16421 if (!LegalOperations && N0.hasOneUse() &&
16422 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16423 EVT TruncVT = VT;
16424 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16425 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16426
16427 SDValue A = N0.getOperand(0);
16428 SDValue B = N0.getOperand(1);
16429 bool CanFold = false;
16430
16431 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16432 N0.getOpcode() == ISD::ABDU) {
16433 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16434 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16435 DAG.MaskedValueIsZero(A, UpperBits);
16436 } else {
16437 unsigned NeededBits = SrcBits - TruncBits;
16438 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16439 DAG.ComputeNumSignBits(A) > NeededBits;
16440 }
16441
16442 if (CanFold) {
16443 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16444 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16445 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16446 }
16447 }
16448 break;
16449 }
16450
16451 return SDValue();
16452}
16453
16454static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16455 SDValue Elt = N->getOperand(i);
16456 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16457 return Elt.getNode();
16458 return Elt.getOperand(Elt.getResNo()).getNode();
16459}
16460
16461/// build_pair (load, load) -> load
16462/// if load locations are consecutive.
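/// For example (illustrative, little-endian): (i64 build_pair (i32 load p),
/// (i32 load p+4)) can become a single (i64 load p), provided the wide load is
/// legal (or we are before legalization) and fast for the target.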
16463SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16464 assert(N->getOpcode() == ISD::BUILD_PAIR);
16465
16466 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16467 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16468
16469 // A BUILD_PAIR always has the least significant part in elt 0 and the
16470 // most significant part in elt 1. So when combining into one large load, we
16471 // need to consider the endianness.
16472 if (DAG.getDataLayout().isBigEndian())
16473 std::swap(LD1, LD2);
16474
16475 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16476 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16477 LD1->getAddressSpace() != LD2->getAddressSpace())
16478 return SDValue();
16479
16480 unsigned LD1Fast = 0;
16481 EVT LD1VT = LD1->getValueType(0);
16482 unsigned LD1Bytes = LD1VT.getStoreSize();
16483 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16484 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16485 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16486 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16487 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16488 LD1->getPointerInfo(), LD1->getAlign());
16489
16490 return SDValue();
16491}
16492
16493static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16494 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16495 // and Lo parts; on big-endian machines it doesn't.
16496 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16497}
16498
16499SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16500 const TargetLowering &TLI) {
16501 // If this is not a bitcast to an FP type or if the target doesn't have
16502 // IEEE754-compliant FP logic, we're done.
16503 EVT VT = N->getValueType(0);
16504 SDValue N0 = N->getOperand(0);
16505 EVT SourceVT = N0.getValueType();
16506
16507 if (!VT.isFloatingPoint())
16508 return SDValue();
16509
16510 // TODO: Handle cases where the integer constant is a different scalar
16511 // bitwidth to the FP.
16512 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16513 return SDValue();
16514
16515 unsigned FPOpcode;
16516 APInt SignMask;
16517 switch (N0.getOpcode()) {
16518 case ISD::AND:
16519 FPOpcode = ISD::FABS;
16520 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16521 break;
16522 case ISD::XOR:
16523 FPOpcode = ISD::FNEG;
16524 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16525 break;
16526 case ISD::OR:
16527 FPOpcode = ISD::FABS;
16528 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16529 break;
16530 default:
16531 return SDValue();
16532 }
16533
16534 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16535 return SDValue();
16536
16537 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16538 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16539 // removing this would require more changes.
16540 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16541 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16542 return true;
16543
16544 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16545 };
16546
16547 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16548 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16549 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16550 // fneg (fabs X)
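  // For example, with f32 (sign mask 0x80000000):
  //   (f32 (bitcast (and (i32 (bitcast f32:X)), 0x7fffffff))) -> (fabs X)
  //   (f32 (bitcast (xor (i32 (bitcast f32:X)), 0x80000000))) -> (fneg X)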
16551 SDValue LogicOp0 = N0.getOperand(0);
16552 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16553 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16554 IsBitCastOrFree(LogicOp0, VT)) {
16555 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16556 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16557 NumFPLogicOpsConv++;
16558 if (N0.getOpcode() == ISD::OR)
16559 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16560 return FPOp;
16561 }
16562
16563 return SDValue();
16564}
16565
16566SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16567 SDValue N0 = N->getOperand(0);
16568 EVT VT = N->getValueType(0);
16569
16570 if (N0.isUndef())
16571 return DAG.getUNDEF(VT);
16572
16573 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16574 // Only do this before legalize types, unless both types are integer and the
16575 // scalar type is legal. Only do this before legalize ops, since the target
16576 // may be depending on the bitcast.
16577 // First check to see if this is all constant.
16578 // TODO: Support FP bitcasts after legalize types.
16579 if (VT.isVector() &&
16580 (!LegalTypes ||
16581 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16582 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16583 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16584 cast<BuildVectorSDNode>(N0)->isConstant())
16585 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16586 VT.getVectorElementType());
16587
16588 // If the input is a constant, let getNode fold it.
16589 if (isIntOrFPConstant(N0)) {
16590 // If we can't allow illegal operations, we need to check that this is just
16591 // an fp -> int or int -> fp conversion and that the resulting operation will
16592 // be legal.
16593 if (!LegalOperations ||
16594 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16595 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16596 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16597 TLI.isOperationLegal(ISD::Constant, VT))) {
16598 SDValue C = DAG.getBitcast(VT, N0);
16599 if (C.getNode() != N)
16600 return C;
16601 }
16602 }
16603
16604 // (conv (conv x, t1), t2) -> (conv x, t2)
16605 if (N0.getOpcode() == ISD::BITCAST)
16606 return DAG.getBitcast(VT, N0.getOperand(0));
16607
16608 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16609 // iff the current bitwise logicop type isn't legal
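  // For example, if v4i16 is not a legal type:
  //   (i64 (bitcast (xor (v4i16 (bitcast i64:x)), constant-bv)))
  //     -> (xor x, (i64 (bitcast constant-bv)))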
16610 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16611 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16612 auto IsFreeBitcast = [VT](SDValue V) {
16613 return (V.getOpcode() == ISD::BITCAST &&
16614 V.getOperand(0).getValueType() == VT) ||
16615 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16616 V->hasOneUse());
16617 };
16618 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16619 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16620 DAG.getBitcast(VT, N0.getOperand(0)),
16621 DAG.getBitcast(VT, N0.getOperand(1)));
16622 }
16623
16624 // fold (conv (load x)) -> (load (conv*)x)
16625 // If the resultant load doesn't need a higher alignment than the original!
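  // For example, (f32 (bitcast (i32 (load p)))) -> (f32 (load p)), reusing the
  // original chain and memory operand.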
16626 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16627 // Do not remove the cast if the types differ in endian layout.
16628 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
16629 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
16630 // If the load is volatile, we only want to change the load type if the
16631 // resulting load is legal. Otherwise we might increase the number of
16632 // memory accesses. We don't care if the original type was legal or not
16633 // as we assume software couldn't rely on the number of accesses of an
16634 // illegal type.
16635 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
16636 TLI.isOperationLegal(ISD::LOAD, VT))) {
16637 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16638
16639 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16640 *LN0->getMemOperand())) {
16641 // If the range metadata type does not match the new memory
16642 // operation type, remove the range metadata.
16643 if (const MDNode *MD = LN0->getRanges()) {
16644 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16645 if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
16646 !VT.isInteger()) {
16647 LN0->getMemOperand()->clearRanges();
16648 }
16649 }
16650 SDValue Load =
16651 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16652 LN0->getMemOperand());
16653 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16654 return Load;
16655 }
16656 }
16657
16658 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16659 return V;
16660
16661 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16662 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16663 //
16664 // For ppc_fp128:
16665 // fold (bitcast (fneg x)) ->
16666 // flipbit = signbit
16667 // (xor (bitcast x) (build_pair flipbit, flipbit))
16668 //
16669 // fold (bitcast (fabs x)) ->
16670 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16671 // (xor (bitcast x) (build_pair flipbit, flipbit))
16672 // This often reduces constant pool loads.
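  // For example, with f64 (sign mask 0x8000000000000000):
  //   (i64 (bitcast (fneg f64:X))) -> (xor (i64 (bitcast X)), 0x8000000000000000)
  //   (i64 (bitcast (fabs f64:X))) -> (and (i64 (bitcast X)), 0x7fffffffffffffff)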
16673 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16674 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16675 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16676 !N0.getValueType().isVector()) {
16677 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16678 AddToWorklist(NewConv.getNode());
16679
16680 SDLoc DL(N);
16681 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16682 assert(VT.getSizeInBits() == 128);
16683 SDValue SignBit = DAG.getConstant(
16684 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16685 SDValue FlipBit;
16686 if (N0.getOpcode() == ISD::FNEG) {
16687 FlipBit = SignBit;
16688 AddToWorklist(FlipBit.getNode());
16689 } else {
16690 assert(N0.getOpcode() == ISD::FABS);
16691 SDValue Hi =
16692 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16693 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16694 SDLoc(NewConv)));
16695 AddToWorklist(Hi.getNode());
16696 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16697 AddToWorklist(FlipBit.getNode());
16698 }
16699 SDValue FlipBits =
16700 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16701 AddToWorklist(FlipBits.getNode());
16702 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16703 }
16704 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16705 if (N0.getOpcode() == ISD::FNEG)
16706 return DAG.getNode(ISD::XOR, DL, VT,
16707 NewConv, DAG.getConstant(SignBit, DL, VT));
16708 assert(N0.getOpcode() == ISD::FABS);
16709 return DAG.getNode(ISD::AND, DL, VT,
16710 NewConv, DAG.getConstant(~SignBit, DL, VT));
16711 }
16712
16713 // fold (bitconvert (fcopysign cst, x)) ->
16714 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16715 // Note that we don't handle (copysign x, cst) because this can always be
16716 // folded to an fneg or fabs.
16717 //
16718 // For ppc_fp128:
16719 // fold (bitcast (fcopysign cst, x)) ->
16720 // flipbit = (and (extract_element
16721 // (xor (bitcast cst), (bitcast x)), 0),
16722 // signbit)
16723 // (xor (bitcast cst) (build_pair flipbit, flipbit))
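  // For example, with f32 (sign mask 0x80000000):
  //   (i32 (bitcast (fcopysign f32 2.0, f32:X)))
  //     -> (or (and (i32 (bitcast X)), 0x80000000), 0x40000000)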
16724 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16725 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16726 !VT.isVector()) {
16727 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16728 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16729 if (isTypeLegal(IntXVT)) {
16730 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16731 AddToWorklist(X.getNode());
16732
16733 // If X has a different width than the result/lhs, sext it or truncate it.
16734 unsigned VTWidth = VT.getSizeInBits();
16735 if (OrigXWidth < VTWidth) {
16736 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16737 AddToWorklist(X.getNode());
16738 } else if (OrigXWidth > VTWidth) {
16739 // To get the sign bit in the right place, we have to shift it right
16740 // before truncating.
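        // For example, if X is i64 (taken from an f64 sign operand) and VT is
        // i32, then (srl X, 32) followed by truncate moves the sign bit from
        // bit 63 down to bit 31 of the result.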
16741 SDLoc DL(X);
16742 X = DAG.getNode(ISD::SRL, DL,
16743 X.getValueType(), X,
16744 DAG.getConstant(OrigXWidth-VTWidth, DL,
16745 X.getValueType()));
16746 AddToWorklist(X.getNode());
16747 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16748 AddToWorklist(X.getNode());
16749 }
16750
16751 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16752 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16753 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16754 AddToWorklist(Cst.getNode());
16755 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16756 AddToWorklist(X.getNode());
16757 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16758 AddToWorklist(XorResult.getNode());
16759 SDValue XorResult64 = DAG.getNode(
16760 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16761 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16762 SDLoc(XorResult)));
16763 AddToWorklist(XorResult64.getNode());
16764 SDValue FlipBit =
16765 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16766 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16767 AddToWorklist(FlipBit.getNode());
16768 SDValue FlipBits =
16769 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16770 AddToWorklist(FlipBits.getNode());
16771 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16772 }
16773 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16774 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16775 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16776 AddToWorklist(X.getNode());
16777
16778 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16779 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16780 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16781 AddToWorklist(Cst.getNode());
16782
16783 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16784 }
16785 }
16786
16787 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16788 if (N0.getOpcode() == ISD::BUILD_PAIR)
16789 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16790 return CombineLD;
16791
16792 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16793 // => int_vt (any_extend elt_vt:x)
16794 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16795 SDValue SrcScalar = N0.getOperand(0);
16796 if (SrcScalar.getValueType().isScalarInteger())
16797 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16798 }
16799
16800 // Remove double bitcasts from shuffles - this is often a legacy of
16801 // XformToShuffleWithZero being used to combine bitmaskings (of
16802 // float vectors bitcast to integer vectors) into shuffles.
16803 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
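  // For example, bitcasting a v2i64 shuffle back to v4i32 scales each mask
  // element by 2:
  //   (v4i32 (bitcast (vector_shuffle<1,0> (v2i64 (bitcast v4i32:s0)),
  //                                        (v2i64 (bitcast v4i32:s1)))))
  //     -> (vector_shuffle<2,3,0,1> s0, s1)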
16804 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16805 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16806 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16807 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16808 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16809
16810 // If an operand is a bitcast, peek through it if it casts from the original VT.
16811 // If an operand is a constant, just bitcast it back to the original VT.
16812 auto PeekThroughBitcast = [&](SDValue Op) {
16813 if (Op.getOpcode() == ISD::BITCAST &&
16814 Op.getOperand(0).getValueType() == VT)
16815 return SDValue(Op.getOperand(0));
16816 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16817 return DAG.getBitcast(VT, Op);
16818 return SDValue();
16819 };
16820
16821 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16822 // the result type of this bitcast. This would eliminate at least one
16823 // bitcast. See the transform in InstCombine.
16824 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16825 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16826 if (!(SV0 && SV1))
16827 return SDValue();
16828
16829 int MaskScale =
16830 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16831 SmallVector<int, 8> NewMask;
16832 for (int M : SVN->getMask())
16833 for (int i = 0; i != MaskScale; ++i)
16834 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16835
16836 SDValue LegalShuffle =
16837 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16838 if (LegalShuffle)
16839 return LegalShuffle;
16840 }
16841
16842 return SDValue();
16843}
16844
16845SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16846 EVT VT = N->getValueType(0);
16847 return CombineConsecutiveLoads(N, VT);
16848}
16849
16850SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16851 SDValue N0 = N->getOperand(0);
16852
16853 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16854 return N0;
16855
16856 // If we have frozen and unfrozen users of N0, update so everything uses N.
16857 if (!N0.isUndef() && !N0.hasOneUse()) {
16858 SDValue FrozenN0(N, 0);
16859 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
16860 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
16861 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
16862 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
16863 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16864 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
16865 DAG.UpdateNodeOperands(N, N0);
16866 return FrozenN0;
16867 }
16868
16869 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16870 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16871 // example https://reviews.llvm.org/D136529#4120959.
16872 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16873 return SDValue();
16874
16875 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16876 // Try to push freeze through instructions that propagate but don't produce
16877 // poison as far as possible. If an operand of the freeze satisfies three
16878 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
16879 // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
16880 // or similar), then push the freeze through to the maybe-poison operands.
16881 // NOTE: we will strip poison-generating flags, so ignore them here.
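  // For example, freeze (add nsw x, 1) -> add (freeze x), 1, with the nsw flag
  // dropped; the constant operand is already known not to be undef or poison.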
16882 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16883 /*ConsiderFlags*/ false) ||
16884 N0->getNumValues() != 1 || !N0->hasOneUse())
16885 return SDValue();
16886
16887 // TODO: we should always allow multiple operands. However, this increases the
16888 // likelihood of infinite loops, because the ReplaceAllUsesOfValueWith call
16889 // below can cause later nodes that share frozen operands to fold again, and
16890 // we can then no longer confirm the other operands are not poison due to the
16891 // recursion depth limits on isGuaranteedNotToBeUndefOrPoison.
16892 bool AllowMultipleMaybePoisonOperands =
16893 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16894 N0.getOpcode() == ISD::BUILD_VECTOR ||
16895 N0.getOpcode() == ISD::BUILD_PAIR ||
16896 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16897 N0.getOpcode() == ISD::CONCAT_VECTORS;
16898
16899 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16900 // ones" or "constant" into something that depends on FrozenUndef. We can
16901 // instead pick undef values to keep those properties, while at the same time
16902 // folding away the freeze.
16903 // If we implement a more general solution for folding away freeze(undef) in
16904 // the future, then this special handling can be removed.
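  // For example, freeze (build_vector -1, undef, -1, -1) becomes an all-ones
  // constant vector, and freeze (build_vector 1, undef) becomes
  // (build_vector 1, 0), rather than introducing frozen undef elements.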
16905 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16906 SDLoc DL(N0);
16907 EVT VT = N0.getValueType();
16908 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16909 return DAG.getAllOnesConstant(DL, VT);
16910 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16911 SmallVector<SDValue, 8> NewVecC;
16912 for (const SDValue &Op : N0->op_values())
16913 NewVecC.push_back(
16914 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16915 return DAG.getBuildVector(VT, DL, NewVecC);
16916 }
16917 }
16918
16919 SmallSet<SDValue, 8> MaybePoisonOperands;
16920 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16921 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16922 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
16923 continue;
16924 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16925 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16926 if (IsNewMaybePoisonOperand)
16927 MaybePoisonOperandNumbers.push_back(OpNo);
16928 if (!HadMaybePoisonOperands)
16929 continue;
16930 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16931 // Multiple maybe-poison ops when not allowed - bail out.
16932 return SDValue();
16933 }
16934 }
16935 // NOTE: the whole op may still not be guaranteed to be free of undef or poison
16936 // because it could create undef or poison due to its poison-generating flags.
16937 // So not finding any maybe-poison operands is fine.
16938
16939 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16940 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16941 // operands via the operand numbers. The typical scenario is that we have
16942 // something like this
16943 // t262: i32 = freeze t181
16944 // t150: i32 = ctlz_zero_undef t262
16945 // t184: i32 = ctlz_zero_undef t181
16946 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16947 // When freezing the t181 operand we get t262 back, and then the
16948 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16949 // also recursively replace t184 by t150.
16950 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16951 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16952 if (MaybePoisonOperand.isUndef())
16953 continue;
16954 // First, freeze each offending operand.
16955 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16956 // Then, change all other uses of unfrozen operand to use frozen operand.
16957 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16958 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16959 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16960 // But, that also updated the use in the freeze we just created, thus
16961 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16962 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16963 MaybePoisonOperand);
16964 }
16965
16966 // This node has been merged with another.
16967 if (N->getOpcode() == ISD::DELETED_NODE)
16968 return SDValue(N, 0);
16969 }
16970
16971 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
16972
16973 // The whole node may have been updated, so the value we were holding
16974 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16975 N0 = N->getOperand(0);
16976
16977 // Finally, recreate the node; its operands were updated to use
16978 // frozen operands, so we just need to use its "original" operands.
16979 SmallVector<SDValue> Ops(N0->ops());
16980 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
16981 // leave for a future patch.
16982 for (SDValue &Op : Ops) {
16983 if (Op.isUndef())
16984 Op = DAG.getFreeze(Op);
16985 }
16986
16987 SDLoc DL(N0);
16988
16989 // Special case handling for ShuffleVectorSDNode nodes.
16990 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
16991 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
16992 SVN->getMask());
16993
16994 // NOTE: this strips poison generating flags.
16995 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
16996 // ninf, nsz, or fast.
16997 // However, contract, reassoc, afn, and arcp should be preserved,
16998 // as these fast-math flags do not introduce poison values.
16999 SDNodeFlags SrcFlags = N0->getFlags();
17000 SDNodeFlags SafeFlags;
17001 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17002 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17003 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17004 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17005 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
17006}
17007
17008// Returns true if floating point contraction is allowed on the FMUL-SDValue
17009// `N`
17010static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
17011 assert(N.getOpcode() == ISD::FMUL);
17012
17013 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17014 N->getFlags().hasAllowContract();
17015}
17016
17017// Returns true if `N` can assume no infinities involved in its computation.
17018static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
17019 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
17020}
17021
17022/// Try to perform FMA combining on a given FADD node.
17023template <class MatchContextClass>
17024SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17025 SDValue N0 = N->getOperand(0);
17026 SDValue N1 = N->getOperand(1);
17027 EVT VT = N->getValueType(0);
17028 SDLoc SL(N);
17029 MatchContextClass matcher(DAG, TLI, N);
17030 const TargetOptions &Options = DAG.getTarget().Options;
17031
17032 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17033
17034 // Floating-point multiply-add with intermediate rounding.
17035 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17036 // FIXME: Add VP_FMAD opcode.
17037 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17038
17039 // Floating-point multiply-add without intermediate rounding.
17040 bool HasFMA =
17041 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17042 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17043
17044 // No valid opcode, do not combine.
17045 if (!HasFMAD && !HasFMA)
17046 return SDValue();
17047
17048 bool AllowFusionGlobally =
17049 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17050 // If the addition is not contractable, do not combine.
17051 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17052 return SDValue();
17053
17054 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17055 // beneficial. It does not reduce latency. It increases register pressure. It
17056 // replaces an fadd with an fma which is a more complex instruction, so is
17057 // likely to have a larger encoding, use more functional units, etc.
17058 if (N0 == N1)
17059 return SDValue();
17060
17061 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17062 return SDValue();
17063
17064 // Always prefer FMAD to FMA for precision.
17065 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17066 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17067
17068 auto isFusedOp = [&](SDValue N) {
17069 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17070 };
17071
17072 // Is the node an FMUL and contractable either due to global flags or
17073 // SDNodeFlags.
17074 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17075 if (!matcher.match(N, ISD::FMUL))
17076 return false;
17077 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17078 };
17079 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17080 // prefer to fold the multiply with fewer uses.
17081 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
17082 if (N0->use_size() > N1->use_size())
17083 std::swap(N0, N1);
17084 }
17085
17086 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17087 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17088 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17089 N0.getOperand(1), N1);
17090 }
17091
17092 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17093 // Note: Commutes FADD operands.
17094 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17095 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17096 N1.getOperand(1), N0);
17097 }
17098
17099 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17100 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17101 // This also works with nested fma instructions:
17102 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
17103 // fma A, B, (fma C, D, fma (E, F, G))
17104 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
17105 // fma A, B, (fma C, D, fma (E, F, G)).
17106 // This requires reassociation because it changes the order of operations.
17107 bool CanReassociate = N->getFlags().hasAllowReassociation();
17108 if (CanReassociate) {
17109 SDValue FMA, E;
17110 if (isFusedOp(N0) && N0.hasOneUse()) {
17111 FMA = N0;
17112 E = N1;
17113 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17114 FMA = N1;
17115 E = N0;
17116 }
17117
17118 SDValue TmpFMA = FMA;
17119 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17120 SDValue FMul = TmpFMA->getOperand(2);
17121 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17122 SDValue C = FMul.getOperand(0);
17123 SDValue D = FMul.getOperand(1);
17124 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17125 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17126 // Replacing the inner FMul could cause the outer FMA to be simplified
17127 // away.
17128 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17129 }
17130
17131 TmpFMA = TmpFMA->getOperand(2);
17132 }
17133 }
17134
17135 // Look through FP_EXTEND nodes to do more combining.
17136
17137 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17138 if (matcher.match(N0, ISD::FP_EXTEND)) {
17139 SDValue N00 = N0.getOperand(0);
17140 if (isContractableFMUL(N00) &&
17141 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17142 N00.getValueType())) {
17143 return matcher.getNode(
17144 PreferredFusedOpcode, SL, VT,
17145 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17146 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17147 }
17148 }
17149
17150 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17151 // Note: Commutes FADD operands.
17152 if (matcher.match(N1, ISD::FP_EXTEND)) {
17153 SDValue N10 = N1.getOperand(0);
17154 if (isContractableFMUL(N10) &&
17155 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17156 N10.getValueType())) {
17157 return matcher.getNode(
17158 PreferredFusedOpcode, SL, VT,
17159 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17160 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17161 }
17162 }
17163
17164 // More folding opportunities when target permits.
17165 if (Aggressive) {
17166 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17167 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17168 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17169 SDValue Z) {
17170 return matcher.getNode(
17171 PreferredFusedOpcode, SL, VT, X, Y,
17172 matcher.getNode(PreferredFusedOpcode, SL, VT,
17173 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17174 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17175 };
17176 if (isFusedOp(N0)) {
17177 SDValue N02 = N0.getOperand(2);
17178 if (matcher.match(N02, ISD::FP_EXTEND)) {
17179 SDValue N020 = N02.getOperand(0);
17180 if (isContractableFMUL(N020) &&
17181 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17182 N020.getValueType())) {
17183 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17184 N020.getOperand(0), N020.getOperand(1),
17185 N1);
17186 }
17187 }
17188 }
17189
17190 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17191 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17192 // FIXME: This turns two single-precision and one double-precision
17193 // operation into two double-precision operations, which might not be
17194 // interesting for all targets, especially GPUs.
17195 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17196 SDValue Z) {
17197 return matcher.getNode(
17198 PreferredFusedOpcode, SL, VT,
17199 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17200 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17201 matcher.getNode(PreferredFusedOpcode, SL, VT,
17202 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17203 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17204 };
17205 if (N0.getOpcode() == ISD::FP_EXTEND) {
17206 SDValue N00 = N0.getOperand(0);
17207 if (isFusedOp(N00)) {
17208 SDValue N002 = N00.getOperand(2);
17209 if (isContractableFMUL(N002) &&
17210 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17211 N00.getValueType())) {
17212 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17213 N002.getOperand(0), N002.getOperand(1),
17214 N1);
17215 }
17216 }
17217 }
17218
17219 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17220 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17221 if (isFusedOp(N1)) {
17222 SDValue N12 = N1.getOperand(2);
17223 if (N12.getOpcode() == ISD::FP_EXTEND) {
17224 SDValue N120 = N12.getOperand(0);
17225 if (isContractableFMUL(N120) &&
17226 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17227 N120.getValueType())) {
17228 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17229 N120.getOperand(0), N120.getOperand(1),
17230 N0);
17231 }
17232 }
17233 }
17234
17235 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17236 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17237 // FIXME: This turns two single-precision and one double-precision
17238 // operation into two double-precision operations, which might not be
17239 // interesting for all targets, especially GPUs.
17240 if (N1.getOpcode() == ISD::FP_EXTEND) {
17241 SDValue N10 = N1.getOperand(0);
17242 if (isFusedOp(N10)) {
17243 SDValue N102 = N10.getOperand(2);
17244 if (isContractableFMUL(N102) &&
17245 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17246 N10.getValueType())) {
17247 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17248 N102.getOperand(0), N102.getOperand(1),
17249 N0);
17250 }
17251 }
17252 }
17253 }
17254
17255 return SDValue();
17256}
17257
17258/// Try to perform FMA combining on a given FSUB node.
17259template <class MatchContextClass>
17260SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17261 SDValue N0 = N->getOperand(0);
17262 SDValue N1 = N->getOperand(1);
17263 EVT VT = N->getValueType(0);
17264 SDLoc SL(N);
17265 MatchContextClass matcher(DAG, TLI, N);
17266 const TargetOptions &Options = DAG.getTarget().Options;
17267
17268 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17269
17270 // Floating-point multiply-add with intermediate rounding.
17271 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17272 // FIXME: Add VP_FMAD opcode.
17273 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17274
17275 // Floating-point multiply-add without intermediate rounding.
17276 bool HasFMA =
17277 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17278 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17279
17280 // No valid opcode, do not combine.
17281 if (!HasFMAD && !HasFMA)
17282 return SDValue();
17283
17284 const SDNodeFlags Flags = N->getFlags();
17285 bool AllowFusionGlobally =
17286 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17287
17288 // If the subtraction is not contractable, do not combine.
17289 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17290 return SDValue();
17291
17292 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17293 return SDValue();
17294
17295 // Always prefer FMAD to FMA for precision.
17296 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17297 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17298 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
17299
17300 // Is the node an FMUL and contractable either due to global flags or
17301 // SDNodeFlags.
17302 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17303 if (!matcher.match(N, ISD::FMUL))
17304 return false;
17305 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17306 };
17307
17308 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17309 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17310 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17311 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17312 XY.getOperand(1),
17313 matcher.getNode(ISD::FNEG, SL, VT, Z));
17314 }
17315 return SDValue();
17316 };
17317
17318 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17319 // Note: Commutes FSUB operands.
17320 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17321 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17322 return matcher.getNode(
17323 PreferredFusedOpcode, SL, VT,
17324 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17325 YZ.getOperand(1), X);
17326 }
17327 return SDValue();
17328 };
17329
17330 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17331 // prefer to fold the multiply with fewer uses.
17332 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17333 (N0->use_size() > N1->use_size())) {
17334 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17335 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17336 return V;
17337 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17338 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17339 return V;
17340 } else {
17341 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17342 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17343 return V;
17344 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17345 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17346 return V;
17347 }
17348
17349 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
17350 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17351 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17352 SDValue N00 = N0.getOperand(0).getOperand(0);
17353 SDValue N01 = N0.getOperand(0).getOperand(1);
17354 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17355 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17356 matcher.getNode(ISD::FNEG, SL, VT, N1));
17357 }
17358
17359 // Look through FP_EXTEND nodes to do more combining.
17360
17361 // fold (fsub (fpext (fmul x, y)), z)
17362 // -> (fma (fpext x), (fpext y), (fneg z))
17363 if (matcher.match(N0, ISD::FP_EXTEND)) {
17364 SDValue N00 = N0.getOperand(0);
17365 if (isContractableFMUL(N00) &&
17366 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17367 N00.getValueType())) {
17368 return matcher.getNode(
17369 PreferredFusedOpcode, SL, VT,
17370 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17371 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17372 matcher.getNode(ISD::FNEG, SL, VT, N1));
17373 }
17374 }
17375
17376 // fold (fsub x, (fpext (fmul y, z)))
17377 // -> (fma (fneg (fpext y)), (fpext z), x)
17378 // Note: Commutes FSUB operands.
17379 if (matcher.match(N1, ISD::FP_EXTEND)) {
17380 SDValue N10 = N1.getOperand(0);
17381 if (isContractableFMUL(N10) &&
17382 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17383 N10.getValueType())) {
17384 return matcher.getNode(
17385 PreferredFusedOpcode, SL, VT,
17386 matcher.getNode(
17387 ISD::FNEG, SL, VT,
17388 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17389 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17390 }
17391 }
17392
17393 // fold (fsub (fpext (fneg (fmul x, y))), z)
17394 // -> (fneg (fma (fpext x), (fpext y), z))
17395 // Note: This could be removed with appropriate canonicalization of the
17396 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17397 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17398 // from implementing the canonicalization in visitFSUB.
17399 if (matcher.match(N0, ISD::FP_EXTEND)) {
17400 SDValue N00 = N0.getOperand(0);
17401 if (matcher.match(N00, ISD::FNEG)) {
17402 SDValue N000 = N00.getOperand(0);
17403 if (isContractableFMUL(N000) &&
17404 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17405 N00.getValueType())) {
17406 return matcher.getNode(
17407 ISD::FNEG, SL, VT,
17408 matcher.getNode(
17409 PreferredFusedOpcode, SL, VT,
17410 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17411 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17412 N1));
17413 }
17414 }
17415 }
17416
17417 // fold (fsub (fneg (fpext (fmul x, y))), z)
17418 // -> (fneg (fma (fpext x), (fpext y), z))
17419 // Note: This could be removed with appropriate canonicalization of the
17420 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17421 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17422 // from implementing the canonicalization in visitFSUB.
17423 if (matcher.match(N0, ISD::FNEG)) {
17424 SDValue N00 = N0.getOperand(0);
17425 if (matcher.match(N00, ISD::FP_EXTEND)) {
17426 SDValue N000 = N00.getOperand(0);
17427 if (isContractableFMUL(N000) &&
17428 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17429 N000.getValueType())) {
17430 return matcher.getNode(
17431 ISD::FNEG, SL, VT,
17432 matcher.getNode(
17433 PreferredFusedOpcode, SL, VT,
17434 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17435 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17436 N1));
17437 }
17438 }
17439 }
17440
17441 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17442 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17443 };
17444
17445 auto isFusedOp = [&](SDValue N) {
17446 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17447 };
17448
17449 // More folding opportunities when target permits.
17450 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17451 bool CanFuse = N->getFlags().hasAllowContract();
17452 // fold (fsub (fma x, y, (fmul u, v)), z)
17453 // -> (fma x, y (fma u, v, (fneg z)))
17454 if (CanFuse && isFusedOp(N0) &&
17455 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17456 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17457 return matcher.getNode(
17458 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17459 matcher.getNode(PreferredFusedOpcode, SL, VT,
17460 N0.getOperand(2).getOperand(0),
17461 N0.getOperand(2).getOperand(1),
17462 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17463 }
17464
17465 // fold (fsub x, (fma y, z, (fmul u, v)))
17466 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17467 if (CanFuse && isFusedOp(N1) &&
17468 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17469 N1->hasOneUse() && NoSignedZero) {
17470 SDValue N20 = N1.getOperand(2).getOperand(0);
17471 SDValue N21 = N1.getOperand(2).getOperand(1);
17472 return matcher.getNode(
17473 PreferredFusedOpcode, SL, VT,
17474 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17475 N1.getOperand(1),
17476 matcher.getNode(PreferredFusedOpcode, SL, VT,
17477 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17478 }
17479
17480 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17481 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17482 if (isFusedOp(N0) && N0->hasOneUse()) {
17483 SDValue N02 = N0.getOperand(2);
17484 if (matcher.match(N02, ISD::FP_EXTEND)) {
17485 SDValue N020 = N02.getOperand(0);
17486 if (isContractableAndReassociableFMUL(N020) &&
17487 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17488 N020.getValueType())) {
17489 return matcher.getNode(
17490 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17491 matcher.getNode(
17492 PreferredFusedOpcode, SL, VT,
17493 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17494 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17495 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17496 }
17497 }
17498 }
17499
17500 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17501 // -> (fma (fpext x), (fpext y),
17502 // (fma (fpext u), (fpext v), (fneg z)))
17503 // FIXME: This turns two single-precision and one double-precision
17504 // operation into two double-precision operations, which might not be
17505 // interesting for all targets, especially GPUs.
17506 if (matcher.match(N0, ISD::FP_EXTEND)) {
17507 SDValue N00 = N0.getOperand(0);
17508 if (isFusedOp(N00)) {
17509 SDValue N002 = N00.getOperand(2);
17510 if (isContractableAndReassociableFMUL(N002) &&
17511 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17512 N00.getValueType())) {
17513 return matcher.getNode(
17514 PreferredFusedOpcode, SL, VT,
17515 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17516 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17517 matcher.getNode(
17518 PreferredFusedOpcode, SL, VT,
17519 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17520 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17521 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17522 }
17523 }
17524 }
17525
17526 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17527 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17528 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17529 N1->hasOneUse()) {
17530 SDValue N120 = N1.getOperand(2).getOperand(0);
17531 if (isContractableAndReassociableFMUL(N120) &&
17532 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17533 N120.getValueType())) {
17534 SDValue N1200 = N120.getOperand(0);
17535 SDValue N1201 = N120.getOperand(1);
17536 return matcher.getNode(
17537 PreferredFusedOpcode, SL, VT,
17538 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17539 N1.getOperand(1),
17540 matcher.getNode(
17541 PreferredFusedOpcode, SL, VT,
17542 matcher.getNode(ISD::FNEG, SL, VT,
17543 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17544 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17545 }
17546 }
17547
17548 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17549 // -> (fma (fneg (fpext y)), (fpext z),
17550 // (fma (fneg (fpext u)), (fpext v), x))
17551 // FIXME: This turns two single-precision and one double-precision
17552 // operation into two double-precision operations, which might not be
17553 // interesting for all targets, especially GPUs.
17554 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17555 SDValue CvtSrc = N1.getOperand(0);
17556 SDValue N100 = CvtSrc.getOperand(0);
17557 SDValue N101 = CvtSrc.getOperand(1);
17558 SDValue N102 = CvtSrc.getOperand(2);
17559 if (isContractableAndReassociableFMUL(N102) &&
17560 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17561 CvtSrc.getValueType())) {
17562 SDValue N1020 = N102.getOperand(0);
17563 SDValue N1021 = N102.getOperand(1);
17564 return matcher.getNode(
17565 PreferredFusedOpcode, SL, VT,
17566 matcher.getNode(ISD::FNEG, SL, VT,
17567 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17568 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17569 matcher.getNode(
17570 PreferredFusedOpcode, SL, VT,
17571 matcher.getNode(ISD::FNEG, SL, VT,
17572 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17573 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17574 }
17575 }
17576 }
17577
17578 return SDValue();
17579}
17580
17581/// Try to perform FMA combining on a given FMUL node based on the distributive
17582/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17583/// subtraction instead of addition).
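/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y) and
/// (fmul (fsub x, 1.0), y) becomes (fma x, y, (fneg y)).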
17584SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17585 SDValue N0 = N->getOperand(0);
17586 SDValue N1 = N->getOperand(1);
17587 EVT VT = N->getValueType(0);
17588 SDLoc SL(N);
17589
17590 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17591
17592 const TargetOptions &Options = DAG.getTarget().Options;
17593
17594 // The transforms below are incorrect when x == 0 and y == inf, because the
17595 // intermediate multiplication produces a nan.
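  // For example, with x == 0.0 and y == inf:
  //   (fmul (fadd x, 1.0), y) == 1.0 * inf == inf, but
  //   (fma x, y, y) == (0.0 * inf) + inf == nan + inf == nan.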
17596 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17597 if (!hasNoInfs(Options, FAdd))
17598 return SDValue();
17599
17600 // Floating-point multiply-add without intermediate rounding.
17601 bool HasFMA =
17602 isContractableFMUL(Options, SDValue(N, 0)) &&
17603 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17604 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17605
17606 // Floating-point multiply-add with intermediate rounding. This can result
17607 // in a less precise result due to the changed rounding order.
17608 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17609
17610 // No valid opcode, do not combine.
17611 if (!HasFMAD && !HasFMA)
17612 return SDValue();
17613
17614 // Always prefer FMAD to FMA for precision.
17615 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17616 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17617
17618 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17619 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
17620 auto FuseFADD = [&](SDValue X, SDValue Y) {
17621 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17622 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17623 if (C->isExactlyValue(+1.0))
17624 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17625 Y);
17626 if (C->isExactlyValue(-1.0))
17627 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17628 DAG.getNode(ISD::FNEG, SL, VT, Y));
17629 }
17630 }
17631 return SDValue();
17632 };
17633
17634 if (SDValue FMA = FuseFADD(N0, N1))
17635 return FMA;
17636 if (SDValue FMA = FuseFADD(N1, N0))
17637 return FMA;
17638
17639 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17640 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17641 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17642 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17643 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17644 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17645 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17646 if (C0->isExactlyValue(+1.0))
17647 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17648 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17649 Y);
17650 if (C0->isExactlyValue(-1.0))
17651 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17652 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17653 DAG.getNode(ISD::FNEG, SL, VT, Y));
17654 }
17655 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17656 if (C1->isExactlyValue(+1.0))
17657 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17658 DAG.getNode(ISD::FNEG, SL, VT, Y));
17659 if (C1->isExactlyValue(-1.0))
17660 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17661 Y);
17662 }
17663 }
17664 return SDValue();
17665 };
17666
17667 if (SDValue FMA = FuseFSUB(N0, N1))
17668 return FMA;
17669 if (SDValue FMA = FuseFSUB(N1, N0))
17670 return FMA;
17671
17672 return SDValue();
17673}
17674
17675SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17676 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17677
17678 // FADD -> FMA combines:
17679 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17680 if (Fused.getOpcode() != ISD::DELETED_NODE)
17681 AddToWorklist(Fused.getNode());
17682 return Fused;
17683 }
17684 return SDValue();
17685}
17686
17687SDValue DAGCombiner::visitFADD(SDNode *N) {
17688 SDValue N0 = N->getOperand(0);
17689 SDValue N1 = N->getOperand(1);
17690 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17691 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17692 EVT VT = N->getValueType(0);
17693 SDLoc DL(N);
17694 const TargetOptions &Options = DAG.getTarget().Options;
17695 SDNodeFlags Flags = N->getFlags();
17696 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17697
17698 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17699 return R;
17700
17701 // fold (fadd c1, c2) -> c1 + c2
17702 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17703 return C;
17704
17705 // canonicalize constant to RHS
17706 if (N0CFP && !N1CFP)
17707 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17708
17709 // fold vector ops
17710 if (VT.isVector())
17711 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17712 return FoldedVOp;
17713
17714 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
17715 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17716 if (N1C && N1C->isZero())
17717 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17718 return N0;
17719
17720 if (SDValue NewSel = foldBinOpIntoSelect(N))
17721 return NewSel;
17722
17723 // fold (fadd A, (fneg B)) -> (fsub A, B)
17724 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17725 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17726 N1, DAG, LegalOperations, ForCodeSize))
17727 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17728
17729 // fold (fadd (fneg A), B) -> (fsub B, A)
17730 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17731 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17732 N0, DAG, LegalOperations, ForCodeSize))
17733 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17734
17735 auto isFMulNegTwo = [](SDValue FMul) {
17736 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17737 return false;
17738 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17739 return C && C->isExactlyValue(-2.0);
17740 };
17741
17742 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17743 if (isFMulNegTwo(N0)) {
17744 SDValue B = N0.getOperand(0);
17745 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17746 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17747 }
17748 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17749 if (isFMulNegTwo(N1)) {
17750 SDValue B = N1.getOperand(0);
17751 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17752 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17753 }
17754
17755 // No FP constant should be created after legalization as the Instruction
17756 // Selection pass has a hard time dealing with FP constants.
17757 bool AllowNewConst = (Level < AfterLegalizeDAG);
17758
17759 // If nnan is enabled, fold lots of things.
17760 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17761 // If allowed, fold (fadd (fneg x), x) -> 0.0
17762 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17763 return DAG.getConstantFP(0.0, DL, VT);
17764
17765 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17766 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17767 return DAG.getConstantFP(0.0, DL, VT);
17768 }
17769
17770 // If 'unsafe math' or reassoc and nsz, fold lots of things.
17771 // TODO: break out portions of the transformations below for which Unsafe is
17772 // considered and which do not require both nsz and reassoc
17773 if ((Options.NoSignedZerosFPMath ||
17774 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17775 AllowNewConst) {
17776 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17777 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17778 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17779 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17780 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17781 }
17782
17783 // We can fold chains of FADD's of the same value into multiplications.
17784 // This transform is not safe in general because we are reducing the number
17785 // of rounding steps.
17786 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17787 if (N0.getOpcode() == ISD::FMUL) {
17788 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17789 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17790
17791 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17792 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17793 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17794 DAG.getConstantFP(1.0, DL, VT));
17795 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17796 }
17797
17798 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17799 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17800 N1.getOperand(0) == N1.getOperand(1) &&
17801 N0.getOperand(0) == N1.getOperand(0)) {
17802 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17803 DAG.getConstantFP(2.0, DL, VT));
17804 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17805 }
17806 }
17807
17808 if (N1.getOpcode() == ISD::FMUL) {
17809 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17810 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17811
17812 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17813 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17814 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17815 DAG.getConstantFP(1.0, DL, VT));
17816 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17817 }
17818
17819 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17820 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17821 N0.getOperand(0) == N0.getOperand(1) &&
17822 N1.getOperand(0) == N0.getOperand(0)) {
17823 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17824 DAG.getConstantFP(2.0, DL, VT));
17825 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17826 }
17827 }
17828
17829 if (N0.getOpcode() == ISD::FADD) {
17830 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17831 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17832 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17833 (N0.getOperand(0) == N1)) {
17834 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17835 DAG.getConstantFP(3.0, DL, VT));
17836 }
17837 }
17838
17839 if (N1.getOpcode() == ISD::FADD) {
17840 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17841 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17842 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17843 N1.getOperand(0) == N0) {
17844 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17845 DAG.getConstantFP(3.0, DL, VT));
17846 }
17847 }
17848
17849 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17850 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17851 N0.getOperand(0) == N0.getOperand(1) &&
17852 N1.getOperand(0) == N1.getOperand(1) &&
17853 N0.getOperand(0) == N1.getOperand(0)) {
17854 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17855 DAG.getConstantFP(4.0, DL, VT));
17856 }
17857 }
17858 } // NoSignedZerosFPMath/(reassoc && nsz) && AllowNewConst
17859
17860 if ((Options.NoSignedZerosFPMath ||
17861 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
17862 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17863 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17864 VT, N0, N1, Flags))
17865 return SD;
17866 }
17867
17868 // FADD -> FMA combines:
17869 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17870 if (Fused.getOpcode() != ISD::DELETED_NODE)
17871 AddToWorklist(Fused.getNode());
17872 return Fused;
17873 }
17874 return SDValue();
17875}
17876
17877SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17878 SDValue Chain = N->getOperand(0);
17879 SDValue N0 = N->getOperand(1);
17880 SDValue N1 = N->getOperand(2);
17881 EVT VT = N->getValueType(0);
17882 EVT ChainVT = N->getValueType(1);
17883 SDLoc DL(N);
17884 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17885
17886 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17887 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17888 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17889 N1, DAG, LegalOperations, ForCodeSize)) {
17890 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17891 {Chain, N0, NegN1});
17892 }
17893
17894 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17895 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17896 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17897 N0, DAG, LegalOperations, ForCodeSize)) {
17898 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17899 {Chain, N1, NegN0});
17900 }
17901 return SDValue();
17902}
17903
17904SDValue DAGCombiner::visitFSUB(SDNode *N) {
17905 SDValue N0 = N->getOperand(0);
17906 SDValue N1 = N->getOperand(1);
17907 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17908 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17909 EVT VT = N->getValueType(0);
17910 SDLoc DL(N);
17911 const TargetOptions &Options = DAG.getTarget().Options;
17912 const SDNodeFlags Flags = N->getFlags();
17913 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17914
17915 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17916 return R;
17917
17918 // fold (fsub c1, c2) -> c1-c2
17919 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17920 return C;
17921
17922 // fold vector ops
17923 if (VT.isVector())
17924 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17925 return FoldedVOp;
17926
17927 if (SDValue NewSel = foldBinOpIntoSelect(N))
17928 return NewSel;
17929
17930 // (fsub A, 0) -> A
17931 if (N1CFP && N1CFP->isZero()) {
17932 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17933 Flags.hasNoSignedZeros()) {
17934 return N0;
17935 }
17936 }
17937
17938 if (N0 == N1) {
17939 // (fsub x, x) -> 0.0
17940 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17941 return DAG.getConstantFP(0.0f, DL, VT);
17942 }
17943
17944 // (fsub -0.0, N1) -> -N1
17945 if (N0CFP && N0CFP->isZero()) {
17946 if (N0CFP->isNegative() ||
17947 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17948 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17949 // flushed to zero, unless all users treat denorms as zero (DAZ).
17950 // FIXME: This transform will change the sign of a NaN and the behavior
17951 // of a signaling NaN. It is only valid when a NoNaN flag is present.
17952 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17953 if (DenormMode == DenormalMode::getIEEE()) {
17954 if (SDValue NegN1 =
17955 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17956 return NegN1;
17957 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17958 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17959 }
17960 }
17961 }
17962
17963 if ((Options.NoSignedZerosFPMath ||
17964 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17965 N1.getOpcode() == ISD::FADD) {
17966 // X - (X + Y) -> -Y
17967 if (N0 == N1->getOperand(0))
17968 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17969 // X - (Y + X) -> -Y
17970 if (N0 == N1->getOperand(1))
17971 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17972 }
17973
17974 // fold (fsub A, (fneg B)) -> (fadd A, B)
17975 if (SDValue NegN1 =
17976 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17977 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17978
17979 // FSUB -> FMA combines:
17980 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17981 AddToWorklist(Fused.getNode());
17982 return Fused;
17983 }
17984
17985 return SDValue();
17986}
17987
17988// Transform IEEE Floats:
17989// (fmul C, (uitofp Pow2))
17990// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17991// (fdiv C, (uitofp Pow2))
17992// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17993//
17994// The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
17995// there is no need for more than an add/sub.
17996//
17997// This is valid under the following circumstances:
17998// 1) We are dealing with IEEE floats
17999// 2) C is normal
18000// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
18001// TODO: Much of this could also be used for generating `ldexp` on targets that
18002// prefer it.
18003SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18004 EVT VT = N->getValueType(0);
18005 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18006 return SDValue();
18007
18008 SDValue ConstOp, Pow2Op;
18009
18010 std::optional<int> Mantissa;
18011 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
18012 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18013 return false;
18014
18015 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
18016 Pow2Op = N->getOperand(1 - ConstOpIdx);
18017 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18018 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18019 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
18020 return false;
18021
18022 Pow2Op = Pow2Op.getOperand(0);
18023
18024 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18025 // TODO: We could use knownbits to make this bound more precise.
18026 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18027
18028 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18029 if (CFP == nullptr)
18030 return false;
18031
18032 const APFloat &APF = CFP->getValueAPF();
18033
18034 // Make sure we have a normal constant.
18035 if (!APF.isNormal())
18036 return false;
18037
18038 // Make sure the float's exponent is within the bounds for which this
18039 // transform produces a bitwise-equal value.
18040 int CurExp = ilogb(APF);
18041 // FMul by pow2 will only increase exponent.
18042 int MinExp =
18043 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18044 // FDiv by pow2 will only decrease exponent.
18045 int MaxExp =
18046 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
18047 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18048 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18049 return false;
18050
18051 // Finally make sure we actually know the mantissa for the float type.
18052 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18053 if (!Mantissa)
18054 Mantissa = ThisMantissa;
18055
18056 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18057 };
18058
18059 // TODO: We may be able to include undefs.
18060 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18061 };
18062
18063 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18064 return SDValue();
18065
18066 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18067 return SDValue();
18068
18069 // Get log2 after all other checks have taken place. This is because
18070 // BuildLogBase2 may create a new node.
18071 SDLoc DL(N);
18072 // Get Log2 type with same bitwidth as the float type (VT).
18073 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18074 if (VT.isVector())
18075 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18076 VT.getVectorElementCount());
18077
18078 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18079 /*InexpensiveOnly*/ true, NewIntVT);
18080 if (!Log2)
18081 return SDValue();
18082
18083 // Perform actual transform.
18084 SDValue MantissaShiftCnt =
18085 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18086 // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should fold to
18087 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
18088 // cast. We could implement that by handling the casts here.
18089 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18090 SDValue ResAsInt =
18091 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18092 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18093 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18094 return ResAsFP;
18095}
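
// A minimal standalone sketch (not part of this file; the helper names are
// illustrative) of the identity the combine above relies on, assuming IEEE-754
// binary32 (23-bit mantissa), a normal constant C, and an exponent that stays
// in range after scaling: adding K to the exponent field of C's bit pattern
// multiplies C by 2^K, so the FMUL/FDIV becomes an integer add/sub.
#include <bit>
#include <cassert>
#include <cstdint>

static float mulByPow2ViaIntAdd(float C, uint32_t K) {
  // Log2 of the power-of-two operand, shifted into the exponent field.
  uint32_t Shift = K << 23;
  return std::bit_cast<float>(std::bit_cast<uint32_t>(C) + Shift);
}

static void mulByPow2SketchCheck() {
  assert(mulByPow2ViaIntAdd(3.5f, 4) == 3.5f * 16.0f);  // 56.0f
  assert(mulByPow2ViaIntAdd(-1.25f, 3) == -10.0f);      // sign bit untouched
}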
18096
18097SDValue DAGCombiner::visitFMUL(SDNode *N) {
18098 SDValue N0 = N->getOperand(0);
18099 SDValue N1 = N->getOperand(1);
18100 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18101 EVT VT = N->getValueType(0);
18102 SDLoc DL(N);
18103 const SDNodeFlags Flags = N->getFlags();
18104 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18105
18106 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18107 return R;
18108
18109 // fold (fmul c1, c2) -> c1*c2
18110 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18111 return C;
18112
18113 // canonicalize constant to RHS
18114 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18115 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18116 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18117
18118 // fold vector ops
18119 if (VT.isVector())
18120 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18121 return FoldedVOp;
18122
18123 if (SDValue NewSel = foldBinOpIntoSelect(N))
18124 return NewSel;
18125
18126 if (Flags.hasAllowReassociation()) {
18127 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18128 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18129 N0.getOpcode() == ISD::FMUL) {
18130 SDValue N00 = N0.getOperand(0);
18131 SDValue N01 = N0.getOperand(1);
18132 // Avoid an infinite loop by making sure that N00 is not a constant
18133 // (the inner multiply has not been constant folded yet).
18134 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
18135 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
18136 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18137 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18138 }
18139 }
18140
18141 // Match a special-case: we convert X * 2.0 into fadd.
18142 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18143 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18144 N0.getOperand(0) == N0.getOperand(1)) {
18145 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18146 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18147 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18148 }
18149
18150 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18151 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18152 VT, N0, N1, Flags))
18153 return SD;
18154 }
18155
18156 // fold (fmul X, 2.0) -> (fadd X, X)
18157 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18158 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18159
18160 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18161 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18162 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18163 return DAG.getNode(ISD::FSUB, DL, VT,
18164 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18165 }
18166 }
18167
18168 // -N0 * -N1 --> N0 * N1
18169 TargetLowering::NegatibleCost CostN0 =
18170 TargetLowering::NegatibleCost::Expensive;
18171 TargetLowering::NegatibleCost CostN1 =
18172 TargetLowering::NegatibleCost::Expensive;
18173 SDValue NegN0 =
18174 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18175 if (NegN0) {
18176 HandleSDNode NegN0Handle(NegN0);
18177 SDValue NegN1 =
18178 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18179 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18180 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18181 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18182 }
18183
18184 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18185 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18186 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18187 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18188 TLI.isOperationLegal(ISD::FABS, VT)) {
18189 SDValue Select = N0, X = N1;
18190 if (Select.getOpcode() != ISD::SELECT)
18191 std::swap(Select, X);
18192
18193 SDValue Cond = Select.getOperand(0);
18194 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18195 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18196
18197 if (TrueOpnd && FalseOpnd &&
18198 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18199 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18200 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18201 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18202 switch (CC) {
18203 default: break;
18204 case ISD::SETOLT:
18205 case ISD::SETULT:
18206 case ISD::SETOLE:
18207 case ISD::SETULE:
18208 case ISD::SETLT:
18209 case ISD::SETLE:
18210 std::swap(TrueOpnd, FalseOpnd);
18211 [[fallthrough]];
18212 case ISD::SETOGT:
18213 case ISD::SETUGT:
18214 case ISD::SETOGE:
18215 case ISD::SETUGE:
18216 case ISD::SETGT:
18217 case ISD::SETGE:
18218 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18219 TLI.isOperationLegal(ISD::FNEG, VT))
18220 return DAG.getNode(ISD::FNEG, DL, VT,
18221 DAG.getNode(ISD::FABS, DL, VT, X));
18222 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18223 return DAG.getNode(ISD::FABS, DL, VT, X);
18224
18225 break;
18226 }
18227 }
18228 }
18229
18230 // FMUL -> FMA combines:
18231 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18232 AddToWorklist(Fused.getNode());
18233 return Fused;
18234 }
18235
18236 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18237 // able to run.
18238 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18239 return R;
18240
18241 return SDValue();
18242}
18243
18244template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18245 SDValue N0 = N->getOperand(0);
18246 SDValue N1 = N->getOperand(1);
18247 SDValue N2 = N->getOperand(2);
18248 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18249 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18250 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18251 EVT VT = N->getValueType(0);
18252 SDLoc DL(N);
18253 const TargetOptions &Options = DAG.getTarget().Options;
18254 // FMA nodes have flags that propagate to the created nodes.
18255 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18256 MatchContextClass matcher(DAG, TLI, N);
18257
18258 // Constant fold FMA.
18259 if (SDValue C =
18260 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18261 return C;
18262
18263 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18264 TargetLowering::NegatibleCost CostN0 =
18265 TargetLowering::NegatibleCost::Expensive;
18266 TargetLowering::NegatibleCost CostN1 =
18267 TargetLowering::NegatibleCost::Expensive;
18268 SDValue NegN0 =
18269 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18270 if (NegN0) {
18271 HandleSDNode NegN0Handle(NegN0);
18272 SDValue NegN1 =
18273 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18274 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18275 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18276 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18277 }
18278
18279 // FIXME: use fast math flags instead of Options.UnsafeFPMath
18280 // TODO: Finally migrate away from global TargetOptions.
18281 if ((Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
18282 (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
18283 if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() ||
18284 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18285 if (N0CFP && N0CFP->isZero())
18286 return N2;
18287 if (N1CFP && N1CFP->isZero())
18288 return N2;
18289 }
18290 }
18291
18292 // FIXME: Support splat of constant.
18293 if (N0CFP && N0CFP->isExactlyValue(1.0))
18294 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18295 if (N1CFP && N1CFP->isExactlyValue(1.0))
18296 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18297
18298 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18299 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18300 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18301 return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18302
18303 bool CanReassociate = N->getFlags().hasAllowReassociation();
18304 if (CanReassociate) {
18305 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18306 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18307 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18308 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
18309 return matcher.getNode(
18310 ISD::FMUL, DL, VT, N0,
18311 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18312 }
18313
18314 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18315 if (matcher.match(N0, ISD::FMUL) &&
18316 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18317 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
18318 return matcher.getNode(
18319 ISD::FMA, DL, VT, N0.getOperand(0),
18320 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18321 }
18322 }
18323
18324 // (fma x, -1, y) -> (fadd (fneg x), y)
18325 // FIXME: Support splat of constant.
18326 if (N1CFP) {
18327 if (N1CFP->isExactlyValue(1.0))
18328 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18329
18330 if (N1CFP->isExactlyValue(-1.0) &&
18331 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18332 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18333 AddToWorklist(RHSNeg.getNode());
18334 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18335 }
18336
18337 // fma (fneg x), K, y -> fma x, -K, y
18338 if (matcher.match(N0, ISD::FNEG) &&
18339 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18340 (N1.hasOneUse() &&
18341 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18342 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18343 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18344 }
18345 }
18346
18347 // FIXME: Support splat of constant.
18348 if (CanReassociate) {
18349 // (fma x, c, x) -> (fmul x, (c+1))
18350 if (N1CFP && N0 == N2) {
18351 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18352 matcher.getNode(ISD::FADD, DL, VT, N1,
18353 DAG.getConstantFP(1.0, DL, VT)));
18354 }
18355
18356 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18357 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18358 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18359 matcher.getNode(ISD::FADD, DL, VT, N1,
18360 DAG.getConstantFP(-1.0, DL, VT)));
18361 }
18362 }
18363
18364 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
18365 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
18366 if (!TLI.isFNegFree(VT))
18367 if (SDValue Neg = TLI.getCheaperNegatedExpression(
18368 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18369 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18370 return SDValue();
18371}
18372
18373SDValue DAGCombiner::visitFMAD(SDNode *N) {
18374 SDValue N0 = N->getOperand(0);
18375 SDValue N1 = N->getOperand(1);
18376 SDValue N2 = N->getOperand(2);
18377 EVT VT = N->getValueType(0);
18378 SDLoc DL(N);
18379
18380 // Constant fold FMAD.
18381 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18382 return C;
18383
18384 return SDValue();
18385}
18386
18387// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18388// reciprocal.
18389// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18390// Notice that this is not always beneficial. One reason is different targets
18391// may have different costs for FDIV and FMUL, so sometimes the cost of two
18392// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18393// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
18394SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18395 // TODO: Limit this transform based on optsize/minsize - it always creates at
18396 // least 1 extra instruction. But the perf win may be substantial enough
18397 // that only minsize should restrict this.
18398 const SDNodeFlags Flags = N->getFlags();
18399 if (LegalDAG || !Flags.hasAllowReciprocal())
18400 return SDValue();
18401
18402 // Skip if current node is a reciprocal/fneg-reciprocal.
18403 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18404 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18405 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18406 return SDValue();
18407
18408 // Exit early if the target does not want this transform or if there can't
18409 // possibly be enough uses of the divisor to make the transform worthwhile.
18410 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18411
18412 // For splat vectors, scale the number of uses by the splat factor. If we can
18413 // convert the division into a scalar op, that will likely be much faster.
18414 unsigned NumElts = 1;
18415 EVT VT = N->getValueType(0);
18416 if (VT.isVector() && DAG.isSplatValue(N1))
18417 NumElts = VT.getVectorMinNumElements();
18418
18419 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18420 return SDValue();
18421
18422 // Find all FDIV users of the same divisor.
18423 // Use a set because duplicates may be present in the user list.
18424 SetVector<SDNode *> Users;
18425 for (auto *U : N1->users()) {
18426 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18427 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18428 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18429 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18430 U->getFlags().hasAllowReassociation() &&
18431 U->getFlags().hasNoSignedZeros())
18432 continue;
18433
18434 // This division is eligible for optimization only if it allows reciprocal
18435 // formation.
18436 if (U->getFlags().hasAllowReciprocal())
18437 Users.insert(U);
18438 }
18439 }
18440
18441 // Now that we have the actual number of divisor uses, make sure it meets
18442 // the minimum threshold specified by the target.
18443 if ((Users.size() * NumElts) < MinUses)
18444 return SDValue();
18445
18446 SDLoc DL(N);
18447 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18448 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18449
18450 // Dividend / Divisor -> Dividend * Reciprocal
18451 for (auto *U : Users) {
18452 SDValue Dividend = U->getOperand(0);
18453 if (Dividend != FPOne) {
18454 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18455 Reciprocal, Flags);
18456 CombineTo(U, NewNode);
18457 } else if (U != Reciprocal.getNode()) {
18458 // In the absence of fast-math-flags, this user node is always the
18459 // same node as Reciprocal, but with FMF they may be different nodes.
18460 CombineTo(U, Reciprocal);
18461 }
18462 }
18463 return SDValue(N, 0); // N was replaced.
18464}
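
// A minimal standalone sketch (not from this file; the helper name is
// illustrative) of the rewrite performed above, assuming the reciprocal
// ('arcp') fast-math relaxation is acceptable: several divisions by the same
// divisor become one division plus cheap multiplies.
static void repeatedDivisorRewrite(float A, float B, float D, float &ResA,
                                   float &ResB) {
  // Before: ResA = A / D; ResB = B / D;   (two FDIVs)
  // After:  one FDIV produces the reciprocal, each use becomes an FMUL.
  float Recip = 1.0f / D;
  ResA = A * Recip;
  ResB = B * Recip;
}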
18465
18466SDValue DAGCombiner::visitFDIV(SDNode *N) {
18467 SDValue N0 = N->getOperand(0);
18468 SDValue N1 = N->getOperand(1);
18469 EVT VT = N->getValueType(0);
18470 SDLoc DL(N);
18471 const TargetOptions &Options = DAG.getTarget().Options;
18472 SDNodeFlags Flags = N->getFlags();
18473 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18474
18475 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18476 return R;
18477
18478 // fold (fdiv c1, c2) -> c1/c2
18479 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18480 return C;
18481
18482 // fold vector ops
18483 if (VT.isVector())
18484 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18485 return FoldedVOp;
18486
18487 if (SDValue NewSel = foldBinOpIntoSelect(N))
18488 return NewSel;
18489
18490 if (SDValue V = combineRepeatedFPDivisors(N))
18491 return V;
18492
18493 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18494 // the loss is acceptable with AllowReciprocal.
18495 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18496 // Compute the reciprocal 1.0 / c2.
18497 const APFloat &N1APF = N1CFP->getValueAPF();
18498 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18499 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
18500 // Only do the transform if the reciprocal is a legal fp immediate that
18501 // isn't too nasty (eg NaN, denormal, ...).
18502 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18503 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18504 (!LegalOperations ||
18505 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18506 // backend)... we should handle this gracefully after Legalize.
18507 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18508 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18509 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18510 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18511 DAG.getConstantFP(Recip, DL, VT));
18512 }
18513
18514 if (Flags.hasAllowReciprocal()) {
18515 // If this FDIV is part of a reciprocal square root, it may be folded
18516 // into a target-specific square root estimate instruction.
18517 if (N1.getOpcode() == ISD::FSQRT) {
18518 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
18519 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18520 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18521 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18522 if (SDValue RV =
18523 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18524 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18525 AddToWorklist(RV.getNode());
18526 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18527 }
18528 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18529 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18530 if (SDValue RV =
18531 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18532 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18533 AddToWorklist(RV.getNode());
18534 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18535 }
18536 } else if (N1.getOpcode() == ISD::FMUL) {
18537 // Look through an FMUL. Even though this won't remove the FDIV directly,
18538 // it's still worthwhile to get rid of the FSQRT if possible.
18539 SDValue Sqrt, Y;
18540 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18541 Sqrt = N1.getOperand(0);
18542 Y = N1.getOperand(1);
18543 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18544 Sqrt = N1.getOperand(1);
18545 Y = N1.getOperand(0);
18546 }
18547 if (Sqrt.getNode()) {
18548 // If the other multiply operand is known positive, pull it into the
18549 // sqrt. That will eliminate the division if we convert to an estimate.
18550 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18551 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18552 SDValue A;
18553 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18554 A = Y.getOperand(0);
18555 else if (Y == Sqrt.getOperand(0))
18556 A = Y;
18557 if (A) {
18558 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18559 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18560 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18561 SDValue AAZ =
18562 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18563 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
18564 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18565
18566 // Estimate creation failed. Clean up speculatively created nodes.
18567 recursivelyDeleteUnusedNodes(AAZ.getNode());
18568 }
18569 }
18570
18571 // We found a FSQRT, so try to make this fold:
18572 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18573 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
18574 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18575 AddToWorklist(Div.getNode());
18576 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18577 }
18578 }
18579 }
18580
18581 // Fold into a reciprocal estimate and multiply instead of a real divide.
18582 if (Options.NoInfsFPMath || Flags.hasNoInfs())
18583 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18584 return RV;
18585 }
18586
18587 // Fold X/Sqrt(X) -> Sqrt(X)
18588 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
18589 Flags.hasAllowReassociation())
18590 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18591 return N1;
18592
18593 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18594 TargetLowering::NegatibleCost CostN0 =
18595 TargetLowering::NegatibleCost::Expensive;
18596 TargetLowering::NegatibleCost CostN1 =
18597 TargetLowering::NegatibleCost::Expensive;
18598 SDValue NegN0 =
18599 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18600 if (NegN0) {
18601 HandleSDNode NegN0Handle(NegN0);
18602 SDValue NegN1 =
18603 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18604 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18605 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18606 return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18607 }
18608
18609 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18610 return R;
18611
18612 return SDValue();
18613}
18614
18615SDValue DAGCombiner::visitFREM(SDNode *N) {
18616 SDValue N0 = N->getOperand(0);
18617 SDValue N1 = N->getOperand(1);
18618 EVT VT = N->getValueType(0);
18619 SDNodeFlags Flags = N->getFlags();
18620 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18621 SDLoc DL(N);
18622
18623 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18624 return R;
18625
18626 // fold (frem c1, c2) -> fmod(c1,c2)
18627 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18628 return C;
18629
18630 if (SDValue NewSel = foldBinOpIntoSelect(N))
18631 return NewSel;
18632
18633 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, providing N1 is an integer
18634 // power of 2.
18635 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18638 TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
18639 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18640 bool NeedsCopySign =
18641 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
18642 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18643 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18644 SDValue MLA;
18645 if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
18646 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18647 N1, N0);
18648 } else {
18649 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18650 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18651 }
18652 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18653 }
18654
18655 return SDValue();
18656}
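
// A standalone sketch (not from this file; the helper name is illustrative) of
// the expansion used above when the divisor is a power of two: division by 2^k
// is exact, so frem(X, Y) == X - trunc(X / Y) * Y, and an explicit copysign
// keeps the sign of a zero result matching X.
#include <cmath>

static double fremByPow2Sketch(double X, double Y /* assumed to be 2^k */) {
  double Quot = std::trunc(X / Y);   // exact quotient, truncated toward zero
  double Rem = X - Quot * Y;         // may be emitted as FMA(-Quot, Y, X)
  return std::copysign(Rem, X);      // e.g. frem(-8.0, 4.0) is -0.0, not +0.0
}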
18657
18658SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18659 SDNodeFlags Flags = N->getFlags();
18660 const TargetOptions &Options = DAG.getTarget().Options;
18661
18662 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18663 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18664 if (!Flags.hasApproximateFuncs() ||
18665 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
18666 return SDValue();
18667
18668 SDValue N0 = N->getOperand(0);
18669 if (TLI.isFsqrtCheap(N0, DAG))
18670 return SDValue();
18671
18672 // FSQRT nodes have flags that propagate to the created nodes.
18673 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18674 // transform the fdiv, we may produce a sub-optimal estimate sequence
18675 // because the reciprocal calculation may not have to filter out a
18676 // 0.0 input.
18677 return buildSqrtEstimate(N0, Flags);
18678}
18679
18680/// copysign(x, fp_extend(y)) -> copysign(x, y)
18681/// copysign(x, fp_round(y)) -> copysign(x, y)
18682/// Operands to the functions are the type of X and Y respectively.
18683static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18684 // Always fold no-op FP casts.
18685 if (XTy == YTy)
18686 return true;
18687
18688 // Do not optimize out type conversion of f128 type yet.
18689 // For some targets like x86_64, configuration is changed to keep one f128
18690 // value in one SSE register, but instruction selection cannot handle
18691 // FCOPYSIGN on SSE registers yet.
18692 if (YTy == MVT::f128)
18693 return false;
18694
18695 // Avoid mismatched vector operand types, for better instruction selection.
18696 return !YTy.isVector();
18697}
18698
18699static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
18700 SDValue N1 = N->getOperand(1);
18701 if (N1.getOpcode() != ISD::FP_EXTEND &&
18702 N1.getOpcode() != ISD::FP_ROUND)
18703 return false;
18704 EVT N1VT = N1->getValueType(0);
18705 EVT N1Op0VT = N1->getOperand(0).getValueType();
18706 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18707}
18708
18709SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18710 SDValue N0 = N->getOperand(0);
18711 SDValue N1 = N->getOperand(1);
18712 EVT VT = N->getValueType(0);
18713 SDLoc DL(N);
18714
18715 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18716 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18717 return C;
18718
18719 // copysign(x, fp_extend(y)) -> copysign(x, y)
18720 // copysign(x, fp_round(y)) -> copysign(x, y)
18721 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18722 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18723
18724 if (SimplifyDemandedBits(SDValue(N, 0)))
18725 return SDValue(N, 0);
18726
18727 return SDValue();
18728}
18729
18730SDValue DAGCombiner::visitFPOW(SDNode *N) {
18731 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18732 if (!ExponentC)
18733 return SDValue();
18734 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18735
18736 // Try to convert x ** (1/3) into cube root.
18737 // TODO: Handle the various flavors of long double.
18738 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18739 // Some range near 1/3 should be fine.
18740 EVT VT = N->getValueType(0);
18741 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18742 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18743 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18744 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18745 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
18746 // For regular numbers, rounding may cause the results to differ.
18747 // Therefore, we require { nsz ninf nnan afn } for this transform.
18748 // TODO: We could select out the special cases if we don't have nsz/ninf.
18749 SDNodeFlags Flags = N->getFlags();
18750 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18751 !Flags.hasApproximateFuncs())
18752 return SDValue();
18753
18754 // Do not create a cbrt() libcall if the target does not have it, and do not
18755 // turn a pow that has lowering support into a cbrt() libcall.
18756 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18757 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18758 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18759 return SDValue();
18760
18761 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18762 }
18763
18764 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18765 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18766 // TODO: This could be extended (using a target hook) to handle smaller
18767 // power-of-2 fractional exponents.
18768 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18769 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18770 if (ExponentIs025 || ExponentIs075) {
18771 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18772 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18773 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18774 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18775 // For regular numbers, rounding may cause the results to differ.
18776 // Therefore, we require { nsz ninf afn } for this transform.
18777 // TODO: We could select out the special cases if we don't have nsz/ninf.
18778 SDNodeFlags Flags = N->getFlags();
18779
18780 // We only need no signed zeros for the 0.25 case.
18781 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18782 !Flags.hasApproximateFuncs())
18783 return SDValue();
18784
18785 // Don't double the number of libcalls. We are trying to inline fast code.
18786 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18787 return SDValue();
18788
18789 // Assume that libcalls are the smallest code.
18790 // TODO: This restriction should probably be lifted for vectors.
18791 if (ForCodeSize)
18792 return SDValue();
18793
18794 // pow(X, 0.25) --> sqrt(sqrt(X))
18795 SDLoc DL(N);
18796 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18797 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18798 if (ExponentIs025)
18799 return SqrtSqrt;
18800 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18801 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18802 }
18803
18804 return SDValue();
18805}
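
// A standalone sketch (not from this file; the helper names are illustrative)
// of the sqrt-based expansions used above; they are only equivalent to pow
// under the nsz/ninf/afn fast-math assumptions spelled out in the comments.
#include <cmath>

static double powQuarterSketch(double X) {
  return std::sqrt(std::sqrt(X));    // X ** 0.25
}

static double powThreeQuartersSketch(double X) {
  double Sqrt = std::sqrt(X);
  return Sqrt * std::sqrt(Sqrt);     // X ** 0.75 == sqrt(X) * sqrt(sqrt(X))
}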
18806
18807static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18808 const TargetLowering &TLI) {
18809 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18810 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18811 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18812 // conversions would return +0.0.
18813 // FIXME: We should be able to use node-level FMF here.
18814 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
18815 EVT VT = N->getValueType(0);
18816 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18817 !DAG.getTarget().Options.NoSignedZerosFPMath)
18818 return SDValue();
18819
18820 // fptosi/fptoui round towards zero, so converting from FP to integer and
18821 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18822 SDValue N0 = N->getOperand(0);
18823 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18824 N0.getOperand(0).getValueType() == VT)
18825 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18826
18827 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18828 N0.getOperand(0).getValueType() == VT)
18829 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18830
18831 return SDValue();
18832}
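
// A standalone sketch (not from this file; the helper name is illustrative) of
// the round-trip identity used above: fptosi/fptoui round toward zero, so for
// values where the cast to integer is well defined, converting to integer and
// back is the same as ftrunc.
#include <cassert>
#include <cmath>

static void fpIntRoundTripSketch() {
  float X = -2.75f;
  assert(static_cast<float>(static_cast<int>(X)) == std::trunc(X)); // both -2.0f
}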
18833
18834SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18835 SDValue N0 = N->getOperand(0);
18836 EVT VT = N->getValueType(0);
18837 EVT OpVT = N0.getValueType();
18838 SDLoc DL(N);
18839
18840 // [us]itofp(undef) = 0, because the result value is bounded.
18841 if (N0.isUndef())
18842 return DAG.getConstantFP(0.0, DL, VT);
18843
18844 // fold (sint_to_fp c1) -> c1fp
18845 // ...but only if the target supports immediate floating-point values
18846 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18847 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18848 return C;
18849
18850 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18851 // but UINT_TO_FP is legal on this target, try to convert.
18852 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18853 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18854 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18855 if (DAG.SignBitIsZero(N0))
18856 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18857 }
18858
18859 // The next optimizations are desirable only if SELECT_CC can be lowered.
18860 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18861 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18862 !VT.isVector() &&
18863 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18864 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18865 DAG.getConstantFP(0.0, DL, VT));
18866
18867 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18868 // (select (setcc x, y, cc), 1.0, 0.0)
18869 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18870 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18871 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18872 return DAG.getSelect(DL, VT, N0.getOperand(0),
18873 DAG.getConstantFP(1.0, DL, VT),
18874 DAG.getConstantFP(0.0, DL, VT));
18875
18876 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18877 return FTrunc;
18878
18879 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18880 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18881 TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
18882 N0.getOperand(0).getValueType()))
18883 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18884
18885 return SDValue();
18886}
18887
18888SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18889 SDValue N0 = N->getOperand(0);
18890 EVT VT = N->getValueType(0);
18891 EVT OpVT = N0.getValueType();
18892 SDLoc DL(N);
18893
18894 // [us]itofp(undef) = 0, because the result value is bounded.
18895 if (N0.isUndef())
18896 return DAG.getConstantFP(0.0, DL, VT);
18897
18898 // fold (uint_to_fp c1) -> c1fp
18899 // ...but only if the target supports immediate floating-point values
18900 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18901 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18902 return C;
18903
18904 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18905 // but SINT_TO_FP is legal on this target, try to convert.
18906 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18907 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18908 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18909 if (DAG.SignBitIsZero(N0))
18910 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18911 }
18912
18913 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18914 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18915 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18916 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18917 DAG.getConstantFP(0.0, DL, VT));
18918
18919 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18920 return FTrunc;
18921
18922 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18923 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18924 TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
18925 N0.getOperand(0).getValueType()))
18926 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18927
18928 return SDValue();
18929}
18930
18931// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18932static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
18933 SDValue N0 = N->getOperand(0);
18934 EVT VT = N->getValueType(0);
18935
18936 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18937 return SDValue();
18938
18939 SDValue Src = N0.getOperand(0);
18940 EVT SrcVT = Src.getValueType();
18941 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18942 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18943
18944 // We can safely assume the conversion won't overflow the output range,
18945 // because (for example) (uint8_t)18293.f is undefined behavior.
18946
18947 // Since we can assume the conversion won't overflow, our decision as to
18948 // whether the input will fit in the float should depend on the minimum
18949 // of the input range and output range.
18950
18951 // This means this is also safe for a signed input and unsigned output, since
18952 // a negative input would lead to undefined behavior.
18953 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18954 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18955 unsigned ActualSize = std::min(InputSize, OutputSize);
18956 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18957
18958 // We can only fold away the float conversion if the input range can be
18959 // represented exactly in the float range.
18960 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18961 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18962 unsigned ExtOp =
18963 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18964 return DAG.getNode(ExtOp, DL, VT, Src);
18965 }
18966 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18967 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18968 return DAG.getBitcast(VT, Src);
18969 }
18970 return SDValue();
18971}
18972
18973SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18974 SDValue N0 = N->getOperand(0);
18975 EVT VT = N->getValueType(0);
18976 SDLoc DL(N);
18977
18978 // fold (fp_to_sint undef) -> undef
18979 if (N0.isUndef())
18980 return DAG.getUNDEF(VT);
18981
18982 // fold (fp_to_sint c1fp) -> c1
18983 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18984 return C;
18985
18986 return FoldIntToFPToInt(N, DL, DAG);
18987}
18988
18989SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18990 SDValue N0 = N->getOperand(0);
18991 EVT VT = N->getValueType(0);
18992 SDLoc DL(N);
18993
18994 // fold (fp_to_uint undef) -> undef
18995 if (N0.isUndef())
18996 return DAG.getUNDEF(VT);
18997
18998 // fold (fp_to_uint c1fp) -> c1
18999 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19000 return C;
19001
19002 return FoldIntToFPToInt(N, DL, DAG);
19003}
19004
19005SDValue DAGCombiner::visitXROUND(SDNode *N) {
19006 SDValue N0 = N->getOperand(0);
19007 EVT VT = N->getValueType(0);
19008
19009 // fold (lrint|llrint undef) -> undef
19010 // fold (lround|llround undef) -> undef
19011 if (N0.isUndef())
19012 return DAG.getUNDEF(VT);
19013
19014 // fold (lrint|llrint c1fp) -> c1
19015 // fold (lround|llround c1fp) -> c1
19016 if (SDValue C =
19017 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19018 return C;
19019
19020 return SDValue();
19021}
19022
19023SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
19024 SDValue N0 = N->getOperand(0);
19025 SDValue N1 = N->getOperand(1);
19026 EVT VT = N->getValueType(0);
19027 SDLoc DL(N);
19028
19029 // fold (fp_round c1fp) -> c1fp
19030 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
19031 return C;
19032
19033 // fold (fp_round (fp_extend x)) -> x
19034 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
19035 return N0.getOperand(0);
19036
19037 // fold (fp_round (fp_round x)) -> (fp_round x)
19038 if (N0.getOpcode() == ISD::FP_ROUND) {
19039 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
19040 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
19041
19042 // Avoid folding legal fp_rounds into non-legal ones.
19043 if (!hasOperation(ISD::FP_ROUND, VT))
19044 return SDValue();
19045
19046 // Skip this folding if it results in an fp_round from f80 to f16.
19047 //
19048 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19049 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19050 // instructions from f32 or f64. Moreover, the first (value-preserving)
19051 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19052 // x86.
19053 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19054 return SDValue();
19055
19056 // If the first fp_round isn't a value preserving truncation, it might
19057 // introduce a tie in the second fp_round, that wouldn't occur in the
19058 // single-step fp_round we want to fold to.
19059 // In other words, double rounding isn't the same as rounding.
19060 // Also, this is a value preserving truncation iff both fp_round's are.
19061 if ((N->getFlags().hasAllowContract() &&
19062 N0->getFlags().hasAllowContract()) ||
19063 N0IsTrunc)
19064 return DAG.getNode(
19065 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19066 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19067 }
19068
19069 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19070 // Note: From a legality perspective, this is a two step transform. First,
19071 // we duplicate the fp_round to the arguments of the copysign, then we
19072 // eliminate the fp_round on Y. The second step requires an additional
19073 // predicate to match the implementation above.
19074 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19075 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
19076 N0.getValueType())) {
19077 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19078 N0.getOperand(0), N1);
19079 AddToWorklist(Tmp.getNode());
19080 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19081 }
19082
19083 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19084 return NewVSel;
19085
19086 return SDValue();
19087}
19088
19089// Eliminate a floating-point widening of a narrowed value if the fast math
19090// flags allow it.
19091SDValue DAGCombiner::eliminateFPCastPair(SDNode *N) {
19092 SDValue N0 = N->getOperand(0);
19093 EVT VT = N->getValueType(0);
19094
19095 unsigned NarrowingOp;
19096 switch (N->getOpcode()) {
19097 case ISD::FP16_TO_FP:
19098 NarrowingOp = ISD::FP_TO_FP16;
19099 break;
19100 case ISD::BF16_TO_FP:
19101 NarrowingOp = ISD::FP_TO_BF16;
19102 break;
19103 case ISD::FP_EXTEND:
19104 NarrowingOp = ISD::FP_ROUND;
19105 break;
19106 default:
19107 llvm_unreachable("Expected widening FP cast");
19108 }
19109
19110 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19111 const SDNodeFlags NarrowFlags = N0->getFlags();
19112 const SDNodeFlags WidenFlags = N->getFlags();
19113 // Narrowing can introduce inf and change the encoding of a nan, so the
19114 // widen must have the nnan and ninf flags to indicate that we don't need to
19115 // care about that. We are also removing a rounding step, and that requires
19116 // both the narrow and widen to allow contraction.
19117 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19118 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19119 return N0.getOperand(0);
19120 }
19121 }
19122
19123 return SDValue();
19124}
19125
19126SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19127 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19128 SDValue N0 = N->getOperand(0);
19129 EVT VT = N->getValueType(0);
19130 SDLoc DL(N);
19131
19132 if (VT.isVector())
19133 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19134 return FoldedVOp;
19135
19136 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19137 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19138 return SDValue();
19139
19140 // fold (fp_extend c1fp) -> c1fp
19141 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19142 return C;
19143
19144 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19145 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19146 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19147 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19148
19149 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19150 // value of X.
19151 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19152 SDValue In = N0.getOperand(0);
19153 if (In.getValueType() == VT) return In;
19154 if (VT.bitsLT(In.getValueType()))
19155 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19156 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19157 }
19158
19159 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19160 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19161 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
19162 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19163 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19164 LN0->getChain(),
19165 LN0->getBasePtr(), N0.getValueType(),
19166 LN0->getMemOperand());
19167 CombineTo(N, ExtLoad);
19168 CombineTo(
19169 N0.getNode(),
19170 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19171 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19172 ExtLoad.getValue(1));
19173 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19174 }
19175
19176 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19177 return NewVSel;
19178
19179 if (SDValue CastEliminated = eliminateFPCastPair(N))
19180 return CastEliminated;
19181
19182 return SDValue();
19183}
19184
19185SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19186 SDValue N0 = N->getOperand(0);
19187 EVT VT = N->getValueType(0);
19188
19189 // fold (fceil c1) -> fceil(c1)
19190 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19191 return C;
19192
19193 return SDValue();
19194}
19195
19196SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19197 SDValue N0 = N->getOperand(0);
19198 EVT VT = N->getValueType(0);
19199
19200 // fold (ftrunc c1) -> ftrunc(c1)
19201 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19202 return C;
19203
19204 // fold ftrunc (known rounded int x) -> x
19205 // ftrunc is a part of the fptosi/fptoui expansion on some targets, so it is
19206 // likely to be generated to extract an integer from a rounded floating value.
19207 switch (N0.getOpcode()) {
19208 default: break;
19209 case ISD::FRINT:
19210 case ISD::FTRUNC:
19211 case ISD::FNEARBYINT:
19212 case ISD::FROUNDEVEN:
19213 case ISD::FFLOOR:
19214 case ISD::FCEIL:
19215 return N0;
19216 }
19217
19218 return SDValue();
19219}
19220
19221SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19222 SDValue N0 = N->getOperand(0);
19223
19224 // fold (ffrexp c1) -> ffrexp(c1)
19225 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
19226 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19227 return SDValue();
19228}
19229
19230SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19231 SDValue N0 = N->getOperand(0);
19232 EVT VT = N->getValueType(0);
19233
19234 // fold (ffloor c1) -> ffloor(c1)
19235 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19236 return C;
19237
19238 return SDValue();
19239}
19240
19241SDValue DAGCombiner::visitFNEG(SDNode *N) {
19242 SDValue N0 = N->getOperand(0);
19243 EVT VT = N->getValueType(0);
19244 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19245
19246 // Constant fold FNEG.
19247 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19248 return C;
19249
19250 if (SDValue NegN0 =
19251 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19252 return NegN0;
19253
19254 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19255 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19256 // know it was called from a context with a nsz flag if the input fsub does
19257 // not.
19258 if (N0.getOpcode() == ISD::FSUB &&
19259 (DAG.getTarget().Options.NoSignedZerosFPMath ||
19260 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
19261 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19262 N0.getOperand(0));
19263 }
19264
19265 if (SimplifyDemandedBits(SDValue(N, 0)))
19266 return SDValue(N, 0);
19267
19268 if (SDValue Cast = foldSignChangeInBitcast(N))
19269 return Cast;
19270
19271 return SDValue();
19272}
19273
19274SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19275 SDValue N0 = N->getOperand(0);
19276 SDValue N1 = N->getOperand(1);
19277 EVT VT = N->getValueType(0);
19278 const SDNodeFlags Flags = N->getFlags();
19279 unsigned Opc = N->getOpcode();
19280 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19281 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
19282 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19283
19284 // Constant fold.
19285 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19286 return C;
19287
19288 // Canonicalize to constant on RHS.
19289 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
19290 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
19291 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19292
19293 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19294 const APFloat &AF = N1CFP->getValueAPF();
19295
19296 // minnum(X, nan) -> X
19297 // maxnum(X, nan) -> X
19298 // minimum(X, nan) -> nan
19299 // maximum(X, nan) -> nan
19300 if (AF.isNaN())
19301 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
19302
19303 // In the following folds, inf can be replaced with the largest finite
19304 // float, if the ninf flag is set.
19305 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19306 // minnum(X, -inf) -> -inf
19307 // maxnum(X, +inf) -> +inf
19308 // minimum(X, -inf) -> -inf if nnan
19309 // maximum(X, +inf) -> +inf if nnan
19310 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
19311 return N->getOperand(1);
19312
19313 // minnum(X, +inf) -> X if nnan
19314 // maxnum(X, -inf) -> X if nnan
19315 // minimum(X, +inf) -> X
19316 // maximum(X, -inf) -> X
19317 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
19318 return N->getOperand(0);
19319 }
19320 }
19321
19322 if (SDValue SD = reassociateReduction(
19323 PropagatesNaN
19324 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
19325 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
19326 Opc, SDLoc(N), VT, N0, N1, Flags))
19327 return SD;
19328
19329 return SDValue();
19330}
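
// A standalone sketch (not from this file; the helper name is illustrative) of
// the NaN-operand fold above for the 'num' flavors: fmin/fmax implement
// minnum/maxnum and return the non-NaN operand, so a constant NaN on the RHS
// folds away; the minimum/maximum flavors would instead propagate the NaN.
#include <cassert>
#include <cmath>

static void fminNaNSketch() {
  double QNaN = std::nan("");
  assert(std::fmin(1.0, QNaN) == 1.0); // minnum(X, nan) -> X
  assert(std::fmax(1.0, QNaN) == 1.0); // maxnum(X, nan) -> X
}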
19331
19332SDValue DAGCombiner::visitFABS(SDNode *N) {
19333 SDValue N0 = N->getOperand(0);
19334 EVT VT = N->getValueType(0);
19335 SDLoc DL(N);
19336
19337 // fold (fabs c1) -> fabs(c1)
19338 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19339 return C;
19340
19341 if (SimplifyDemandedBits(SDValue(N, 0)))
19342 return SDValue(N, 0);
19343
19344 if (SDValue Cast = foldSignChangeInBitcast(N))
19345 return Cast;
19346
19347 return SDValue();
19348}
19349
19350SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19351 SDValue Chain = N->getOperand(0);
19352 SDValue N1 = N->getOperand(1);
19353 SDValue N2 = N->getOperand(2);
19354
19355 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19356 // nondeterministic jumps).
19357 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19358 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19359 N1->getOperand(0), N2, N->getFlags());
19360 }
19361
19362 // Variant of the previous fold where there is a SETCC in between:
19363 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19364 // =>
19365 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19366 // =>
19367 // BRCOND(SETCC(X, CONST, Cond))
19368 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19369 // isn't equivalent to true or false.
19370 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19371 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19372 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19373 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19374 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
19375 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19376 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19377 bool Updated = false;
19378
19379 // Is 'X Cond C' always true or false?
19380 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19381 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19382 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19383 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19384 (Cond == ISD::SETGT && C->isMaxSignedValue());
19385 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19386 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19387 (Cond == ISD::SETUGE && C->isZero()) ||
19388 (Cond == ISD::SETGE && C->isMinSignedValue());
19389 return True || False;
19390 };
19391
19392 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19393 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19394 S0 = S0->getOperand(0);
19395 Updated = true;
19396 }
19397 }
19398 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19399 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19400 S1 = S1->getOperand(0);
19401 Updated = true;
19402 }
19403 }
19404
19405 if (Updated)
19406 return DAG.getNode(
19407 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19408 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19409 N->getFlags());
19410 }
19411
19412 // If N is a constant we could fold this into a fallthrough or unconditional
19413 // branch. However that doesn't happen very often in normal code, because
19414 // Instcombine/SimplifyCFG should have handled the available opportunities.
19415 // If we did this folding here, it would be necessary to update the
19416 // MachineBasicBlock CFG, which is awkward.
19417
19418 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19419 // on the target, also copy fast math flags.
19420 if (N1.getOpcode() == ISD::SETCC &&
19421 TLI.isOperationLegalOrCustom(ISD::BR_CC,
19422 N1.getOperand(0).getValueType())) {
19423 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19424 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19425 N1->getFlags());
19426 }
19427
19428 if (N1.hasOneUse()) {
19429 // rebuildSetCC calls visitXor which may change the Chain when there is a
19430 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19431 HandleSDNode ChainHandle(Chain);
19432 if (SDValue NewN1 = rebuildSetCC(N1))
19433 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19434 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19435 }
19436
19437 return SDValue();
19438}
19439
19440SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19441 if (N.getOpcode() == ISD::SRL ||
19442 (N.getOpcode() == ISD::TRUNCATE &&
19443 (N.getOperand(0).hasOneUse() &&
19444 N.getOperand(0).getOpcode() == ISD::SRL))) {
19445 // Look past the truncate.
19446 if (N.getOpcode() == ISD::TRUNCATE)
19447 N = N.getOperand(0);
19448
19449 // Match this pattern so that we can generate simpler code:
19450 //
19451 // %a = ...
19452 // %b = and i32 %a, 2
19453 // %c = srl i32 %b, 1
19454 // brcond i32 %c ...
19455 //
19456 // into
19457 //
19458 // %a = ...
19459 // %b = and i32 %a, 2
19460 // %c = setcc eq %b, 0
19461 // brcond %c ...
19462 //
19463 // This applies only when the AND constant value has one bit set and the
19464 // SRL constant is equal to the log2 of the AND constant. The back-end is
19465 // smart enough to convert the result into a TEST/JMP sequence.
19466 SDValue Op0 = N.getOperand(0);
19467 SDValue Op1 = N.getOperand(1);
19468
19469 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19470 SDValue AndOp1 = Op0.getOperand(1);
19471
19472 if (AndOp1.getOpcode() == ISD::Constant) {
19473 const APInt &AndConst = AndOp1->getAsAPIntVal();
19474
19475 if (AndConst.isPowerOf2() &&
19476 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19477 SDLoc DL(N);
19478 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19479 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19480 ISD::SETNE);
19481 }
19482 }
19483 }
19484 }
19485
19486 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19487 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19488 if (N.getOpcode() == ISD::XOR) {
19489 // Because we may call this on a speculatively constructed
19490 // SimplifiedSetCC Node, we need to simplify this node first.
19491 // Ideally this should be folded into SimplifySetCC and not
19492 // here. For now, grab a handle to N so we don't lose it from
19493 // replacements internal to the visit.
19494 HandleSDNode XORHandle(N);
19495 while (N.getOpcode() == ISD::XOR) {
19496 SDValue Tmp = visitXOR(N.getNode());
19497 // No simplification done.
19498 if (!Tmp.getNode())
19499 break;
19500 // Returning N is a form of in-visit replacement that may invalidate
19501 // N. Grab the value from the handle.
19502 if (Tmp.getNode() == N.getNode())
19503 N = XORHandle.getValue();
19504 else // Node simplified. Try simplifying again.
19505 N = Tmp;
19506 }
19507
19508 if (N.getOpcode() != ISD::XOR)
19509 return N;
19510
19511 SDValue Op0 = N->getOperand(0);
19512 SDValue Op1 = N->getOperand(1);
19513
19514 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19515 bool Equal = false;
19516 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19517 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19518 Op0.getValueType() == MVT::i1) {
19519 N = Op0;
19520 Op0 = N->getOperand(0);
19521 Op1 = N->getOperand(1);
19522 Equal = true;
19523 }
19524
19525 EVT SetCCVT = N.getValueType();
19526 if (LegalTypes)
19527 SetCCVT = getSetCCResultType(SetCCVT);
19528 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19529 // it would introduce illegal operations post-legalization as this can
19530 // result in infinite looping between converting xor->setcc here, and
19531 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19532 ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
19533 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19534 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19535 }
19536 }
19537
19538 return SDValue();
19539}
19540
19541// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19542//
19543SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19544 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19545 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19546
19547 // If N is a constant we could fold this into a fallthrough or unconditional
19548 // branch. However that doesn't happen very often in normal code, because
19549 // Instcombine/SimplifyCFG should have handled the available opportunities.
19550 // If we did this folding here, it would be necessary to update the
19551 // MachineBasicBlock CFG, which is awkward.
19552
19553 // Use SimplifySetCC to simplify SETCC's.
19554 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
19555 CondLHS, CondRHS, CC->get(), SDLoc(N),
19556 false);
19557 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19558
19559 // fold to a simpler setcc
19560 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19561 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19562 N->getOperand(0), Simp.getOperand(2),
19563 Simp.getOperand(0), Simp.getOperand(1),
19564 N->getOperand(4));
19565
19566 return SDValue();
19567}
19568
19569static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19570 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19571 const TargetLowering &TLI) {
19572 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19573 if (LD->isIndexed())
19574 return false;
19575 EVT VT = LD->getMemoryVT();
19576 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19577 return false;
19578 Ptr = LD->getBasePtr();
19579 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19580 if (ST->isIndexed())
19581 return false;
19582 EVT VT = ST->getMemoryVT();
19583 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19584 return false;
19585 Ptr = ST->getBasePtr();
19586 IsLoad = false;
19587 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19588 if (LD->isIndexed())
19589 return false;
19590 EVT VT = LD->getMemoryVT();
19591 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19592 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19593 return false;
19594 Ptr = LD->getBasePtr();
19595 IsMasked = true;
19596 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19597 if (ST->isIndexed())
19598 return false;
19599 EVT VT = ST->getMemoryVT();
19600 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19601 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19602 return false;
19603 Ptr = ST->getBasePtr();
19604 IsLoad = false;
19605 IsMasked = true;
19606 } else {
19607 return false;
19608 }
19609 return true;
19610}
19611
19612/// Try turning a load/store into a pre-indexed load/store when the base
19613/// pointer is an add or subtract and it has other uses besides the load/store.
19614/// After the transformation, the new indexed load/store has effectively folded
19615/// the add/subtract in and all of its other uses are redirected to the
19616/// new load/store.
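/// For example (illustrative, on a target with pre-indexed addressing):
///   t0 = add x, 4   ; t0 also has other uses
///   y  = load t0
/// becomes a single pre-indexed load that produces both y and the updated
/// pointer, so the separate add can be removed.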
19617bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19618 if (Level < AfterLegalizeDAG)
19619 return false;
19620
19621 bool IsLoad = true;
19622 bool IsMasked = false;
19623 SDValue Ptr;
19624 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19625 Ptr, TLI))
19626 return false;
19627
19628 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19629 // out. There is no reason to make this a preinc/predec.
19630 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19631 Ptr->hasOneUse())
19632 return false;
19633
19634 // Ask the target to do addressing mode selection.
19635 SDValue BasePtr;
19636 SDValue Offset;
19637 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19638 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19639 return false;
19640
19641 // Backends without true r+i pre-indexed forms may need to pass a
19642 // constant base with a variable offset so that constant coercion
19643 // will work with the patterns in canonical form.
19644 bool Swapped = false;
19645 if (isa<ConstantSDNode>(BasePtr)) {
19646 std::swap(BasePtr, Offset);
19647 Swapped = true;
19648 }
19649
19650 // Don't create an indexed load / store with zero offset.
19651 if (isNullConstant(Offset))
19652 return false;
19653
19654 // Try turning it into a pre-indexed load / store except when:
19655 // 1) The new base ptr is a frame index.
19656 // 2) If N is a store and the new base ptr is either the same as or is a
19657 // predecessor of the value being stored.
19658 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19659 // that would create a cycle.
19660 // 4) All uses are load / store ops that use it as old base ptr.
19661
19662 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19663 // (plus the implicit offset) to a register to preinc anyway.
19664 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19665 return false;
19666
19667 // Check #2.
19668 if (!IsLoad) {
19669 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19670 : cast<StoreSDNode>(N)->getValue();
19671
19672 // Would require a copy.
19673 if (Val == BasePtr)
19674 return false;
19675
19676 // Would create a cycle.
19677 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19678 return false;
19679 }
19680
19681 // Caches for hasPredecessorHelper.
19682 SmallPtrSet<const SDNode *, 32> Visited;
19683 SmallVector<const SDNode *, 16> Worklist;
19684 Worklist.push_back(N);
19685
19686 // If the offset is a constant, there may be other adds of constants that
19687 // can be folded with this one. We should do this to avoid having to keep
19688 // a copy of the original base pointer.
19689 SmallVector<SDNode *, 16> OtherUses;
19692 for (SDUse &Use : BasePtr->uses()) {
19693 // Skip the use that is Ptr and uses of other results from BasePtr's
19694 // node (important for nodes that return multiple results).
19695 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19696 continue;
19697
19698 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19699 MaxSteps))
19700 continue;
19701
19702 if (Use.getUser()->getOpcode() != ISD::ADD &&
19703 Use.getUser()->getOpcode() != ISD::SUB) {
19704 OtherUses.clear();
19705 break;
19706 }
19707
19708 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19709 if (!isa<ConstantSDNode>(Op1)) {
19710 OtherUses.clear();
19711 break;
19712 }
19713
19714 // FIXME: In some cases, we can be smarter about this.
19715 if (Op1.getValueType() != Offset.getValueType()) {
19716 OtherUses.clear();
19717 break;
19718 }
19719
19720 OtherUses.push_back(Use.getUser());
19721 }
19722
19723 if (Swapped)
19724 std::swap(BasePtr, Offset);
19725
19726 // Now check for #3 and #4.
19727 bool RealUse = false;
19728
19729 for (SDNode *User : Ptr->users()) {
19730 if (User == N)
19731 continue;
19732 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
19733 return false;
19734
19735 // If Ptr may be folded into the addressing mode of another use, then it's
19736 // not profitable to do this transformation.
19737 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
19738 RealUse = true;
19739 }
19740
19741 if (!RealUse)
19742 return false;
19743
19744 SDValue Result;
19745 if (!IsMasked) {
19746 if (IsLoad)
19747 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19748 else
19749 Result =
19750 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19751 } else {
19752 if (IsLoad)
19753 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19754 Offset, AM);
19755 else
19756 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19757 Offset, AM);
19758 }
19759 ++PreIndexedNodes;
19760 ++NodesCombined;
19761 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19762 Result.dump(&DAG); dbgs() << '\n');
19763 WorklistRemover DeadNodes(*this);
19764 if (IsLoad) {
19765 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19766 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19767 } else {
19768 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19769 }
19770
19771 // Finally, since the node is now dead, remove it from the graph.
19772 deleteAndRecombine(N);
19773
19774 if (Swapped)
19775 std::swap(BasePtr, Offset);
19776
19777 // Replace other uses of BasePtr that can be updated to use Ptr
19778 for (SDNode *OtherUse : OtherUses) {
19779 unsigned OffsetIdx = 1;
19780 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19781 OffsetIdx = 0;
19782 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
19783 "Expected BasePtr operand");
19784
19785 // We need to replace ptr0 in the following expression:
19786 // x0 * offset0 + y0 * ptr0 = t0
19787 // knowing that
19788 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19789 //
19790 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19791 // indexed load/store and the expression that needs to be re-written.
19792 //
19793 // Therefore, we have:
19794 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
19795
19796 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
19797 const APInt &Offset0 = CN->getAPIntValue();
19798 const APInt &Offset1 = Offset->getAsAPIntVal();
19799 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19800 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19801 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19802 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19803
19804 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19805
19806 APInt CNV = Offset0;
19807 if (X0 < 0) CNV = -CNV;
19808 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19809 else CNV = CNV - Offset1;
19810
19811 SDLoc DL(OtherUse);
19812
19813 // We can now generate the new expression.
19814 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19815 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19816
19817 SDValue NewUse =
19818 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
19819 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
19820 deleteAndRecombine(OtherUse);
19821 }
19822
19823 // Replace the uses of Ptr with uses of the updated base value.
19824 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19825 deleteAndRecombine(Ptr.getNode());
19826 AddToWorklist(Result.getNode());
19827
19828 return true;
19829}
19830
19831 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19832 SDValue &BasePtr, SDValue &Offset,
19833 ISD::MemIndexedMode &AM,
19834 SelectionDAG &DAG,
19835 const TargetLowering &TLI) {
19836 if (PtrUse == N ||
19837 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19838 return false;
19839
19840 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19841 return false;
19842
19843 // Don't create an indexed load / store with zero offset.
19844 if (isNullConstant(Offset))
19845 return false;
19846
19847 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19848 return false;
19849
19852 for (SDNode *User : BasePtr->users()) {
19853 if (User == Ptr.getNode())
19854 continue;
19855
19856 // Don't do it if there's a later user which could perform the indexing instead.
19857 if (isa<MemSDNode>(User)) {
19858 bool IsLoad = true;
19859 bool IsMasked = false;
19860 SDValue OtherPtr;
19861 if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19862 IsMasked, OtherPtr, TLI)) {
19863 SmallVector<const SDNode *, 2> Worklist;
19864 Worklist.push_back(User);
19865 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19866 return false;
19867 }
19868 }
19869
19870 // If all the uses are load / store addresses, then don't do the
19871 // transformation.
19872 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19873 for (SDNode *UserUser : User->users())
19874 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19875 return false;
19876 }
19877 }
19878 return true;
19879}
19880
19881 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19882 bool &IsMasked, SDValue &Ptr,
19883 SDValue &BasePtr, SDValue &Offset,
19884 ISD::MemIndexedMode &AM,
19885 SelectionDAG &DAG,
19886 const TargetLowering &TLI) {
19887 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19888 IsMasked, Ptr, TLI) ||
19889 Ptr->hasOneUse())
19890 return nullptr;
19891
19892 // Try turning it into a post-indexed load / store except when
19893 // 1) All uses are load / store ops that use it as base ptr (and
19894 // it may be folded into the addressing mode).
19895 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19896 // nor a successor of N. Otherwise, if Op is folded that would
19897 // create a cycle.
19899 for (SDNode *Op : Ptr->users()) {
19900 // Check for #1.
19901 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19902 continue;
19903
19904 // Check for #2.
19905 SmallPtrSet<const SDNode *, 32> Visited;
19906 SmallVector<const SDNode *, 8> Worklist;
19907 // Ptr is predecessor to both N and Op.
19908 Visited.insert(Ptr.getNode());
19909 Worklist.push_back(N);
19910 Worklist.push_back(Op);
19911 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19912 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19913 return Op;
19914 }
19915 return nullptr;
19916}
19917
19918 /// Try to combine a load/store with an add/sub of the base pointer node into a
19919 /// post-indexed load/store. The transformation effectively folds the add/subtract
19920 /// into the new indexed load/store, and all of its uses are redirected to the
19921/// new load/store.
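/// For example (illustrative, on a target with post-indexed addressing):
///   y  = load x
///   x' = add x, 4
/// becomes a single post-indexed load that produces both y and the updated
/// pointer x', so the separate add can be removed.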
19922bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19923 if (Level < AfterLegalizeDAG)
19924 return false;
19925
19926 bool IsLoad = true;
19927 bool IsMasked = false;
19928 SDValue Ptr;
19929 SDValue BasePtr;
19930 SDValue Offset;
19931 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19932 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19933 Offset, AM, DAG, TLI);
19934 if (!Op)
19935 return false;
19936
19937 SDValue Result;
19938 if (!IsMasked)
19939 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19940 Offset, AM)
19941 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19942 BasePtr, Offset, AM);
19943 else
19944 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19945 BasePtr, Offset, AM)
19946 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19947 BasePtr, Offset, AM);
19948 ++PostIndexedNodes;
19949 ++NodesCombined;
19950 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19951 Result.dump(&DAG); dbgs() << '\n');
19952 WorklistRemover DeadNodes(*this);
19953 if (IsLoad) {
19954 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19955 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19956 } else {
19957 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19958 }
19959
19960 // Finally, since the node is now dead, remove it from the graph.
19961 deleteAndRecombine(N);
19962
19963 // Replace the uses of Op with uses of the updated base value.
19964 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
19965 Result.getValue(IsLoad ? 1 : 0));
19966 deleteAndRecombine(Op);
19967 return true;
19968}
19969
19970/// Return the base-pointer arithmetic from an indexed \p LD.
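/// For example, for a PRE_INC or POST_INC load this returns (add BP, Inc);
/// for the *_DEC modes it returns (sub BP, Inc).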
19971SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19972 ISD::MemIndexedMode AM = LD->getAddressingMode();
19973 assert(AM != ISD::UNINDEXED);
19974 SDValue BP = LD->getOperand(1);
19975 SDValue Inc = LD->getOperand(2);
19976
19977 // Some backends use TargetConstants for load offsets, but don't expect
19978 // TargetConstants in general ADD nodes. We can convert these constants into
19979 // regular Constants (if the constant is not opaque).
19980 assert((Inc.getOpcode() != ISD::TargetConstant ||
19981 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19982 "Cannot split out indexing using opaque target constants");
19983 if (Inc.getOpcode() == ISD::TargetConstant) {
19984 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19985 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19986 ConstInc->getValueType(0));
19987 }
19988
19989 unsigned Opc =
19990 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19991 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19992}
19993
19994 static inline ElementCount numVectorEltsOrZero(EVT T) {
19995 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19996}
19997
19998bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19999 EVT STType = Val.getValueType();
20000 EVT STMemType = ST->getMemoryVT();
20001 if (STType == STMemType)
20002 return true;
20003 if (isTypeLegal(STMemType))
20004 return false; // fail.
20005 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
20006 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
20007 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
20008 return true;
20009 }
20010 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
20011 STType.isInteger() && STMemType.isInteger()) {
20012 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
20013 return true;
20014 }
20015 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
20016 Val = DAG.getBitcast(STMemType, Val);
20017 return true;
20018 }
20019 return false; // fail.
20020}
20021
20022bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
20023 EVT LDMemType = LD->getMemoryVT();
20024 EVT LDType = LD->getValueType(0);
20025 assert(Val.getValueType() == LDMemType &&
20026 "Attempting to extend value of non-matching type");
20027 if (LDType == LDMemType)
20028 return true;
20029 if (LDMemType.isInteger() && LDType.isInteger()) {
20030 switch (LD->getExtensionType()) {
20031 case ISD::NON_EXTLOAD:
20032 Val = DAG.getBitcast(LDType, Val);
20033 return true;
20034 case ISD::EXTLOAD:
20035 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
20036 return true;
20037 case ISD::SEXTLOAD:
20038 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
20039 return true;
20040 case ISD::ZEXTLOAD:
20041 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
20042 return true;
20043 }
20044 }
20045 return false;
20046}
20047
20048StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20049 int64_t &Offset) {
20050 SDValue Chain = LD->getOperand(0);
20051
20052 // Look through CALLSEQ_START.
20053 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20054 Chain = Chain->getOperand(0);
20055
20056 StoreSDNode *ST = nullptr;
20057 SmallVector<SDValue, 8> Aliases;
20058 if (Chain.getOpcode() == ISD::TokenFactor) {
20059 // Look for unique store within the TokenFactor.
20060 for (SDValue Op : Chain->ops()) {
20061 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20062 if (!Store)
20063 continue;
20064 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20065 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20066 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20067 continue;
20068 // Make sure the store is not aliased with any nodes in TokenFactor.
20069 GatherAllAliases(Store, Chain, Aliases);
20070 if (Aliases.empty() ||
20071 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20072 ST = Store;
20073 break;
20074 }
20075 } else {
20076 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20077 if (Store) {
20078 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20079 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20080 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20081 ST = Store;
20082 }
20083 }
20084
20085 return ST;
20086}
20087
20088SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20089 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20090 return SDValue();
20091 SDValue Chain = LD->getOperand(0);
20092 int64_t Offset;
20093
20094 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20095 // TODO: Relax this restriction for unordered atomics (see D66309)
20096 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20097 return SDValue();
20098
20099 EVT LDType = LD->getValueType(0);
20100 EVT LDMemType = LD->getMemoryVT();
20101 EVT STMemType = ST->getMemoryVT();
20102 EVT STType = ST->getValue().getValueType();
20103
20104 // There are two cases to consider here:
20105 // 1. The store is fixed width and the load is scalable. In this case we
20106 // don't know at compile time if the store completely envelops the load
20107 // so we abandon the optimisation.
20108 // 2. The store is scalable and the load is fixed width. We could
20109 // potentially support a limited number of cases here, but there has been
20110 // no cost-benefit analysis to prove it's worth it.
20111 bool LdStScalable = LDMemType.isScalableVT();
20112 if (LdStScalable != STMemType.isScalableVT())
20113 return SDValue();
20114
20115 // If we are dealing with scalable vectors on a big endian platform the
20116 // calculation of offsets below becomes trickier, since we do not know at
20117 // compile time the absolute size of the vector. Until we've done more
20118 // analysis on big-endian platforms it seems better to bail out for now.
20119 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20120 return SDValue();
20121
20122 // Normalize for Endianness. After this Offset=0 will denote that the least
20123 // significant bit in the loaded value maps to the least significant bit in
20124 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20125 // n:th least significant byte of the stored value.
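// For example, with a 4-byte store feeding a 2-byte load at the same address,
// Offset stays 0 on a little-endian target, while on a big-endian target it
// becomes (4 - 2) - 0 = 2, i.e. the load reads the two most significant bytes
// of the stored value.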
20126 int64_t OrigOffset = Offset;
20127 if (DAG.getDataLayout().isBigEndian())
20128 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20129 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20130 8 -
20131 Offset;
20132
20133 // Check that the stored value covers all bits that are loaded.
20134 bool STCoversLD;
20135
20136 TypeSize LdMemSize = LDMemType.getSizeInBits();
20137 TypeSize StMemSize = STMemType.getSizeInBits();
20138 if (LdStScalable)
20139 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20140 else
20141 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20142 StMemSize.getFixedValue());
20143
20144 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20145 if (LD->isIndexed()) {
20146 // Cannot handle opaque target constants and we must respect the user's
20147 // request not to split indexes from loads.
20148 if (!canSplitIdx(LD))
20149 return SDValue();
20150 SDValue Idx = SplitIndexingFromLoad(LD);
20151 SDValue Ops[] = {Val, Idx, Chain};
20152 return CombineTo(LD, Ops, 3);
20153 }
20154 return CombineTo(LD, Val, Chain);
20155 };
20156
20157 if (!STCoversLD)
20158 return SDValue();
20159
20160 // Memory as copy space (potentially masked).
20161 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20162 // Simple case: Direct non-truncating forwarding
20163 if (LDType.getSizeInBits() == LdMemSize)
20164 return ReplaceLd(LD, ST->getValue(), Chain);
20165 // Can we model the truncate and extension with an and mask?
20166 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20167 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20168 // Mask to size of LDMemType
20169 auto Mask =
20170 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20171 StMemSize.getFixedValue()),
20172 SDLoc(ST), STType);
20173 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20174 return ReplaceLd(LD, Val, Chain);
20175 }
20176 }
20177
20178 // Handle some cases for big-endian that would be Offset 0 and handled for
20179 // little-endian.
20180 SDValue Val = ST->getValue();
20181 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20182 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20183 !LDType.isVector() && isTypeLegal(STType) &&
20184 TLI.isOperationLegal(ISD::SRL, STType)) {
20185 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20186 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20187 Offset = 0;
20188 }
20189 }
20190
20191 // TODO: Deal with nonzero offset.
20192 if (LD->getBasePtr().isUndef() || Offset != 0)
20193 return SDValue();
20194 // Model necessary truncations / extensions.
20195 // Truncate Value To Stored Memory Size.
20196 do {
20197 if (!getTruncatedStoreValue(ST, Val))
20198 break;
20199 if (!isTypeLegal(LDMemType))
20200 break;
20201 if (STMemType != LDMemType) {
20202 // TODO: Support vectors? This requires extract_subvector/bitcast.
20203 if (!STMemType.isVector() && !LDMemType.isVector() &&
20204 STMemType.isInteger() && LDMemType.isInteger())
20205 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20206 else
20207 break;
20208 }
20209 if (!extendLoadedValueToExtension(LD, Val))
20210 break;
20211 return ReplaceLd(LD, Val, Chain);
20212 } while (false);
20213
20214 // On failure, cleanup dead nodes we may have created.
20215 if (Val->use_empty())
20216 deleteAndRecombine(Val.getNode());
20217 return SDValue();
20218}
20219
20220SDValue DAGCombiner::visitLOAD(SDNode *N) {
20221 LoadSDNode *LD = cast<LoadSDNode>(N);
20222 SDValue Chain = LD->getChain();
20223 SDValue Ptr = LD->getBasePtr();
20224
20225 // If load is not volatile and there are no uses of the loaded value (and
20226 // the updated indexed value in case of indexed loads), change uses of the
20227 // chain value into uses of the chain input (i.e. delete the dead load).
20228 // TODO: Allow this for unordered atomics (see D66309)
20229 if (LD->isSimple()) {
20230 if (N->getValueType(1) == MVT::Other) {
20231 // Unindexed loads.
20232 if (!N->hasAnyUseOfValue(0)) {
20233 // It's not safe to use the two value CombineTo variant here. e.g.
20234 // v1, chain2 = load chain1, loc
20235 // v2, chain3 = load chain2, loc
20236 // v3 = add v2, c
20237 // Now we replace use of chain2 with chain1. This makes the second load
20238 // isomorphic to the one we are deleting, and thus makes this load live.
20239 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20240 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20241 dbgs() << "\n");
20242 WorklistRemover DeadNodes(*this);
20243 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20244 AddUsersToWorklist(Chain.getNode());
20245 if (N->use_empty())
20246 deleteAndRecombine(N);
20247
20248 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20249 }
20250 } else {
20251 // Indexed loads.
20252 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20253
20254 // If this load has an opaque TargetConstant offset, then we cannot split
20255 // the indexing into an add/sub directly (that TargetConstant may not be
20256 // valid for a different type of node, and we cannot convert an opaque
20257 // target constant into a regular constant).
20258 bool CanSplitIdx = canSplitIdx(LD);
20259
20260 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20261 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20262 SDValue Index;
20263 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20264 Index = SplitIndexingFromLoad(LD);
20265 // Try to fold the base pointer arithmetic into subsequent loads and
20266 // stores.
20267 AddUsersToWorklist(N);
20268 } else
20269 Index = DAG.getUNDEF(N->getValueType(1));
20270 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20271 dbgs() << "\nWith: "; Undef.dump(&DAG);
20272 dbgs() << " and 2 other values\n");
20273 WorklistRemover DeadNodes(*this);
20274 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20275 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20276 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20277 deleteAndRecombine(N);
20278 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20279 }
20280 }
20281 }
20282
20283 // If this load is directly stored, replace the load value with the stored
20284 // value.
20285 if (auto V = ForwardStoreValueToDirectLoad(LD))
20286 return V;
20287
20288 // Try to infer better alignment information than the load already has.
20289 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20290 !LD->isAtomic()) {
20291 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20292 if (*Alignment > LD->getAlign() &&
20293 isAligned(*Alignment, LD->getSrcValueOffset())) {
20294 SDValue NewLoad = DAG.getExtLoad(
20295 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20296 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20297 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20298 // NewLoad will always be N as we are only refining the alignment
20299 assert(NewLoad.getNode() == N);
20300 (void)NewLoad;
20301 }
20302 }
20303 }
20304
20305 if (LD->isUnindexed()) {
20306 // Walk up chain skipping non-aliasing memory nodes.
20307 SDValue BetterChain = FindBetterChain(LD, Chain);
20308
20309 // If there is a better chain.
20310 if (Chain != BetterChain) {
20311 SDValue ReplLoad;
20312
20313 // Replace the chain to avoid the dependency.
20314 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20315 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20316 BetterChain, Ptr, LD->getMemOperand());
20317 } else {
20318 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20319 LD->getValueType(0),
20320 BetterChain, Ptr, LD->getMemoryVT(),
20321 LD->getMemOperand());
20322 }
20323
20324 // Create token factor to keep old chain connected.
20325 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20326 MVT::Other, Chain, ReplLoad.getValue(1));
20327
20328 // Replace uses with load result and token factor
20329 return CombineTo(N, ReplLoad.getValue(0), Token);
20330 }
20331 }
20332
20333 // Try transforming N to an indexed load.
20334 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20335 return SDValue(N, 0);
20336
20337 // Try to slice up N to more direct loads if the slices are mapped to
20338 // different register banks or pairing can take place.
20339 if (SliceUpLoad(N))
20340 return SDValue(N, 0);
20341
20342 return SDValue();
20343}
20344
20345namespace {
20346
20347/// Helper structure used to slice a load in smaller loads.
20348/// Basically a slice is obtained from the following sequence:
20349/// Origin = load Ty1, Base
20350/// Shift = srl Ty1 Origin, CstTy Amount
20351/// Inst = trunc Shift to Ty2
20352///
20353/// Then, it will be rewritten into:
20354/// Slice = load SliceTy, Base + SliceOffset
20355/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20356///
20357/// SliceTy is deduced from the number of bits that are actually used to
20358/// build Inst.
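/// For example, on a little-endian target:
///   Origin = load i32, Base
///   Shift  = srl i32 Origin, 16
///   Inst   = trunc i32 Shift to i16
/// becomes a single i16 load from Base + 2.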
20359struct LoadedSlice {
20360 /// Helper structure used to compute the cost of a slice.
20361 struct Cost {
20362 /// Are we optimizing for code size.
20363 bool ForCodeSize = false;
20364
20365 /// Various cost.
20366 unsigned Loads = 0;
20367 unsigned Truncates = 0;
20368 unsigned CrossRegisterBanksCopies = 0;
20369 unsigned ZExts = 0;
20370 unsigned Shift = 0;
20371
20372 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20373
20374 /// Get the cost of one isolated slice.
20375 Cost(const LoadedSlice &LS, bool ForCodeSize)
20376 : ForCodeSize(ForCodeSize), Loads(1) {
20377 EVT TruncType = LS.Inst->getValueType(0);
20378 EVT LoadedType = LS.getLoadedType();
20379 if (TruncType != LoadedType &&
20380 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20381 ZExts = 1;
20382 }
20383
20384 /// Account for slicing gain in the current cost.
20385 /// Slicing provide a few gains like removing a shift or a
20386 /// truncate. This method allows to grow the cost of the original
20387 /// load with the gain from this slice.
20388 void addSliceGain(const LoadedSlice &LS) {
20389 // Each slice saves a truncate.
20390 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20391 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20392 ++Truncates;
20393 // If there is a shift amount, this slice gets rid of it.
20394 if (LS.Shift)
20395 ++Shift;
20396 // If this slice can merge a cross register bank copy, account for it.
20397 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20398 ++CrossRegisterBanksCopies;
20399 }
20400
20401 Cost &operator+=(const Cost &RHS) {
20402 Loads += RHS.Loads;
20403 Truncates += RHS.Truncates;
20404 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20405 ZExts += RHS.ZExts;
20406 Shift += RHS.Shift;
20407 return *this;
20408 }
20409
20410 bool operator==(const Cost &RHS) const {
20411 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20412 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20413 ZExts == RHS.ZExts && Shift == RHS.Shift;
20414 }
20415
20416 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20417
20418 bool operator<(const Cost &RHS) const {
20419 // Assume cross register banks copies are as expensive as loads.
20420 // FIXME: Do we want some more target hooks?
20421 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20422 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20423 // Unless we are optimizing for code size, consider the
20424 // expensive operation first.
20425 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20426 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20427 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20428 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20429 }
20430
20431 bool operator>(const Cost &RHS) const { return RHS < *this; }
20432
20433 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20434
20435 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20436 };
20437
20438 // The last instruction that represent the slice. This should be a
20439 // truncate instruction.
20440 SDNode *Inst;
20441
20442 // The original load instruction.
20443 LoadSDNode *Origin;
20444
20445 // The right shift amount in bits from the original load.
20446 unsigned Shift;
20447
20448 // The DAG from which Origin came from.
20449 // This is used to get some contextual information about legal types, etc.
20450 SelectionDAG *DAG;
20451
20452 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20453 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20454 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20455
20456 /// Get the bits used in a chunk of bits \p BitWidth large.
20457 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
20458 /// unused bits set to 0.
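  /// For example, an i8 truncate of an i32 origin with Shift = 16 uses bits
  /// [23:16], so the returned mask is 0x00ff0000.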
20459 APInt getUsedBits() const {
20460 // Reproduce the trunc(lshr) sequence:
20461 // - Start from the truncated value.
20462 // - Zero extend to the desired bit width.
20463 // - Shift left.
20464 assert(Origin && "No original load to compare against.");
20465 unsigned BitWidth = Origin->getValueSizeInBits(0);
20466 assert(Inst && "This slice is not bound to an instruction");
20467 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20468 "Extracted slice is bigger than the whole type!");
20469 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20470 UsedBits.setAllBits();
20471 UsedBits = UsedBits.zext(BitWidth);
20472 UsedBits <<= Shift;
20473 return UsedBits;
20474 }
20475
20476 /// Get the size of the slice to be loaded in bytes.
20477 unsigned getLoadedSize() const {
20478 unsigned SliceSize = getUsedBits().popcount();
20479 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20480 return SliceSize / 8;
20481 }
20482
20483 /// Get the type that will be loaded for this slice.
20484 /// Note: This may not be the final type for the slice.
20485 EVT getLoadedType() const {
20486 assert(DAG && "Missing context");
20487 LLVMContext &Ctxt = *DAG->getContext();
20488 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20489 }
20490
20491 /// Get the alignment of the load used for this slice.
20492 Align getAlign() const {
20493 Align Alignment = Origin->getAlign();
20494 uint64_t Offset = getOffsetFromBase();
20495 if (Offset != 0)
20496 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20497 return Alignment;
20498 }
20499
20500 /// Check if this slice can be rewritten with legal operations.
20501 bool isLegal() const {
20502 // An invalid slice is not legal.
20503 if (!Origin || !Inst || !DAG)
20504 return false;
20505
20506 // Offsets are for indexed load only, we do not handle that.
20507 if (!Origin->getOffset().isUndef())
20508 return false;
20509
20510 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20511
20512 // Check that the type is legal.
20513 EVT SliceType = getLoadedType();
20514 if (!TLI.isTypeLegal(SliceType))
20515 return false;
20516
20517 // Check that the load is legal for this type.
20518 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20519 return false;
20520
20521 // Check that the offset can be computed.
20522 // 1. Check its type.
20523 EVT PtrType = Origin->getBasePtr().getValueType();
20524 if (PtrType == MVT::Untyped || PtrType.isExtended())
20525 return false;
20526
20527 // 2. Check that it fits in the immediate.
20528 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20529 return false;
20530
20531 // 3. Check that the computation is legal.
20532 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20533 return false;
20534
20535 // Check that the zext is legal if it needs one.
20536 EVT TruncateType = Inst->getValueType(0);
20537 if (TruncateType != SliceType &&
20538 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20539 return false;
20540
20541 return true;
20542 }
20543
20544 /// Get the offset in bytes of this slice in the original chunk of
20545 /// bits.
20546 /// \pre DAG != nullptr.
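  /// For example, Shift = 16 on an i32 origin gives byte offset 2 on a
  /// little-endian target and 4 - 2 - getLoadedSize() on a big-endian one.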
20547 uint64_t getOffsetFromBase() const {
20548 assert(DAG && "Missing context.");
20549 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20550 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20551 uint64_t Offset = Shift / 8;
20552 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20553 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20554 "The size of the original loaded type is not a multiple of a"
20555 " byte.");
20556 // If Offset is bigger than TySizeInBytes, it means we are loading all
20557 // zeros. This should have been optimized before in the process.
20558 assert(TySizeInBytes > Offset &&
20559 "Invalid shift amount for given loaded size");
20560 if (IsBigEndian)
20561 Offset = TySizeInBytes - Offset - getLoadedSize();
20562 return Offset;
20563 }
20564
20565 /// Generate the sequence of instructions to load the slice
20566 /// represented by this object and redirect the uses of this slice to
20567 /// this new sequence of instructions.
20568 /// \pre this->Inst && this->Origin are valid Instructions and this
20569 /// object passed the legal check: LoadedSlice::isLegal returned true.
20570 /// \return The last instruction of the sequence used to load the slice.
20571 SDValue loadSlice() const {
20572 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20573 const SDValue &OldBaseAddr = Origin->getBasePtr();
20574 SDValue BaseAddr = OldBaseAddr;
20575 // Get the offset in that chunk of bytes w.r.t. the endianness.
20576 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20577 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20578 if (Offset) {
20579 // BaseAddr = BaseAddr + Offset.
20580 EVT ArithType = BaseAddr.getValueType();
20581 SDLoc DL(Origin);
20582 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20583 DAG->getConstant(Offset, DL, ArithType));
20584 }
20585
20586 // Create the type of the loaded slice according to its size.
20587 EVT SliceType = getLoadedType();
20588
20589 // Create the load for the slice.
20590 SDValue LastInst =
20591 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20593 Origin->getMemOperand()->getFlags());
20594 // If the final type is not the same as the loaded type, this means that
20595 // we have to pad with zero. Create a zero extend for that.
20596 EVT FinalType = Inst->getValueType(0);
20597 if (SliceType != FinalType)
20598 LastInst =
20599 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20600 return LastInst;
20601 }
20602
20603 /// Check if this slice can be merged with an expensive cross register
20604 /// bank copy. E.g.,
20605 /// i = load i32
20606 /// f = bitcast i32 i to float
20607 bool canMergeExpensiveCrossRegisterBankCopy() const {
20608 if (!Inst || !Inst->hasOneUse())
20609 return false;
20610 SDNode *User = *Inst->user_begin();
20611 if (User->getOpcode() != ISD::BITCAST)
20612 return false;
20613 assert(DAG && "Missing context");
20614 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20615 EVT ResVT = User->getValueType(0);
20616 const TargetRegisterClass *ResRC =
20617 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20618 const TargetRegisterClass *ArgRC =
20619 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20620 User->getOperand(0)->isDivergent());
20621 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20622 return false;
20623
20624 // At this point, we know that we perform a cross-register-bank copy.
20625 // Check if it is expensive.
20626 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20627 // Assume bitcasts are cheap, unless both register classes do not
20628 // explicitly share a common sub class.
20629 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20630 return false;
20631
20632 // Check if it will be merged with the load.
20633 // 1. Check the alignment / fast memory access constraint.
20634 unsigned IsFast = 0;
20635 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20636 Origin->getAddressSpace(), getAlign(),
20637 Origin->getMemOperand()->getFlags(), &IsFast) ||
20638 !IsFast)
20639 return false;
20640
20641 // 2. Check that the load is a legal operation for that type.
20642 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20643 return false;
20644
20645 // 3. Check that we do not have a zext in the way.
20646 if (Inst->getValueType(0) != getLoadedType())
20647 return false;
20648
20649 return true;
20650 }
20651};
20652
20653} // end anonymous namespace
20654
20655/// Check that all bits set in \p UsedBits form a dense region, i.e.,
20656/// \p UsedBits looks like 0..0 1..1 0..0.
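/// For example, 0x00ff0000 is dense, while 0x00ff00ff is not.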
20657static bool areUsedBitsDense(const APInt &UsedBits) {
20658 // If all the bits are one, this is dense!
20659 if (UsedBits.isAllOnes())
20660 return true;
20661
20662 // Get rid of the unused bits on the right.
20663 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
20664 // Get rid of the unused bits on the left.
20665 if (NarrowedUsedBits.countl_zero())
20666 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
20667 // Check that the chunk of bits is completely used.
20668 return NarrowedUsedBits.isAllOnes();
20669}
20670
20671/// Check whether or not \p First and \p Second are next to each other
20672/// in memory. This means that there is no hole between the bits loaded
20673/// by \p First and the bits loaded by \p Second.
20674static bool areSlicesNextToEachOther(const LoadedSlice &First,
20675 const LoadedSlice &Second) {
20676 assert(First.Origin == Second.Origin && First.Origin &&
20677 "Unable to match different memory origins.");
20678 APInt UsedBits = First.getUsedBits();
20679 assert((UsedBits & Second.getUsedBits()) == 0 &&
20680 "Slices are not supposed to overlap.");
20681 UsedBits |= Second.getUsedBits();
20682 return areUsedBitsDense(UsedBits);
20683}
20684
20685/// Adjust the \p GlobalLSCost according to the target
20686 /// pairing capabilities and the layout of the slices.
20687/// \pre \p GlobalLSCost should account for at least as many loads as
20688/// there is in the slices in \p LoadedSlices.
20689 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20690 LoadedSlice::Cost &GlobalLSCost) {
20691 unsigned NumberOfSlices = LoadedSlices.size();
20692 // If there are fewer than 2 elements, no pairing is possible.
20693 if (NumberOfSlices < 2)
20694 return;
20695
20696 // Sort the slices so that elements that are likely to be next to each
20697 // other in memory are next to each other in the list.
20698 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
20699 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
20700 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
20701 });
20702 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
20703 // First (resp. Second) is the first (resp. second) potential candidate
20704 // to be placed in a paired load.
20705 const LoadedSlice *First = nullptr;
20706 const LoadedSlice *Second = nullptr;
20707 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
20708 // Set the beginning of the pair.
20709 First = Second) {
20710 Second = &LoadedSlices[CurrSlice];
20711
20712 // If First is NULL, it means we start a new pair.
20713 // Get to the next slice.
20714 if (!First)
20715 continue;
20716
20717 EVT LoadedType = First->getLoadedType();
20718
20719 // If the types of the slices are different, we cannot pair them.
20720 if (LoadedType != Second->getLoadedType())
20721 continue;
20722
20723 // Check if the target supplies paired loads for this type.
20724 Align RequiredAlignment;
20725 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
20726 // move to the next pair, this type is hopeless.
20727 Second = nullptr;
20728 continue;
20729 }
20730 // Check if we meet the alignment requirement.
20731 if (First->getAlign() < RequiredAlignment)
20732 continue;
20733
20734 // Check that both loads are next to each other in memory.
20735 if (!areSlicesNextToEachOther(*First, *Second))
20736 continue;
20737
20738 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
20739 --GlobalLSCost.Loads;
20740 // Move to the next pair.
20741 Second = nullptr;
20742 }
20743}
20744
20745/// Check the profitability of all involved LoadedSlice.
20746 /// Currently, it is considered profitable if there are exactly two
20747/// involved slices (1) which are (2) next to each other in memory, and
20748/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20749///
20750/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20751/// the elements themselves.
20752///
20753 /// FIXME: When the cost model is mature enough, we can relax
20754/// constraints (1) and (2).
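/// For example, slicing an i32 load into two adjacent i16 slices is typically
/// profitable when the target provides paired loads for i16: the two narrow
/// loads then cost no more than the original load, and the truncates go away.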
20755 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20756 const APInt &UsedBits, bool ForCodeSize) {
20757 unsigned NumberOfSlices = LoadedSlices.size();
20758 if (StressLoadSlicing)
20759 return NumberOfSlices > 1;
20760
20761 // Check (1).
20762 if (NumberOfSlices != 2)
20763 return false;
20764
20765 // Check (2).
20766 if (!areUsedBitsDense(UsedBits))
20767 return false;
20768
20769 // Check (3).
20770 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20771 // The original code has one big load.
20772 OrigCost.Loads = 1;
20773 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20774 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20775 // Accumulate the cost of all the slices.
20776 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20777 GlobalSlicingCost += SliceCost;
20778
20779 // Account as cost in the original configuration the gain obtained
20780 // with the current slices.
20781 OrigCost.addSliceGain(LS);
20782 }
20783
20784 // If the target supports paired load, adjust the cost accordingly.
20785 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20786 return OrigCost > GlobalSlicingCost;
20787}
20788
20789/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20790 /// operations, split it into the various pieces being extracted.
20791///
20792/// This sort of thing is introduced by SROA.
20793/// This slicing takes care not to insert overlapping loads.
20794/// \pre LI is a simple load (i.e., not an atomic or volatile load).
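/// For example, on a little-endian target
///   x = load i32, p
///   a = trunc i32 x to i16
///   b = trunc i32 (srl i32 x, 16) to i16
/// can be rewritten as two independent i16 loads from p and p + 2.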
20795bool DAGCombiner::SliceUpLoad(SDNode *N) {
20796 if (Level < AfterLegalizeDAG)
20797 return false;
20798
20799 LoadSDNode *LD = cast<LoadSDNode>(N);
20800 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20801 !LD->getValueType(0).isInteger())
20802 return false;
20803
20804 // The algorithm to split up a load of a scalable vector into individual
20805 // elements currently requires knowing the length of the loaded type,
20806 // so will need adjusting to work on scalable vectors.
20807 if (LD->getValueType(0).isScalableVector())
20808 return false;
20809
20810 // Keep track of already used bits to detect overlapping values.
20811 // In that case, we will just abort the transformation.
20812 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20813
20814 SmallVector<LoadedSlice, 4> LoadedSlices;
20815
20816 // Check if this load is used as several smaller chunks of bits.
20817 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20818 // of computation for each trunc.
20819 for (SDUse &U : LD->uses()) {
20820 // Skip the uses of the chain.
20821 if (U.getResNo() != 0)
20822 continue;
20823
20824 SDNode *User = U.getUser();
20825 unsigned Shift = 0;
20826
20827 // Check if this is a trunc(lshr).
20828 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20829 isa<ConstantSDNode>(User->getOperand(1))) {
20830 Shift = User->getConstantOperandVal(1);
20831 User = *User->user_begin();
20832 }
20833
20834 // At this point, User is a truncate, iff we encountered trunc or
20835 // trunc(lshr).
20836 if (User->getOpcode() != ISD::TRUNCATE)
20837 return false;
20838
20839 // The width of the type must be a power of 2 and at least 8 bits.
20840 // Otherwise the load cannot be represented in LLVM IR.
20841 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
20842 // the slice would straddle byte boundaries. We do not support that.
20843 unsigned Width = User->getValueSizeInBits(0);
20844 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20845 return false;
20846
20847 // Build the slice for this chain of computations.
20848 LoadedSlice LS(User, LD, Shift, &DAG);
20849 APInt CurrentUsedBits = LS.getUsedBits();
20850
20851 // Check if this slice overlaps with another.
20852 if ((CurrentUsedBits & UsedBits) != 0)
20853 return false;
20854 // Update the bits used globally.
20855 UsedBits |= CurrentUsedBits;
20856
20857 // Check if the new slice would be legal.
20858 if (!LS.isLegal())
20859 return false;
20860
20861 // Record the slice.
20862 LoadedSlices.push_back(LS);
20863 }
20864
20865 // Abort slicing if it does not seem to be profitable.
20866 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20867 return false;
20868
20869 ++SlicedLoads;
20870
20871 // Rewrite each chain to use an independent load.
20872 // By construction, each chain can be represented by a unique load.
20873
20874 // Prepare the argument for the new token factor for all the slices.
20875 SmallVector<SDValue, 8> ArgChains;
20876 for (const LoadedSlice &LS : LoadedSlices) {
20877 SDValue SliceInst = LS.loadSlice();
20878 CombineTo(LS.Inst, SliceInst, true);
20879 if (SliceInst.getOpcode() != ISD::LOAD)
20880 SliceInst = SliceInst.getOperand(0);
20881 assert(SliceInst->getOpcode() == ISD::LOAD &&
20882 "It takes more than a zext to get to the loaded slice!!");
20883 ArgChains.push_back(SliceInst.getValue(1));
20884 }
20885
20886 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20887 ArgChains);
20888 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20889 AddToWorklist(Chain.getNode());
20890 return true;
20891}
20892
20893 /// Check to see if V is (and (load ptr), imm), where the load has
20894 /// specific bytes cleared out. If so, return the byte size being masked out
20895/// and the shift amount.
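/// For example, (and (load i32 p), 0xffff00ff) clears byte 1 of the loaded
/// value, so this returns {1, 1}: one byte masked out, starting at byte 1.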
20896static std::pair<unsigned, unsigned>
20898 std::pair<unsigned, unsigned> Result(0, 0);
20899
20900 // Check for the structure we're looking for.
20901 if (V->getOpcode() != ISD::AND ||
20902 !isa<ConstantSDNode>(V->getOperand(1)) ||
20903 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20904 return Result;
20905
20906 // Check the chain and pointer.
20907 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20908 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20909
20910 // This only handles simple types.
20911 if (V.getValueType() != MVT::i16 &&
20912 V.getValueType() != MVT::i32 &&
20913 V.getValueType() != MVT::i64)
20914 return Result;
20915
20916 // Check the constant mask. Invert it so that the bits being masked out are
20917 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
20918 // follow the sign bit for uniformity.
20919 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20920 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20921 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20922 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20923 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20924 if (NotMaskLZ == 64) return Result; // All zero mask.
20925
20926 // See if we have a continuous run of bits. If so, we have 0*1+0*
20927 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20928 return Result;
20929
20930 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20931 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20932 NotMaskLZ -= 64-V.getValueSizeInBits();
20933
20934 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20935 switch (MaskedBytes) {
20936 case 1:
20937 case 2:
20938 case 4: break;
20939 default: return Result; // All one mask, or 5-byte mask.
20940 }
20941
20942 // Verify that the masked-out region starts at a byte offset that is a multiple
20943 // of its width, so that the narrowed access is aligned the same as its width.
20944 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20945
20946 // For narrowing to be valid, it must be the case that the load is the
20947 // memory operation immediately preceding the store.
20948 if (LD == Chain.getNode())
20949 ; // ok.
20950 else if (Chain->getOpcode() == ISD::TokenFactor &&
20951 SDValue(LD, 1).hasOneUse()) {
20952 // LD has only 1 chain use, so there are no indirect dependencies.
20953 if (!LD->isOperandOf(Chain.getNode()))
20954 return Result;
20955 } else
20956 return Result; // Fail.
20957
20958 Result.first = MaskedBytes;
20959 Result.second = NotMaskTZ/8;
20960 return Result;
20961}
20962
20963/// Check to see if IVal is something that provides a value as specified by
20964/// MaskInfo. If so, replace the specified store with a narrower store of
20965/// truncated IVal.
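/// For example, with MaskInfo = {1, 1}, a 4-byte store to p of
///   (or (and (load p), 0xffff00ff), IVal)
/// becomes a 1-byte store of (trunc (srl IVal, 8)) at p + 1 on a little-endian
/// target, provided IVal can only be nonzero in bits [15:8].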
20966static SDValue
20967ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20968 SDValue IVal, StoreSDNode *St,
20969 DAGCombiner *DC) {
20970 unsigned NumBytes = MaskInfo.first;
20971 unsigned ByteShift = MaskInfo.second;
20972 SelectionDAG &DAG = DC->getDAG();
20973
20974 // Check to see if IVal is all zeros in the part being masked in by the 'or'
20975 // that uses this. If not, this is not a replacement.
20976 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20977 ByteShift*8, (ByteShift+NumBytes)*8);
20978 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20979
20980 // Check that it is legal on the target to do this. It is legal if the new
20981 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20982 // legalization. If the source type is legal, but the store type isn't, see
20983 // if we can use a truncating store.
20984 MVT VT = MVT::getIntegerVT(NumBytes * 8);
20985 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20986 bool UseTruncStore;
20987 if (DC->isTypeLegal(VT))
20988 UseTruncStore = false;
20989 else if (TLI.isTypeLegal(IVal.getValueType()) &&
20990 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20991 UseTruncStore = true;
20992 else
20993 return SDValue();
20994
20995 // Can't do this for indexed stores.
20996 if (St->isIndexed())
20997 return SDValue();
20998
20999 // Check that the target doesn't think this is a bad idea.
21000 if (St->getMemOperand() &&
21001 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
21002 *St->getMemOperand()))
21003 return SDValue();
21004
21005 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
21006 // shifted by ByteShift and truncated down to NumBytes.
21007 if (ByteShift) {
21008 SDLoc DL(IVal);
21009 IVal = DAG.getNode(
21010 ISD::SRL, DL, IVal.getValueType(), IVal,
21011 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
21012 }
21013
21014 // Figure out the offset for the store and the alignment of the access.
21015 unsigned StOffset;
21016 if (DAG.getDataLayout().isLittleEndian())
21017 StOffset = ByteShift;
21018 else
21019 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
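// E.g. (illustrative) for an i32 IVal with NumBytes == 1 and ByteShift == 1,
// the replaced byte lives at offset 1 on little-endian targets and at offset
// 4 - 1 - 1 == 2 on big-endian targets.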
21020
21021 SDValue Ptr = St->getBasePtr();
21022 if (StOffset) {
21023 SDLoc DL(IVal);
21024 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
21025 }
21026
21027 ++OpsNarrowed;
21028 if (UseTruncStore)
21029 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
21030 St->getPointerInfo().getWithOffset(StOffset), VT,
21031 St->getBaseAlign());
21032
21033 // Truncate down to the new size.
21034 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
21035
21036 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
21037 St->getPointerInfo().getWithOffset(StOffset),
21038 St->getBaseAlign());
21039}
21040
21041/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
21042/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
21043/// narrowing the load and store if it would end up being a win for performance
21044/// or code size.
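/// For example (illustrative), on a little-endian target
///   store (or (load p), 0x00FF0000), p     ; i32 values
/// can be narrowed to a one-byte load/or/store at address p+2 (or'ing 0xFF
/// into the third byte), provided an i8 OR is legal (or custom) and the
/// target considers the narrow memory access allowed and fast.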
21045SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
21046 StoreSDNode *ST = cast<StoreSDNode>(N);
21047 if (!ST->isSimple())
21048 return SDValue();
21049
21050 SDValue Chain = ST->getChain();
21051 SDValue Value = ST->getValue();
21052 SDValue Ptr = ST->getBasePtr();
21053 EVT VT = Value.getValueType();
21054
21055 if (ST->isTruncatingStore() || VT.isVector())
21056 return SDValue();
21057
21058 unsigned Opc = Value.getOpcode();
21059
21060 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21061 !Value.hasOneUse())
21062 return SDValue();
21063
21064 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21065 // is a byte mask indicating a consecutive number of bytes, check to see if
21066 // Y is known to provide just those bytes. If so, we try to replace the
21067 // load + replace + store sequence with a single (narrower) store, which makes
21068 // the load dead.
21069 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
21070 std::pair<unsigned, unsigned> MaskedLoad;
21071 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21072 if (MaskedLoad.first)
21073 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21074 Value.getOperand(1), ST,this))
21075 return NewST;
21076
21077 // Or is commutative, so try swapping X and Y.
21078 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21079 if (MaskedLoad.first)
21080 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21081 Value.getOperand(0), ST,this))
21082 return NewST;
21083 }
21084
21085 if (!EnableReduceLoadOpStoreWidth)
21086 return SDValue();
21087
21088 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21089 return SDValue();
21090
21091 SDValue N0 = Value.getOperand(0);
21092 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21093 Chain == SDValue(N0.getNode(), 1)) {
21094 LoadSDNode *LD = cast<LoadSDNode>(N0);
21095 if (LD->getBasePtr() != Ptr ||
21096 LD->getPointerInfo().getAddrSpace() !=
21097 ST->getPointerInfo().getAddrSpace())
21098 return SDValue();
21099
21100 // Find the type NewVT to narrow the load / op / store to.
21101 SDValue N1 = Value.getOperand(1);
21102 unsigned BitWidth = N1.getValueSizeInBits();
21103 APInt Imm = N1->getAsAPIntVal();
21104 if (Opc == ISD::AND)
21105 Imm.flipAllBits();
21106 if (Imm == 0 || Imm.isAllOnes())
21107 return SDValue();
21108 // Find the least/most significant bits that need to be part of the narrowed
21109 // operation. We assume target will need to address/access full bytes, so
21110 // we make sure to align LSB and MSB at byte boundaries.
21111 unsigned BitsPerByteMask = 7u;
21112 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21113 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21114 unsigned NewBW = NextPowerOf2(MSB - LSB);
21115 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21116 // The narrowing should be profitable, the load/store operation should be
21117 // legal (or custom) and the store size should be equal to the NewVT width.
21118 while (NewBW < BitWidth &&
21119 (NewVT.getStoreSizeInBits() != NewBW ||
21120 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
21121 (!ReduceLoadOpStoreWidthForceNarrowingProfitable &&
21122 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21123 NewBW = NextPowerOf2(NewBW);
21124 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21125 }
21126 if (NewBW >= BitWidth)
21127 return SDValue();
21128
21129 // If we come this far, NewVT/NewBW reflect a power-of-2 sized type that is
21130 // large enough to cover all bits that should be modified. This type might
21131 // however be larger than really needed (such as i32 while we actually only
21132 // need to modify one byte). Now we need to find out how to align the memory
21133 // accesses to satisfy preferred alignments as well as to avoid accessing
21134 // memory outside the store size of the original access.
21135
21136 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21137
21138 // Let ShAmt denote the number of bits to skip, counted from the least
21139 // significant bits of Imm. And let PtrOff denote how much the pointer needs
21140 // to be offset (in bytes) for the new access.
21141 unsigned ShAmt = 0;
21142 uint64_t PtrOff = 0;
21143 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
21144 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21145 if (ShAmt > LSB)
21146 return SDValue();
21147 if (ShAmt + NewBW < MSB)
21148 continue;
21149
21150 // Calculate PtrOff.
21151 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21152 ? VTStoreSize - NewBW - ShAmt
21153 : ShAmt;
21154 PtrOff = PtrAdjustmentInBits / 8;
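// E.g. (illustrative) for an i32 value with NewBW == 8 and ShAmt == 16,
// PtrOff is 16 / 8 == 2 on little-endian targets and
// (32 - 8 - 16) / 8 == 1 on big-endian targets.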
21155
21156 // Now check if narrow access is allowed and fast, considering alignments.
21157 unsigned IsFast = 0;
21158 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21159 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21160 LD->getAddressSpace(), NewAlign,
21161 LD->getMemOperand()->getFlags(), &IsFast) &&
21162 IsFast)
21163 break;
21164 }
21165 // If the loop above did not find any accepted ShAmt, we need to exit here.
21166 if (ShAmt + NewBW > VTStoreSize)
21167 return SDValue();
21168
21169 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21170 if (Opc == ISD::AND)
21171 NewImm.flipAllBits();
21172 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21173 SDValue NewPtr =
21174 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21175 SDValue NewLD =
21176 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21177 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21178 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21179 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21180 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21181 SDValue NewST =
21182 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21183 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21184
21185 AddToWorklist(NewPtr.getNode());
21186 AddToWorklist(NewLD.getNode());
21187 AddToWorklist(NewVal.getNode());
21188 WorklistRemover DeadNodes(*this);
21189 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21190 ++OpsNarrowed;
21191 return NewST;
21192 }
21193
21194 return SDValue();
21195}
21196
21197/// For a given floating point load / store pair, if the load value isn't used
21198/// by any other operations, then consider transforming the pair to integer
21199/// load / store operations if the target deems the transformation profitable.
21200SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21201 StoreSDNode *ST = cast<StoreSDNode>(N);
21202 SDValue Value = ST->getValue();
21203 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21204 Value.hasOneUse()) {
21205 LoadSDNode *LD = cast<LoadSDNode>(Value);
21206 EVT VT = LD->getMemoryVT();
21207 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21208 LD->isNonTemporal() || ST->isNonTemporal() ||
21209 LD->getPointerInfo().getAddrSpace() != 0 ||
21210 ST->getPointerInfo().getAddrSpace() != 0)
21211 return SDValue();
21212
21213 TypeSize VTSize = VT.getSizeInBits();
21214
21215 // We don't know the size of scalable types at compile time so we cannot
21216 // create an integer of the equivalent size.
21217 if (VTSize.isScalable())
21218 return SDValue();
21219
21220 unsigned FastLD = 0, FastST = 0;
21221 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21222 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21223 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21224 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21225 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21226 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21227 *LD->getMemOperand(), &FastLD) ||
21228 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21229 *ST->getMemOperand(), &FastST) ||
21230 !FastLD || !FastST)
21231 return SDValue();
21232
21233 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21234 LD->getBasePtr(), LD->getMemOperand());
21235
21236 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21237 ST->getBasePtr(), ST->getMemOperand());
21238
21239 AddToWorklist(NewLD.getNode());
21240 AddToWorklist(NewST.getNode());
21241 WorklistRemover DeadNodes(*this);
21242 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21243 ++LdStFP2Int;
21244 return NewST;
21245 }
21246
21247 return SDValue();
21248}
21249
21250// This is a helper function for visitMUL to check the profitability
21251// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21252// MulNode is the original multiply, AddNode is (add x, c1),
21253// and ConstNode is c2.
21254//
21255// If the (add x, c1) has multiple uses, we could increase
21256// the number of adds if we make this transformation.
21257// It would only be worth doing this if we can remove a
21258// multiply in the process. Check for that here.
21259// To illustrate:
21260// (A + c1) * c3
21261// (A + c2) * c3
21262// We're checking for cases where we have common "c3 * A" expressions.
21263bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21264 SDValue ConstNode) {
21265 // If the add only has one use, and the target thinks the folding is
21266 // profitable or does not lead to worse code, this would be OK to do.
21267 if (AddNode->hasOneUse() &&
21268 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21269 return true;
21270
21271 // Walk all the users of the constant with which we're multiplying.
21272 for (SDNode *User : ConstNode->users()) {
21273 if (User == MulNode) // This use is the one we're on right now. Skip it.
21274 continue;
21275
21276 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21277 SDNode *OtherOp;
21278 SDNode *MulVar = AddNode.getOperand(0).getNode();
21279
21280 // OtherOp is what we're multiplying against the constant.
21281 if (User->getOperand(0) == ConstNode)
21282 OtherOp = User->getOperand(1).getNode();
21283 else
21284 OtherOp = User->getOperand(0).getNode();
21285
21286 // Check to see if multiply is with the same operand of our "add".
21287 //
21288 // ConstNode = CONST
21289 // User = ConstNode * A <-- visiting User. OtherOp is A.
21290 // ...
21291 // AddNode = (A + c1) <-- MulVar is A.
21292 // = AddNode * ConstNode <-- current visiting instruction.
21293 //
21294 // If we make this transformation, we will have a common
21295 // multiply (ConstNode * A) that we can save.
21296 if (OtherOp == MulVar)
21297 return true;
21298
21299 // Now check to see if a future expansion will give us a common
21300 // multiply.
21301 //
21302 // ConstNode = CONST
21303 // AddNode = (A + c1)
21304 // ... = AddNode * ConstNode <-- current visiting instruction.
21305 // ...
21306 // OtherOp = (A + c2)
21307 // User = OtherOp * ConstNode <-- visiting User.
21308 //
21309 // If we make this transformation, we will have a common
21310 // multiply (CONST * A) after we also do the same transformation
21311 // to the User instruction.
21312 if (OtherOp->getOpcode() == ISD::ADD &&
21313 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
21314 OtherOp->getOperand(0).getNode() == MulVar)
21315 return true;
21316 }
21317 }
21318
21319 // Didn't find a case where this would be profitable.
21320 return false;
21321}
21322
21323SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21324 unsigned NumStores) {
21325 SmallVector<SDValue, 8> Chains;
21326 SmallPtrSet<const SDNode *, 8> Visited;
21327 SDLoc StoreDL(StoreNodes[0].MemNode);
21328
21329 for (unsigned i = 0; i < NumStores; ++i) {
21330 Visited.insert(StoreNodes[i].MemNode);
21331 }
21332
21333 // don't include nodes that are children or repeated nodes.
21334 for (unsigned i = 0; i < NumStores; ++i) {
21335 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21336 Chains.push_back(StoreNodes[i].MemNode->getChain());
21337 }
21338
21339 assert(!Chains.empty() && "Chain should have generated a chain");
21340 return DAG.getTokenFactor(StoreDL, Chains);
21341}
21342
21343bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21344 const Value *UnderlyingObj = nullptr;
21345 for (const auto &MemOp : StoreNodes) {
21346 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
21347 // Pseudo value like stack frame has its own frame index and size, should
21348 // not use the first store's frame index for other frames.
21349 if (MMO->getPseudoValue())
21350 return false;
21351
21352 if (!MMO->getValue())
21353 return false;
21354
21355 const Value *Obj = getUnderlyingObject(MMO->getValue());
21356
21357 if (UnderlyingObj && UnderlyingObj != Obj)
21358 return false;
21359
21360 if (!UnderlyingObj)
21361 UnderlyingObj = Obj;
21362 }
21363
21364 return true;
21365}
21366
21367bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21368 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21369 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21370 // Make sure we have something to merge.
21371 if (NumStores < 2)
21372 return false;
21373
21374 assert((!UseTrunc || !UseVector) &&
21375 "This optimization cannot emit a vector truncating store");
21376
21377 // The latest Node in the DAG.
21378 SDLoc DL(StoreNodes[0].MemNode);
21379
21380 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21381 unsigned SizeInBits = NumStores * ElementSizeBits;
21382 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21383
21384 std::optional<MachineMemOperand::Flags> Flags;
21385 AAMDNodes AAInfo;
21386 for (unsigned I = 0; I != NumStores; ++I) {
21387 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21388 if (!Flags) {
21389 Flags = St->getMemOperand()->getFlags();
21390 AAInfo = St->getAAInfo();
21391 continue;
21392 }
21393 // Skip merging if there's an inconsistent flag.
21394 if (Flags != St->getMemOperand()->getFlags())
21395 return false;
21396 // Concatenate AA metadata.
21397 AAInfo = AAInfo.concat(St->getAAInfo());
21398 }
21399
21400 EVT StoreTy;
21401 if (UseVector) {
21402 unsigned Elts = NumStores * NumMemElts;
21403 // Get the type for the merged vector store.
21404 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21405 } else
21406 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21407
21408 SDValue StoredVal;
21409 if (UseVector) {
21410 if (IsConstantSrc) {
21411 SmallVector<SDValue, 8> BuildVector;
21412 for (unsigned I = 0; I != NumStores; ++I) {
21413 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21414 SDValue Val = St->getValue();
21415 // If constant is of the wrong type, convert it now. This comes up
21416 // when one of our stores was truncating.
21417 if (MemVT != Val.getValueType()) {
21418 Val = peekThroughBitcasts(Val);
21419 // Deal with constants of wrong size.
21420 if (ElementSizeBits != Val.getValueSizeInBits()) {
21421 auto *C = dyn_cast<ConstantSDNode>(Val);
21422 if (!C)
21423 // Not clear how to truncate FP values.
21424 // TODO: Handle truncation of build_vector constants
21425 return false;
21426
21427 EVT IntMemVT =
21428 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
21429 Val = DAG.getConstant(C->getAPIntValue()
21430 .zextOrTrunc(Val.getValueSizeInBits())
21431 .zextOrTrunc(ElementSizeBits),
21432 SDLoc(C), IntMemVT);
21433 }
21434 // Make sure the value ends up with the correct (memory) type.
21435 Val = DAG.getBitcast(MemVT, Val);
21436 }
21437 BuildVector.push_back(Val);
21438 }
21439 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21440 : ISD::BUILD_VECTOR,
21441 DL, StoreTy, BuildVector);
21442 } else {
21443 SmallVector<SDValue, 8> Ops;
21444 for (unsigned i = 0; i < NumStores; ++i) {
21445 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21446 SDValue Val = St->getValue();
21447 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21448 // type MemVT. If the underlying value is not the correct
21449 // type, but it is an extraction of an appropriate vector we
21450 // can recast Val to be of the correct type. This may require
21451 // converting between EXTRACT_VECTOR_ELT and
21452 // EXTRACT_SUBVECTOR.
21453 if ((MemVT != Val.getValueType()) &&
21454 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
21455 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
21456 EVT MemVTScalarTy = MemVT.getScalarType();
21457 // We may need to add a bitcast here to get types to line up.
21458 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21459 Val = DAG.getBitcast(MemVT, Val);
21460 } else if (MemVT.isVector() &&
21461 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
21462 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21463 } else {
21464 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
21465 : ISD::EXTRACT_VECTOR_ELT;
21466 SDValue Vec = Val.getOperand(0);
21467 SDValue Idx = Val.getOperand(1);
21468 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21469 }
21470 }
21471 Ops.push_back(Val);
21472 }
21473
21474 // Build the extracted vector elements back into a vector.
21475 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21476 : ISD::BUILD_VECTOR,
21477 DL, StoreTy, Ops);
21478 }
21479 } else {
21480 // We should always use a vector store when merging extracted vector
21481 // elements, so this path implies a store of constants.
21482 assert(IsConstantSrc && "Merged vector elements should use vector store");
21483
21484 APInt StoreInt(SizeInBits, 0);
21485
21486 // Construct a single integer constant which is made of the smaller
21487 // constant inputs.
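// Illustrative example: merging four i8 stores of 1, 2, 3 and 4 (in address
// order) on a little-endian target builds StoreInt = 0x04030201, so the
// single i32 store writes the same bytes the four narrow stores would have.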
21488 bool IsLE = DAG.getDataLayout().isLittleEndian();
21489 for (unsigned i = 0; i < NumStores; ++i) {
21490 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21491 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21492
21493 SDValue Val = St->getValue();
21494 Val = peekThroughBitcasts(Val);
21495 StoreInt <<= ElementSizeBits;
21496 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21497 StoreInt |= C->getAPIntValue()
21498 .zextOrTrunc(ElementSizeBits)
21499 .zextOrTrunc(SizeInBits);
21500 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21501 StoreInt |= C->getValueAPF()
21502 .bitcastToAPInt()
21503 .zextOrTrunc(ElementSizeBits)
21504 .zextOrTrunc(SizeInBits);
21505 // If fp truncation is necessary give up for now.
21506 if (MemVT.getSizeInBits() != ElementSizeBits)
21507 return false;
21508 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
21509 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
21510 // Not yet handled
21511 return false;
21512 } else {
21513 llvm_unreachable("Invalid constant element type");
21514 }
21515 }
21516
21517 // Create the new Load and Store operations.
21518 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21519 }
21520
21521 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21522 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21523 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21524
21525 // Make sure we use a trunc store if it's necessary to be legal.
21526 // When generating the new widened store, if the first store's pointer info
21527 // cannot be reused, discard the pointer info except for the address space,
21528 // because the widened store can no longer be represented by the original
21529 // pointer info, which is for the narrow memory object.
21530 SDValue NewStore;
21531 if (!UseTrunc) {
21532 NewStore = DAG.getStore(
21533 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21534 CanReusePtrInfo
21535 ? FirstInChain->getPointerInfo()
21536 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21537 FirstInChain->getAlign(), *Flags, AAInfo);
21538 } else { // Must be realized as a trunc store
21539 EVT LegalizedStoredValTy =
21540 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21541 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21542 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21543 SDValue ExtendedStoreVal =
21544 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21545 LegalizedStoredValTy);
21546 NewStore = DAG.getTruncStore(
21547 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21548 CanReusePtrInfo
21549 ? FirstInChain->getPointerInfo()
21550 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21551 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21552 AAInfo);
21553 }
21554
21555 // Replace all merged stores with the new store.
21556 for (unsigned i = 0; i < NumStores; ++i)
21557 CombineTo(StoreNodes[i].MemNode, NewStore);
21558
21559 AddToWorklist(NewChain.getNode());
21560 return true;
21561}
21562
21563SDNode *
21564DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21565 SmallVectorImpl<MemOpLink> &StoreNodes) {
21566 // This holds the base pointer, index, and the offset in bytes from the base
21567 // pointer. We must have a base and an offset. Do not handle stores to undef
21568 // base pointers.
21569 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21570 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21571 return nullptr;
21572
21573 SDValue Val = peekThroughBitcasts(St->getValue());
21574 StoreSource StoreSrc = getStoreSource(Val);
21575 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21576
21577 // Match on loadbaseptr if relevant.
21578 EVT MemVT = St->getMemoryVT();
21579 BaseIndexOffset LBasePtr;
21580 EVT LoadVT;
21581 if (StoreSrc == StoreSource::Load) {
21582 auto *Ld = cast<LoadSDNode>(Val);
21583 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21584 LoadVT = Ld->getMemoryVT();
21585 // Load and store should be the same type.
21586 if (MemVT != LoadVT)
21587 return nullptr;
21588 // Loads must only have one use.
21589 if (!Ld->hasNUsesOfValue(1, 0))
21590 return nullptr;
21591 // The memory operands must not be volatile/indexed/atomic.
21592 // TODO: May be able to relax for unordered atomics (see D66309)
21593 if (!Ld->isSimple() || Ld->isIndexed())
21594 return nullptr;
21595 }
21596 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21597 int64_t &Offset) -> bool {
21598 // The memory operands must not be volatile/indexed/atomic.
21599 // TODO: May be able to relax for unordered atomics (see D66309)
21600 if (!Other->isSimple() || Other->isIndexed())
21601 return false;
21602 // Don't mix temporal stores with non-temporal stores.
21603 if (St->isNonTemporal() != Other->isNonTemporal())
21604 return false;
21605 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
21606 return false;
21607 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21608 // Allow merging constants of different types as integers.
21609 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21610 : Other->getMemoryVT() != MemVT;
21611 switch (StoreSrc) {
21612 case StoreSource::Load: {
21613 if (NoTypeMatch)
21614 return false;
21615 // The Load's Base Ptr must also match.
21616 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21617 if (!OtherLd)
21618 return false;
21619 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21620 if (LoadVT != OtherLd->getMemoryVT())
21621 return false;
21622 // Loads must only have one use.
21623 if (!OtherLd->hasNUsesOfValue(1, 0))
21624 return false;
21625 // The memory operands must not be volatile/indexed/atomic.
21626 // TODO: May be able to relax for unordered atomics (see D66309)
21627 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21628 return false;
21629 // Don't mix temporal loads with non-temporal loads.
21630 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21631 return false;
21632 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
21633 *OtherLd))
21634 return false;
21635 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21636 return false;
21637 break;
21638 }
21639 case StoreSource::Constant:
21640 if (NoTypeMatch)
21641 return false;
21642 if (getStoreSource(OtherBC) != StoreSource::Constant)
21643 return false;
21644 break;
21645 case StoreSource::Extract:
21646 // Do not merge truncated stores here.
21647 if (Other->isTruncatingStore())
21648 return false;
21649 if (!MemVT.bitsEq(OtherBC.getValueType()))
21650 return false;
21651 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
21652 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21653 return false;
21654 break;
21655 default:
21656 llvm_unreachable("Unhandled store source for merging");
21657 }
21658 Ptr = BaseIndexOffset::match(Other, DAG);
21659 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
21660 };
21661
21662 // We are looking for a root node which is an ancestor to all mergeable
21663 // stores. We search up through a load, to our root and then down
21664 // through all children. For instance we will find Store{1,2,3} if
21665 // St is Store1, Store2, or Store3 where the root is not a load,
21666 // which is always true for nonvolatile ops. TODO: Expand
21667 // the search to find all valid candidates through multiple layers of loads.
21668 //
21669 // Root
21670 // |-------|-------|
21671 // Load Load Store3
21672 // | |
21673 // Store1 Store2
21674 //
21675 // FIXME: We should be able to climb and
21676 // descend TokenFactors to find candidates as well.
21677
21678 SDNode *RootNode = St->getChain().getNode();
21679 // Bail out if we already analyzed this root node and found nothing.
21680 if (ChainsWithoutMergeableStores.contains(RootNode))
21681 return nullptr;
21682
21683 // Check if the pair of StoreNode and RootNode has already bailed out of the
21684 // dependence check more times than the limit allows.
21685 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
21686 SDNode *RootNode) -> bool {
21687 auto RootCount = StoreRootCountMap.find(StoreNode);
21688 return RootCount != StoreRootCountMap.end() &&
21689 RootCount->second.first == RootNode &&
21690 RootCount->second.second > StoreMergeDependenceLimit;
21691 };
21692
21693 auto TryToAddCandidate = [&](SDUse &Use) {
21694 // This must be a chain use.
21695 if (Use.getOperandNo() != 0)
21696 return;
21697 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
21698 BaseIndexOffset Ptr;
21699 int64_t PtrDiff;
21700 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
21701 !OverLimitInDependenceCheck(OtherStore, RootNode))
21702 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
21703 }
21704 };
21705
21706 unsigned NumNodesExplored = 0;
21707 const unsigned MaxSearchNodes = 1024;
21708 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
21709 RootNode = Ldn->getChain().getNode();
21710 // Bail out if we already analyzed this root node and found nothing.
21711 if (ChainsWithoutMergeableStores.contains(RootNode))
21712 return nullptr;
21713 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21714 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
21715 SDNode *User = I->getUser();
21716 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
21717 for (SDUse &U2 : User->uses())
21718 TryToAddCandidate(U2);
21719 }
21720 // Check stores that depend on the root (e.g. Store 3 in the chart above).
21721 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
21722 TryToAddCandidate(*I);
21723 }
21724 }
21725 } else {
21726 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21727 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
21728 TryToAddCandidate(*I);
21729 }
21730
21731 return RootNode;
21732}
21733
21734// We need to check that merging these stores does not cause a loop in the
21735// DAG. Any store candidate may depend on another candidate indirectly through
21736// its operands. Check in parallel by searching up from operands of candidates.
21737bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21738 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21739 SDNode *RootNode) {
21740 // FIXME: We should be able to truncate a full search of
21741 // predecessors by doing a BFS and keeping tabs on the originating
21742 // stores from which worklist nodes come, in a similar way to
21743 // TokenFactor simplification.
21744
21745 SmallPtrSet<const SDNode *, 32> Visited;
21746 SmallVector<const SDNode *, 8> Worklist;
21747
21748 // RootNode is a predecessor to all candidates so we need not search
21749 // past it. Add RootNode (peeking through TokenFactors). Do not count
21750 // these towards size check.
21751
21752 Worklist.push_back(RootNode);
21753 while (!Worklist.empty()) {
21754 auto N = Worklist.pop_back_val();
21755 if (!Visited.insert(N).second)
21756 continue; // Already present in Visited.
21757 if (N->getOpcode() == ISD::TokenFactor) {
21758 for (SDValue Op : N->ops())
21759 Worklist.push_back(Op.getNode());
21760 }
21761 }
21762
21763 // Don't count pruning nodes towards max.
21764 unsigned int Max = 1024 + Visited.size();
21765 // Search Ops of store candidates.
21766 for (unsigned i = 0; i < NumStores; ++i) {
21767 SDNode *N = StoreNodes[i].MemNode;
21768 // Of the 4 Store Operands:
21769 // * Chain (Op 0) -> We have already considered these
21770 // in candidate selection, but only by following the
21771 // chain dependencies. We could still have a chain
21772 // dependency to a load, that has a non-chain dep to
21773 // another load, that depends on a store, etc. So it is
21774 // possible to have dependencies that consist of a mix
21775 // of chain and non-chain deps, and we need to include
21776 // chain operands in the analysis here.
21777 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21778 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21779 // but aren't necessarily from the same base node, so
21780 // cycles are possible (e.g. via indexed store).
21781 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21782 // non-indexed stores). Not constant on all targets (e.g. ARM)
21783 // and so can participate in a cycle.
21784 for (const SDValue &Op : N->op_values())
21785 Worklist.push_back(Op.getNode());
21786 }
21787 // Search through DAG. We can stop early if we find a store node.
21788 for (unsigned i = 0; i < NumStores; ++i)
21789 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21790 Max)) {
21791 // If the search bails out, record the StoreNode and RootNode in the
21792 // StoreRootCountMap. If we have seen the pair more times than the limit,
21793 // we won't add the StoreNode into the StoreNodes set again.
21794 if (Visited.size() >= Max) {
21795 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21796 if (RootCount.first == RootNode)
21797 RootCount.second++;
21798 else
21799 RootCount = {RootNode, 1};
21800 }
21801 return false;
21802 }
21803 return true;
21804}
21805
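// Check whether a call (CALLSEQ_END) appears on the token-chain path between
// the store St and the load Ld. This is used below to avoid merging
// load/store pairs across a call unless the target says that is acceptable.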
21806bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
21807 SmallPtrSet<const SDNode *, 32> Visited;
21808 SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
21809 Worklist.emplace_back(St->getChain().getNode(), false);
21810
21811 while (!Worklist.empty()) {
21812 auto [Node, FoundCall] = Worklist.pop_back_val();
21813 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
21814 continue;
21815
21816 switch (Node->getOpcode()) {
21817 case ISD::CALLSEQ_END:
21818 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
21819 break;
21820 case ISD::TokenFactor:
21821 for (SDValue Op : Node->ops())
21822 Worklist.emplace_back(Op.getNode(), FoundCall);
21823 break;
21824 case ISD::LOAD:
21825 if (Node == Ld)
21826 return FoundCall;
21827 [[fallthrough]];
21828 default:
21829 assert(Node->getOperand(0).getValueType() == MVT::Other &&
21830 "Invalid chain type");
21831 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
21832 break;
21833 }
21834 }
21835 return false;
21836}
21837
21838unsigned
21839DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21840 int64_t ElementSizeBytes) const {
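// Illustrative example: with 4-byte elements and candidate offsets
// {0, 4, 8, 20}, the first three stores are consecutive, so 3 is returned;
// the store at offset 20 remains in StoreNodes for later attempts.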
21841 while (true) {
21842 // Find a store past the width of the first store.
21843 size_t StartIdx = 0;
21844 while ((StartIdx + 1 < StoreNodes.size()) &&
21845 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21846 StoreNodes[StartIdx + 1].OffsetFromBase)
21847 ++StartIdx;
21848
21849 // Bail if we don't have enough candidates to merge.
21850 if (StartIdx + 1 >= StoreNodes.size())
21851 return 0;
21852
21853 // Trim stores that overlapped with the first store.
21854 if (StartIdx)
21855 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21856
21857 // Scan the memory operations on the chain and find the first
21858 // non-consecutive store memory address.
21859 unsigned NumConsecutiveStores = 1;
21860 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21861 // Check that the addresses are consecutive starting from the second
21862 // element in the list of stores.
21863 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21864 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21865 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21866 break;
21867 NumConsecutiveStores = i + 1;
21868 }
21869 if (NumConsecutiveStores > 1)
21870 return NumConsecutiveStores;
21871
21872 // There are no consecutive stores at the start of the list.
21873 // Remove the first store and try again.
21874 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21875 }
21876}
21877
21878bool DAGCombiner::tryStoreMergeOfConstants(
21879 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21880 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21881 LLVMContext &Context = *DAG.getContext();
21882 const DataLayout &DL = DAG.getDataLayout();
21883 int64_t ElementSizeBytes = MemVT.getStoreSize();
21884 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21885 bool MadeChange = false;
21886
21887 // Store the constants into memory as one consecutive store.
21888 while (NumConsecutiveStores >= 2) {
21889 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21890 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21891 Align FirstStoreAlign = FirstInChain->getAlign();
21892 unsigned LastLegalType = 1;
21893 unsigned LastLegalVectorType = 1;
21894 bool LastIntegerTrunc = false;
21895 bool NonZero = false;
21896 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21897 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21898 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21899 SDValue StoredVal = ST->getValue();
21900 bool IsElementZero = false;
21901 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21902 IsElementZero = C->isZero();
21903 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21904 IsElementZero = C->getConstantFPValue()->isNullValue();
21905 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21906 IsElementZero = true;
21907 if (IsElementZero) {
21908 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21909 FirstZeroAfterNonZero = i;
21910 }
21911 NonZero |= !IsElementZero;
21912
21913 // Find a legal type for the constant store.
21914 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21915 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21916 unsigned IsFast = 0;
21917
21918 // Break early when size is too large to be legal.
21919 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21920 break;
21921
21922 if (TLI.isTypeLegal(StoreTy) &&
21923 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21924 DAG.getMachineFunction()) &&
21925 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21926 *FirstInChain->getMemOperand(), &IsFast) &&
21927 IsFast) {
21928 LastIntegerTrunc = false;
21929 LastLegalType = i + 1;
21930 // Or check whether a truncstore is legal.
21931 } else if (TLI.getTypeAction(Context, StoreTy) ==
21932 TargetLowering::TypePromoteInteger) {
21933 EVT LegalizedStoredValTy =
21934 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21935 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21936 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21937 DAG.getMachineFunction()) &&
21938 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21939 *FirstInChain->getMemOperand(), &IsFast) &&
21940 IsFast) {
21941 LastIntegerTrunc = true;
21942 LastLegalType = i + 1;
21943 }
21944 }
21945
21946 // We only use vectors if the target allows it and the function is not
21947 // marked with the noimplicitfloat attribute.
21948 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21949 AllowVectors) {
21950 // Find a legal type for the vector store.
21951 unsigned Elts = (i + 1) * NumMemElts;
21952 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21953 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21954 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21955 TLI.allowsMemoryAccess(Context, DL, Ty,
21956 *FirstInChain->getMemOperand(), &IsFast) &&
21957 IsFast)
21958 LastLegalVectorType = i + 1;
21959 }
21960 }
21961
21962 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21963 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21964 bool UseTrunc = LastIntegerTrunc && !UseVector;
21965
21966 // Check if we found a legal integer type that creates a meaningful
21967 // merge.
21968 if (NumElem < 2) {
21969 // We know that candidate stores are in order and of correct
21970 // shape. While there is no mergeable sequence from the
21971 // beginning one may start later in the sequence. The only
21972 // reason a merge of size N could have failed where another of
21973 // the same size would not have, is if the alignment has
21974 // improved or we've dropped a non-zero value. Drop as many
21975 // candidates as we can here.
21976 unsigned NumSkip = 1;
21977 while ((NumSkip < NumConsecutiveStores) &&
21978 (NumSkip < FirstZeroAfterNonZero) &&
21979 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21980 NumSkip++;
21981
21982 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21983 NumConsecutiveStores -= NumSkip;
21984 continue;
21985 }
21986
21987 // Check that we can merge these candidates without causing a cycle.
21988 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21989 RootNode)) {
21990 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21991 NumConsecutiveStores -= NumElem;
21992 continue;
21993 }
21994
21995 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21996 /*IsConstantSrc*/ true,
21997 UseVector, UseTrunc);
21998
21999 // Remove merged stores for next iteration.
22000 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22001 NumConsecutiveStores -= NumElem;
22002 }
22003 return MadeChange;
22004}
22005
22006bool DAGCombiner::tryStoreMergeOfExtracts(
22007 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22008 EVT MemVT, SDNode *RootNode) {
22009 LLVMContext &Context = *DAG.getContext();
22010 const DataLayout &DL = DAG.getDataLayout();
22011 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22012 bool MadeChange = false;
22013
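// Illustrative example: two adjacent f32 stores whose values are extracted
// from source vectors can typically be replaced by a single <2 x f32> vector
// store, when that type is legal and the target considers the access fast.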
22014 // Loop on Consecutive Stores on success.
22015 while (NumConsecutiveStores >= 2) {
22016 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22017 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22018 Align FirstStoreAlign = FirstInChain->getAlign();
22019 unsigned NumStoresToMerge = 1;
22020 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22021 // Find a legal type for the vector store.
22022 unsigned Elts = (i + 1) * NumMemElts;
22023 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
22024 unsigned IsFast = 0;
22025
22026 // Break early when size is too large to be legal.
22027 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
22028 break;
22029
22030 if (TLI.isTypeLegal(Ty) &&
22031 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22032 TLI.allowsMemoryAccess(Context, DL, Ty,
22033 *FirstInChain->getMemOperand(), &IsFast) &&
22034 IsFast)
22035 NumStoresToMerge = i + 1;
22036 }
22037
22038 // Check if we found a legal integer type creating a meaningful
22039 // merge.
22040 if (NumStoresToMerge < 2) {
22041 // We know that candidate stores are in order and of correct
22042 // shape. While there is no mergeable sequence from the
22043 // beginning one may start later in the sequence. The only
22044 // reason a merge of size N could have failed where another of
22045 // the same size would not have, is if the alignment has
22046 // improved. Drop as many candidates as we can here.
22047 unsigned NumSkip = 1;
22048 while ((NumSkip < NumConsecutiveStores) &&
22049 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22050 NumSkip++;
22051
22052 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22053 NumConsecutiveStores -= NumSkip;
22054 continue;
22055 }
22056
22057 // Check that we can merge these candidates without causing a cycle.
22058 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
22059 RootNode)) {
22060 StoreNodes.erase(StoreNodes.begin(),
22061 StoreNodes.begin() + NumStoresToMerge);
22062 NumConsecutiveStores -= NumStoresToMerge;
22063 continue;
22064 }
22065
22066 MadeChange |= mergeStoresOfConstantsOrVecElts(
22067 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22068 /*UseVector*/ true, /*UseTrunc*/ false);
22069
22070 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22071 NumConsecutiveStores -= NumStoresToMerge;
22072 }
22073 return MadeChange;
22074}
22075
22076bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22077 unsigned NumConsecutiveStores, EVT MemVT,
22078 SDNode *RootNode, bool AllowVectors,
22079 bool IsNonTemporalStore,
22080 bool IsNonTemporalLoad) {
22081 LLVMContext &Context = *DAG.getContext();
22082 const DataLayout &DL = DAG.getDataLayout();
22083 int64_t ElementSizeBytes = MemVT.getStoreSize();
22084 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22085 bool MadeChange = false;
22086
22087 // Look for load nodes which are used by the stored values.
22088 SmallVector<MemOpLink, 8> LoadNodes;
22089
22090 // Find acceptable loads. Loads need to have the same chain (token factor),
22091 // must not be zext, volatile, indexed, and they must be consecutive.
22092 BaseIndexOffset LdBasePtr;
22093
22094 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22095 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22096 SDValue Val = peekThroughBitcasts(St->getValue());
22097 LoadSDNode *Ld = cast<LoadSDNode>(Val);
22098
22099 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22100 // If this is not the first ptr that we check.
22101 int64_t LdOffset = 0;
22102 if (LdBasePtr.getBase().getNode()) {
22103 // The base ptr must be the same.
22104 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22105 break;
22106 } else {
22107 // Check that all other base pointers are the same as this one.
22108 LdBasePtr = LdPtr;
22109 }
22110
22111 // We found a potential memory operand to merge.
22112 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22113 }
22114
22115 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22116 Align RequiredAlignment;
22117 bool NeedRotate = false;
22118 if (LoadNodes.size() == 2) {
22119 // If we have load/store pair instructions and we only have two values,
22120 // don't bother merging.
22121 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22122 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22123 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22124 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22125 break;
22126 }
22127 // If the loads are reversed, see if we can rotate the halves into place.
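// Illustrative example: stores to q and q+4 taking their values from loads
// of p+4 and p respectively can be done as a single i64 load from p followed
// by a rotate by 32 bits and a single i64 store to q.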
22128 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22129 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22130 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22131 if (Offset0 - Offset1 == ElementSizeBytes &&
22132 (hasOperation(ISD::ROTL, PairVT) ||
22133 hasOperation(ISD::ROTR, PairVT))) {
22134 std::swap(LoadNodes[0], LoadNodes[1]);
22135 NeedRotate = true;
22136 }
22137 }
22138 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22139 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22140 Align FirstStoreAlign = FirstInChain->getAlign();
22141 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22142
22143 // Scan the memory operations on the chain and find the first
22144 // non-consecutive load memory address. These variables hold the index in
22145 // the store node array.
22146
22147 unsigned LastConsecutiveLoad = 1;
22148
22149 // This variable refers to the size and not the index in the array.
22150 unsigned LastLegalVectorType = 1;
22151 unsigned LastLegalIntegerType = 1;
22152 bool isDereferenceable = true;
22153 bool DoIntegerTruncate = false;
22154 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22155 SDValue LoadChain = FirstLoad->getChain();
22156 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22157 // All loads must share the same chain.
22158 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22159 break;
22160
22161 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22162 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22163 break;
22164 LastConsecutiveLoad = i;
22165
22166 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22167 isDereferenceable = false;
22168
22169 // Find a legal type for the vector store.
22170 unsigned Elts = (i + 1) * NumMemElts;
22171 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22172
22173 // Break early when size is too large to be legal.
22174 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22175 break;
22176
22177 unsigned IsFastSt = 0;
22178 unsigned IsFastLd = 0;
22179 // Don't try vector types if we need a rotate. We may still fail the
22180 // legality checks for the integer type, but we can't handle the rotate
22181 // case with vectors.
22182 // FIXME: We could use a shuffle in place of the rotate.
22183 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22184 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22185 DAG.getMachineFunction()) &&
22186 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22187 *FirstInChain->getMemOperand(), &IsFastSt) &&
22188 IsFastSt &&
22189 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22190 *FirstLoad->getMemOperand(), &IsFastLd) &&
22191 IsFastLd) {
22192 LastLegalVectorType = i + 1;
22193 }
22194
22195 // Find a legal type for the integer store.
22196 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22197 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22198 if (TLI.isTypeLegal(StoreTy) &&
22199 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22200 DAG.getMachineFunction()) &&
22201 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22202 *FirstInChain->getMemOperand(), &IsFastSt) &&
22203 IsFastSt &&
22204 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22205 *FirstLoad->getMemOperand(), &IsFastLd) &&
22206 IsFastLd) {
22207 LastLegalIntegerType = i + 1;
22208 DoIntegerTruncate = false;
22209 // Or check whether a truncstore and extload is legal.
22210 } else if (TLI.getTypeAction(Context, StoreTy) ==
22211 TargetLowering::TypePromoteInteger) {
22212 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22213 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22214 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22215 DAG.getMachineFunction()) &&
22216 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22217 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22218 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22219 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22220 *FirstInChain->getMemOperand(), &IsFastSt) &&
22221 IsFastSt &&
22222 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22223 *FirstLoad->getMemOperand(), &IsFastLd) &&
22224 IsFastLd) {
22225 LastLegalIntegerType = i + 1;
22226 DoIntegerTruncate = true;
22227 }
22228 }
22229 }
22230
22231 // Only use vector types if the vector type is larger than the integer
22232 // type. If they are the same, use integers.
22233 bool UseVectorTy =
22234 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22235 unsigned LastLegalType =
22236 std::max(LastLegalVectorType, LastLegalIntegerType);
22237
22238 // We add +1 here because the LastXXX variables refer to location while
22239 // the NumElem refers to array/index size.
22240 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22241 NumElem = std::min(LastLegalType, NumElem);
22242 Align FirstLoadAlign = FirstLoad->getAlign();
22243
22244 if (NumElem < 2) {
22245 // We know that candidate stores are in order and of correct
22246 // shape. While there is no mergeable sequence from the
22247 // beginning one may start later in the sequence. The only
22248 // reason a merge of size N could have failed where another of
22249 // the same size would not have is if the alignment or either
22250 // the load or store has improved. Drop as many candidates as we
22251 // can here.
22252 unsigned NumSkip = 1;
22253 while ((NumSkip < LoadNodes.size()) &&
22254 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22255 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22256 NumSkip++;
22257 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22258 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22259 NumConsecutiveStores -= NumSkip;
22260 continue;
22261 }
22262
22263 // Check that we can merge these candidates without causing a cycle.
22264 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22265 RootNode)) {
22266 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22267 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22268 NumConsecutiveStores -= NumElem;
22269 continue;
22270 }
22271
22272 // Find if it is better to use vectors or integers to load and store
22273 // to memory.
22274 EVT JointMemOpVT;
22275 if (UseVectorTy) {
22276 // Find a legal type for the vector store.
22277 unsigned Elts = NumElem * NumMemElts;
22278 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22279 } else {
22280 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22281 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22282 }
22283
22284 // Check if there is a call in the load/store chain.
22285 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22286 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22287 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22288 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22289 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22290 NumConsecutiveStores -= NumElem;
22291 continue;
22292 }
22293
22294 SDLoc LoadDL(LoadNodes[0].MemNode);
22295 SDLoc StoreDL(StoreNodes[0].MemNode);
22296
22297 // The merged loads are required to have the same incoming chain, so
22298 // using the first's chain is acceptable.
22299
22300 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22301 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22302 AddToWorklist(NewStoreChain.getNode());
22303
22304 MachineMemOperand::Flags LdMMOFlags =
22305 isDereferenceable ? MachineMemOperand::MODereferenceable
22306 : MachineMemOperand::MONone;
22307 if (IsNonTemporalLoad)
22308 LdMMOFlags |= MachineMemOperand::MONonTemporal;
22309
22310 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
22311
22312 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22313 ? MachineMemOperand::MONonTemporal
22314 : MachineMemOperand::MONone;
22315
22316 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22317
22318 SDValue NewLoad, NewStore;
22319 if (UseVectorTy || !DoIntegerTruncate) {
22320 NewLoad = DAG.getLoad(
22321 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22322 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22323 SDValue StoreOp = NewLoad;
22324 if (NeedRotate) {
22325 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22326 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22327 "Unexpected type for rotate-able load pair");
22328 SDValue RotAmt =
22329 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22330 // Target can convert to the identical ROTR if it does not have ROTL.
22331 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22332 }
22333 NewStore = DAG.getStore(
22334 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22335 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22336 : MachinePointerInfo(FirstStoreAS),
22337 FirstStoreAlign, StMMOFlags);
22338 } else { // This must be the truncstore/extload case
22339 EVT ExtendedTy =
22340 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22341 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22342 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22343 FirstLoad->getPointerInfo(), JointMemOpVT,
22344 FirstLoadAlign, LdMMOFlags);
22345 NewStore = DAG.getTruncStore(
22346 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22347 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22348 : MachinePointerInfo(FirstStoreAS),
22349 JointMemOpVT, FirstInChain->getAlign(),
22350 FirstInChain->getMemOperand()->getFlags());
22351 }
22352
22353 // Transfer chain users from old loads to the new load.
22354 for (unsigned i = 0; i < NumElem; ++i) {
22355 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22356 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22357 SDValue(NewLoad.getNode(), 1));
22358 }
22359
22360 // Replace all stores with the new store. Recursively remove corresponding
22361 // values if they are no longer used.
22362 for (unsigned i = 0; i < NumElem; ++i) {
22363 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22364 CombineTo(StoreNodes[i].MemNode, NewStore);
22365 if (Val->use_empty())
22366 recursivelyDeleteUnusedNodes(Val.getNode());
22367 }
22368
22369 MadeChange = true;
22370 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22371 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22372 NumConsecutiveStores -= NumElem;
22373 }
22374 return MadeChange;
22375}
22376
22377bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22378 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22379 return false;
22380
22381 // TODO: Extend this function to merge stores of scalable vectors.
22382 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22383 // store since we know <vscale x 16 x i8> is exactly twice as large as
22384 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22385 EVT MemVT = St->getMemoryVT();
22386 if (MemVT.isScalableVT())
22387 return false;
22388 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22389 return false;
22390
22391 // This function cannot currently deal with non-byte-sized memory sizes.
22392 int64_t ElementSizeBytes = MemVT.getStoreSize();
22393 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22394 return false;
22395
22396 // Do not bother looking at stored values that are not constants, loads, or
22397 // extracted vector elements.
22398 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22399 const StoreSource StoreSrc = getStoreSource(StoredVal);
22400 if (StoreSrc == StoreSource::Unknown)
22401 return false;
22402
22403 SmallVector<MemOpLink, 8> StoreNodes;
22404 // Find potential store merge candidates by searching through chain sub-DAG
22405 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22406
22407 // Check if there is anything to merge.
22408 if (StoreNodes.size() < 2)
22409 return false;
22410
22411 // Sort the memory operands according to their distance from the
22412 // base pointer.
22413 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22414 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22415 });
22416
22417 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22418 Attribute::NoImplicitFloat);
22419 bool IsNonTemporalStore = St->isNonTemporal();
22420 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22421 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22422
22423 // Store merging attempts to merge the lowest-addressed stores first. This
22424 // generally works out well when it succeeds, since the remaining stores are
22425 // re-checked after the first collection of stores is merged. However, in the
22426 // case that a non-mergeable store is found first, e.g., {p[-2],
22427 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22428 // mergeable cases. To prevent this, we prune such stores from the
22429 // front of StoreNodes here.
22430 bool MadeChange = false;
22431 while (StoreNodes.size() > 1) {
22432 unsigned NumConsecutiveStores =
22433 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22434 // There are no more stores in the list to examine.
22435 if (NumConsecutiveStores == 0)
22436 return MadeChange;
22437
22438 // We have at least 2 consecutive stores. Try to merge them.
22439 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22440 switch (StoreSrc) {
22441 case StoreSource::Constant:
22442 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22443 MemVT, RootNode, AllowVectors);
22444 break;
22445
22446 case StoreSource::Extract:
22447 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22448 MemVT, RootNode);
22449 break;
22450
22451 case StoreSource::Load:
22452 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22453 MemVT, RootNode, AllowVectors,
22454 IsNonTemporalStore, IsNonTemporalLoad);
22455 break;
22456
22457 default:
22458 llvm_unreachable("Unhandled store source type");
22459 }
22460 }
22461
22462 // Remember if we failed to optimize, to save compile time.
22463 if (!MadeChange)
22464 ChainsWithoutMergeableStores.insert(RootNode);
22465
22466 return MadeChange;
22467}
22468
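/// Rebuild \p ST on top of \p BetterChain, and keep the old chain alive by
/// token-factoring it with the new store, so the false chain dependency is
/// broken without losing ordering against the original chain.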
22469SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22470 SDLoc SL(ST);
22471 SDValue ReplStore;
22472
22473 // Replace the chain to avoid dependency.
22474 if (ST->isTruncatingStore()) {
22475 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22476 ST->getBasePtr(), ST->getMemoryVT(),
22477 ST->getMemOperand());
22478 } else {
22479 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22480 ST->getMemOperand());
22481 }
22482
22483 // Create token to keep both nodes around.
22484 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22485 MVT::Other, ST->getChain(), ReplStore);
22486
22487 // Make sure the new and old chains are cleaned up.
22488 AddToWorklist(Token.getNode());
22489
22490 // Don't add users to work list.
22491 return CombineTo(ST, Token, false);
22492}
22493
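/// Replace a store of a floating-point constant with a store of the
/// equivalent integer bit pattern (e.g. 'store f32 1.0' becomes
/// 'store i32 0x3f800000'), or split an f64 constant into two i32 stores
/// when an i64 store is not available.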
22494SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22495 SDValue Value = ST->getValue();
22496 if (Value.getOpcode() == ISD::TargetConstantFP)
22497 return SDValue();
22498
22499 if (!ISD::isNormalStore(ST))
22500 return SDValue();
22501
22502 SDLoc DL(ST);
22503
22504 SDValue Chain = ST->getChain();
22505 SDValue Ptr = ST->getBasePtr();
22506
22507 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22508
22509 // NOTE: If the original store is volatile, this transform must not increase
22510 // the number of stores. For example, on x86-32 an f64 can be stored in one
22511 // processor operation but an i64 (which is not legal) requires two. So the
22512 // transform should not be done in this case.
22513
22514 SDValue Tmp;
22515 switch (CFP->getSimpleValueType(0).SimpleTy) {
22516 default:
22517 llvm_unreachable("Unknown FP type");
22518 case MVT::f16: // We don't do this for these yet.
22519 case MVT::bf16:
22520 case MVT::f80:
22521 case MVT::f128:
22522 case MVT::ppcf128:
22523 return SDValue();
22524 case MVT::f32:
22525 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22526 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22527 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22528 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22529 MVT::i32);
22530 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22531 }
22532
22533 return SDValue();
22534 case MVT::f64:
22535 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22536 ST->isSimple()) ||
22537 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22538 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22539 getZExtValue(), SDLoc(CFP), MVT::i64);
22540 return DAG.getStore(Chain, DL, Tmp,
22541 Ptr, ST->getMemOperand());
22542 }
22543
22544 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22545 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22546 // Many FP stores are not made apparent until after legalize, e.g. for
22547 // argument passing. Since this is so common, custom legalize the
22548 // 64-bit integer store into two 32-bit stores.
22549 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22550 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22551 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22552 if (DAG.getDataLayout().isBigEndian())
22553 std::swap(Lo, Hi);
22554
22555 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22556 AAMDNodes AAInfo = ST->getAAInfo();
22557
22558 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22559 ST->getBaseAlign(), MMOFlags, AAInfo);
22560 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22561 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22562 ST->getPointerInfo().getWithOffset(4),
22563 ST->getBaseAlign(), MMOFlags, AAInfo);
22564 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22565 St0, St1);
22566 }
22567
22568 return SDValue();
22569 }
22570}
22571
22572// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22573//
22574// If a store of a load with an element inserted into it has no other
22575 // uses in between on the chain, then we can consider the vector store
22576// dead and replace it with just the single scalar element store.
22577SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22578 SDLoc DL(ST);
22579 SDValue Value = ST->getValue();
22580 SDValue Ptr = ST->getBasePtr();
22581 SDValue Chain = ST->getChain();
22582 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22583 return SDValue();
22584
22585 SDValue Elt = Value.getOperand(1);
22586 SDValue Idx = Value.getOperand(2);
22587
22588 // If the element isn't byte sized or is implicitly truncated then we can't
22589 // compute an offset.
22590 EVT EltVT = Elt.getValueType();
22591 if (!EltVT.isByteSized() ||
22592 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22593 return SDValue();
22594
22595 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22596 if (!Ld || Ld->getBasePtr() != Ptr ||
22597 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22598 !ISD::isNormalStore(ST) ||
22599 Ld->getAddressSpace() != ST->getAddressSpace() ||
22600 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22601 return SDValue();
22602
22603 unsigned IsFast;
22604 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22605 Elt.getValueType(), ST->getAddressSpace(),
22606 ST->getAlign(), ST->getMemOperand()->getFlags(),
22607 &IsFast) ||
22608 !IsFast)
22609 return SDValue();
22610
22611 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22612
22613 // If the offset is a known constant then try to recover the pointer
22614 // info
22615 SDValue NewPtr;
22616 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22617 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22618 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22619 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22620 } else {
22621 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
22622 }
22623
22624 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22625 ST->getMemOperand()->getFlags());
22626}
22627
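// For a truncating atomic store, try to simplify the stored value based on
// the bits the memory type actually demands.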
22628SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22629 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22630 SDValue Val = ST->getVal();
22631 EVT VT = Val.getValueType();
22632 EVT MemVT = ST->getMemoryVT();
22633
22634 if (MemVT.bitsLT(VT)) { // Is truncating store
22635 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22636 MemVT.getScalarSizeInBits());
22637 // See if we can simplify the operation with SimplifyDemandedBits, which
22638 // only works if the value has a single use.
22639 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22640 return SDValue(N, 0);
22641 }
22642
22643 return SDValue();
22644}
22645
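/// Fold (store (vselect Mask, X, (load Ptr)), Ptr) into a masked store of X
/// when nothing with side effects sits between the load and the store on the
/// chain. If the load is the true operand of the vselect, the mask is
/// inverted first.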
22646static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
22647 const SDLoc &Dl) {
22648 if (!Store->isSimple() || !ISD::isNormalStore(Store))
22649 return SDValue();
22650
22651 SDValue StoredVal = Store->getValue();
22652 SDValue StorePtr = Store->getBasePtr();
22653 SDValue StoreOffset = Store->getOffset();
22654 EVT VT = Store->getMemoryVT();
22655 unsigned AddrSpace = Store->getAddressSpace();
22656 Align Alignment = Store->getAlign();
22657 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22658
22659 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
22660 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
22661 return SDValue();
22662
22663 SDValue Mask, OtherVec, LoadCh;
22664 unsigned LoadPos;
22665 if (sd_match(StoredVal,
22666 m_VSelect(m_Value(Mask), m_Value(OtherVec),
22667 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22668 m_Specific(StoreOffset))))) {
22669 LoadPos = 2;
22670 } else if (sd_match(StoredVal,
22671 m_VSelect(m_Value(Mask),
22672 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22673 m_Specific(StoreOffset)),
22674 m_Value(OtherVec)))) {
22675 LoadPos = 1;
22676 } else {
22677 return SDValue();
22678 }
22679
22680 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
22681 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
22682 Load->getAddressSpace() != AddrSpace)
22683 return SDValue();
22684
22685 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
22686 return SDValue();
22687
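// The masked store writes OtherVec to the lanes where the mask is true and
// leaves the remaining lanes (the loaded values) untouched. When the load is
// the true operand of the vselect, OtherVec occupies the false lanes, so the
// mask must be inverted.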
22688 if (LoadPos == 1)
22689 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
22690
22691 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
22692 StoreOffset, Mask, VT, Store->getMemOperand(),
22693 Store->getAddressingMode());
22694}
22695
22696SDValue DAGCombiner::visitSTORE(SDNode *N) {
22697 StoreSDNode *ST = cast<StoreSDNode>(N);
22698 SDValue Chain = ST->getChain();
22699 SDValue Value = ST->getValue();
22700 SDValue Ptr = ST->getBasePtr();
22701
22702 // If this is a store of a bit convert, store the input value if the
22703 // resultant store does not need a higher alignment than the original.
22704 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
22705 ST->isUnindexed()) {
22706 EVT SVT = Value.getOperand(0).getValueType();
22707 // If the store is volatile, we only want to change the store type if the
22708 // resulting store is legal. Otherwise we might increase the number of
22709 // memory accesses. We don't care if the original type was legal or not
22710 // as we assume software couldn't rely on the number of accesses of an
22711 // illegal type.
22712 // TODO: May be able to relax for unordered atomics (see D66309)
22713 if (((!LegalOperations && ST->isSimple()) ||
22714 TLI.isOperationLegal(ISD::STORE, SVT)) &&
22715 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
22716 DAG, *ST->getMemOperand())) {
22717 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22718 ST->getMemOperand());
22719 }
22720 }
22721
22722 // Turn 'store undef, Ptr' -> nothing.
22723 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
22724 return Chain;
22725
22726 // Try to infer better alignment information than the store already has.
22727 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
22728 !ST->isAtomic()) {
22729 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
22730 if (*Alignment > ST->getAlign() &&
22731 isAligned(*Alignment, ST->getSrcValueOffset())) {
22732 SDValue NewStore =
22733 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
22734 ST->getMemoryVT(), *Alignment,
22735 ST->getMemOperand()->getFlags(), ST->getAAInfo());
22736 // NewStore will always be N as we are only refining the alignment
22737 assert(NewStore.getNode() == N);
22738 (void)NewStore;
22739 }
22740 }
22741 }
22742
22743 // Try transforming a pair of floating-point load / store ops into integer
22744 // load / store ops.
22745 if (SDValue NewST = TransformFPLoadStorePair(N))
22746 return NewST;
22747
22748 // Try transforming several stores into STORE (BSWAP).
22749 if (SDValue Store = mergeTruncStores(ST))
22750 return Store;
22751
22752 if (ST->isUnindexed()) {
22753 // Walk up chain skipping non-aliasing memory nodes, on this store and any
22754 // adjacent stores.
22755 if (findBetterNeighborChains(ST)) {
22756 // replaceStoreChain uses CombineTo, which handles all of the worklist
22757 // manipulation. Return the original node to not do anything else.
22758 return SDValue(ST, 0);
22759 }
22760 Chain = ST->getChain();
22761 }
22762
22763 // FIXME: is there such a thing as a truncating indexed store?
22764 if (ST->isTruncatingStore() && ST->isUnindexed() &&
22765 Value.getValueType().isInteger() &&
22766 (!isa<ConstantSDNode>(Value) ||
22767 !cast<ConstantSDNode>(Value)->isOpaque())) {
22768 // Convert a truncating store of an extension into a standard store.
22769 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
22770 Value.getOpcode() == ISD::SIGN_EXTEND ||
22771 Value.getOpcode() == ISD::ANY_EXTEND) &&
22772 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
22773 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
22774 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22775 ST->getMemOperand());
22776
22777 APInt TruncDemandedBits =
22778 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
22779 ST->getMemoryVT().getScalarSizeInBits());
22780
22781 // See if we can simplify the operation with SimplifyDemandedBits, which
22782 // only works if the value has a single use.
22783 AddToWorklist(Value.getNode());
22784 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
22785 // Re-visit the store if anything changed and the store hasn't been merged
22786 // with another node (N is deleted). SimplifyDemandedBits will add Value's
22787 // node back to the worklist if necessary, but we also need to re-visit
22788 // the Store node itself.
22789 if (N->getOpcode() != ISD::DELETED_NODE)
22790 AddToWorklist(N);
22791 return SDValue(N, 0);
22792 }
22793
22794 // Otherwise, see if we can simplify the input to this truncstore with
22795 // knowledge that only the low bits are being used. For example:
22796 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
22797 if (SDValue Shorter =
22798 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
22799 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
22800 ST->getMemOperand());
22801
22802 // If we're storing a truncated constant, see if we can simplify it.
22803 // TODO: Move this to targetShrinkDemandedConstant?
22804 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
22805 if (!Cst->isOpaque()) {
22806 const APInt &CValue = Cst->getAPIntValue();
22807 APInt NewVal = CValue & TruncDemandedBits;
22808 if (NewVal != CValue) {
22809 SDValue Shorter =
22810 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
22811 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
22812 ST->getMemoryVT(), ST->getMemOperand());
22813 }
22814 }
22815 }
22816
22817 // If this is a load followed by a store to the same location, then the store
22818 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
22819 // TODO: Add big-endian truncate support with test coverage.
22820 // TODO: Can relax for unordered atomics (see D66309)
22821 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
22822 ? peekThroughTruncates(Value)
22823 : Value;
22824 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
22825 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
22826 ST->isUnindexed() && ST->isSimple() &&
22827 Ld->getAddressSpace() == ST->getAddressSpace() &&
22828 // There can't be any side effects between the load and store, such as
22829 // a call or store.
22830 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22831 // The store is dead, remove it.
22832 return Chain;
22833 }
22834 }
22835
22836 // Try scalarizing vector stores of loads where we only change one element
22837 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22838 return NewST;
22839
22840 // TODO: Can relax for unordered atomics (see D66309)
22841 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22842 if (ST->isUnindexed() && ST->isSimple() &&
22843 ST1->isUnindexed() && ST1->isSimple()) {
22844 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22845 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22846 ST->getAddressSpace() == ST1->getAddressSpace()) {
22847 // If this is a store followed by a store with the same value to the
22848 // same location, then the store is dead/noop.
22849 return Chain;
22850 }
22851
22852 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22853 !ST1->getBasePtr().isUndef() &&
22854 ST->getAddressSpace() == ST1->getAddressSpace()) {
22855 // If one of the two stores is of a scalable vector type and the other is
22856 // a larger store of a fixed-size type, we cannot simply remove the
22857 // scalable store, because we do not know its actual size: it is only
22858 // known at runtime.
22859 if (ST->getMemoryVT().isScalableVector() ||
22860 ST1->getMemoryVT().isScalableVector()) {
22861 if (ST1->getBasePtr() == Ptr &&
22862 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22863 ST->getMemoryVT().getStoreSize())) {
22864 CombineTo(ST1, ST1->getChain());
22865 return SDValue(N, 0);
22866 }
22867 } else {
22868 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22869 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22870 // If the preceding store writes to a subset of the current store's
22871 // location and no other node is chained to that preceding store, we can
22872 // effectively drop it. Do not remove stores to undef as they
22873 // may be used as data sinks.
22874 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22875 ChainBase,
22876 ST1->getMemoryVT().getFixedSizeInBits())) {
22877 CombineTo(ST1, ST1->getChain());
22878 return SDValue(N, 0);
22879 }
22880 }
22881 }
22882 }
22883 }
22884
22885 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22886 // truncating store. We can do this even if this is already a truncstore.
22887 if ((Value.getOpcode() == ISD::FP_ROUND ||
22888 Value.getOpcode() == ISD::TRUNCATE) &&
22889 Value->hasOneUse() && ST->isUnindexed() &&
22890 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22891 ST->getMemoryVT(), LegalOperations)) {
22892 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22893 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22894 }
22895
22896 // Always perform this optimization before types are legal. If the target
22897 // prefers, also try this after legalization to catch stores that were created
22898 // by intrinsics or other nodes.
22899 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22900 while (true) {
22901 // There can be multiple store sequences on the same chain.
22902 // Keep trying to merge store sequences until we are unable to do so
22903 // or until we merge the last store on the chain.
22904 bool Changed = mergeConsecutiveStores(ST);
22905 if (!Changed) break;
22906 // Return N as merge only uses CombineTo and no worklist clean
22907 // up is necessary.
22908 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22909 return SDValue(N, 0);
22910 }
22911 }
22912
22913 // Try transforming N to an indexed store.
22914 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22915 return SDValue(N, 0);
22916
22917 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22918 //
22919 // Make sure to do this only after attempting to merge stores in order to
22920 // avoid changing the types of some subset of stores due to visit order,
22921 // preventing their merging.
22922 if (isa<ConstantFPSDNode>(ST->getValue())) {
22923 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22924 return NewSt;
22925 }
22926
22927 if (SDValue NewSt = splitMergedValStore(ST))
22928 return NewSt;
22929
22930 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
22931 return MaskedStore;
22932
22933 return ReduceLoadOpStoreWidth(N);
22934}
22935
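// A store that writes entirely within an object just before that object's
// lifetime ends is dead; walk up the chain from the LIFETIME_END node and
// remove any such store.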
22936SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22937 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22938 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
22939
22940 // We walk up the chains to find stores.
22941 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22942 while (!Chains.empty()) {
22943 SDValue Chain = Chains.pop_back_val();
22944 if (!Chain.hasOneUse())
22945 continue;
22946 switch (Chain.getOpcode()) {
22947 case ISD::TokenFactor:
22948 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22949 Chains.push_back(Chain.getOperand(--Nops));
22950 break;
22951 case ISD::LIFETIME_START:
22952 case ISD::LIFETIME_END:
22953 // We can forward past any lifetime start/end that can be proven not to
22954 // alias the node.
22955 if (!mayAlias(Chain.getNode(), N))
22956 Chains.push_back(Chain.getOperand(0));
22957 break;
22958 case ISD::STORE: {
22959 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22960 // TODO: Can relax for unordered atomics (see D66309)
22961 if (!ST->isSimple() || ST->isIndexed())
22962 continue;
22963 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22964 // The bounds of a scalable store are not known until runtime, so this
22965 // store cannot be elided.
22966 if (StoreSize.isScalable())
22967 continue;
22968 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22969 // If we store purely within object bounds just before its lifetime ends,
22970 // we can remove the store.
22971 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
22972 if (LifetimeEndBase.contains(
22973 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
22974 StoreBase, StoreSize.getFixedValue() * 8)) {
22975 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22976 dbgs() << "\nwithin LIFETIME_END of : ";
22977 LifetimeEndBase.dump(); dbgs() << "\n");
22978 CombineTo(ST, ST->getChain());
22979 return SDValue(N, 0);
22980 }
22981 }
22982 }
22983 }
22984 return SDValue();
22985}
22986
22987/// For the instruction sequence of store below, F and I values
22988/// are bundled together as an i64 value before being stored into memory.
22989/// Sometimes it is more efficient to generate separate stores for F and I,
22990/// which can remove the bitwise instructions or sink them to colder places.
22991///
22992/// (store (or (zext (bitcast F to i32) to i64),
22993/// (shl (zext I to i64), 32)), addr) -->
22994/// (store F, addr) and (store I, addr+4)
22995///
22996/// Similarly, splitting for other merged store can also be beneficial, like:
22997/// For pair of {i32, i32}, i64 store --> two i32 stores.
22998/// For pair of {i32, i16}, i64 store --> two i32 stores.
22999/// For pair of {i16, i16}, i32 store --> two i16 stores.
23000/// For pair of {i16, i8}, i32 store --> two i16 stores.
23001/// For pair of {i8, i8}, i16 store --> two i8 stores.
23002///
23003/// We allow each target to determine specifically which kind of splitting is
23004/// supported.
23005///
23006/// The store patterns are commonly seen from the simple code snippet below
23007/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
23008/// void goo(const std::pair<int, float> &);
23009/// hoo() {
23010/// ...
23011/// goo(std::make_pair(tmp, ftmp));
23012/// ...
23013/// }
23014///
23015SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
23016 if (OptLevel == CodeGenOptLevel::None)
23017 return SDValue();
23018
23019 // Can't change the number of memory accesses for a volatile store or break
23020 // atomicity for an atomic one.
23021 if (!ST->isSimple())
23022 return SDValue();
23023
23024 SDValue Val = ST->getValue();
23025 SDLoc DL(ST);
23026
23027 // Match OR operand.
23028 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
23029 return SDValue();
23030
23031 // Match SHL operand and get Lower and Higher parts of Val.
23032 SDValue Op1 = Val.getOperand(0);
23033 SDValue Op2 = Val.getOperand(1);
23034 SDValue Lo, Hi;
23035 if (Op1.getOpcode() != ISD::SHL) {
23036 std::swap(Op1, Op2);
23037 if (Op1.getOpcode() != ISD::SHL)
23038 return SDValue();
23039 }
23040 Lo = Op2;
23041 Hi = Op1.getOperand(0);
23042 if (!Op1.hasOneUse())
23043 return SDValue();
23044
23045 // Match shift amount to HalfValBitSize.
23046 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
23047 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
23048 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23049 return SDValue();
23050
23051 // Lo and Hi must be zero-extended from scalar integers whose size is at
23052 // most HalfValBitSize.
23053 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23054 !Lo.getOperand(0).getValueType().isScalarInteger() ||
23055 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
23056 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23057 !Hi.getOperand(0).getValueType().isScalarInteger() ||
23058 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
23059 return SDValue();
23060
23061 // Use the EVT of low and high parts before bitcast as the input
23062 // of target query.
23063 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23064 ? Lo.getOperand(0).getValueType()
23065 : Lo.getValueType();
23066 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23067 ? Hi.getOperand(0).getValueType()
23068 : Hi.getValueType();
23069 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23070 return SDValue();
23071
23072 // Start to split store.
23073 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23074 AAMDNodes AAInfo = ST->getAAInfo();
23075
23076 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23077 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23078 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23079 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23080
23081 SDValue Chain = ST->getChain();
23082 SDValue Ptr = ST->getBasePtr();
23083 // Lower value store.
23084 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23085 ST->getBaseAlign(), MMOFlags, AAInfo);
23086 Ptr =
23087 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23088 // Higher value store.
23089 SDValue St1 = DAG.getStore(
23090 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23091 ST->getBaseAlign(), MMOFlags, AAInfo);
23092 return St1;
23093}
23094
23095// Merge an insertion into an existing shuffle:
23096// (insert_vector_elt (vector_shuffle X, Y, Mask),
23097// (extract_vector_elt X, N), InsIndex)
23098// --> (vector_shuffle X, Y, NewMask)
23099// and variations where shuffle operands may be CONCAT_VECTORS.
23100static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23101 SmallVectorImpl<int> &NewMask, SDValue Elt,
23102 unsigned InsIndex) {
23103 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23104 !isa<ConstantSDNode>(Elt.getOperand(1)))
23105 return false;
23106
23107 // Vec's operand 0 is using indices from 0 to N-1 and
23108 // operand 1 from N to 2N - 1, where N is the number of
23109 // elements in the vectors.
23110 SDValue InsertVal0 = Elt.getOperand(0);
23111 int ElementOffset = -1;
23112
23113 // We explore the inputs of the shuffle in order to see if we find the
23114 // source of the extract_vector_elt. If so, we can use it to modify the
23115 // shuffle rather than perform an insert_vector_elt.
23116 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
23117 ArgWorkList.emplace_back(Mask.size(), Y);
23118 ArgWorkList.emplace_back(0, X);
23119
23120 while (!ArgWorkList.empty()) {
23121 int ArgOffset;
23122 SDValue ArgVal;
23123 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23124
23125 if (ArgVal == InsertVal0) {
23126 ElementOffset = ArgOffset;
23127 break;
23128 }
23129
23130 // Peek through concat_vector.
23131 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23132 int CurrentArgOffset =
23133 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23134 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23135 for (SDValue Op : reverse(ArgVal->ops())) {
23136 CurrentArgOffset -= Step;
23137 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23138 }
23139
23140 // Make sure we went through all the elements and did not screw up index
23141 // computation.
23142 assert(CurrentArgOffset == ArgOffset);
23143 }
23144 }
23145
23146 // If we failed to find a match, see if we can replace an UNDEF shuffle
23147 // operand.
23148 if (ElementOffset == -1) {
23149 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23150 return false;
23151 ElementOffset = Mask.size();
23152 Y = InsertVal0;
23153 }
23154
23155 NewMask.assign(Mask.begin(), Mask.end());
23156 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23157 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23158 "NewMask[InsIndex] is out of bound");
23159 return true;
23160}
23161
23162// Merge an insertion into an existing shuffle:
23163// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23164// InsIndex)
23165// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23166// CONCAT_VECTORS.
23167SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23168 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23169 "Expected extract_vector_elt");
23170 SDValue InsertVal = N->getOperand(1);
23171 SDValue Vec = N->getOperand(0);
23172
23173 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23174 if (!SVN || !Vec.hasOneUse())
23175 return SDValue();
23176
23177 ArrayRef<int> Mask = SVN->getMask();
23178 SDValue X = Vec.getOperand(0);
23179 SDValue Y = Vec.getOperand(1);
23180
23181 SmallVector<int, 16> NewMask(Mask);
23182 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23183 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23184 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23185 if (LegalShuffle)
23186 return LegalShuffle;
23187 }
23188
23189 return SDValue();
23190}
23191
23192// Convert a disguised subvector insertion into a shuffle:
23193// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23194// bitcast(shuffle (bitcast V), (extended X), Mask)
23195// Note: We do not use an insert_subvector node because that requires a
23196// legal subvector type.
23197SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23198 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23199 "Expected extract_vector_elt");
23200 SDValue InsertVal = N->getOperand(1);
23201
23202 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23203 !InsertVal.getOperand(0).getValueType().isVector())
23204 return SDValue();
23205
23206 SDValue SubVec = InsertVal.getOperand(0);
23207 SDValue DestVec = N->getOperand(0);
23208 EVT SubVecVT = SubVec.getValueType();
23209 EVT VT = DestVec.getValueType();
23210 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23211 // If the source only has a single vector element, the cost of creating and
23212 // shuffling in a padded vector is likely to exceed that of an insert_vector_elt.
23213 if (NumSrcElts == 1)
23214 return SDValue();
23215 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23216 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23217
23218 // Step 1: Create a shuffle mask that implements this insert operation. The
23219 // vector that we are inserting into will be operand 0 of the shuffle, so
23220 // those elements are just 'i'. The inserted subvector is in the first
23221 // positions of operand 1 of the shuffle. Example:
23222 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23223 SmallVector<int, 16> Mask(NumMaskVals);
23224 for (unsigned i = 0; i != NumMaskVals; ++i) {
23225 if (i / NumSrcElts == InsIndex)
23226 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23227 else
23228 Mask[i] = i;
23229 }
23230
23231 // Bail out if the target can not handle the shuffle we want to create.
23232 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23233 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23234 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23235 return SDValue();
23236
23237 // Step 2: Create a wide vector from the inserted source vector by appending
23238 // undefined elements. This is the same size as our destination vector.
23239 SDLoc DL(N);
23240 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23241 ConcatOps[0] = SubVec;
23242 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23243
23244 // Step 3: Shuffle in the padded subvector.
23245 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23246 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23247 AddToWorklist(PaddedSubV.getNode());
23248 AddToWorklist(DestVecBC.getNode());
23249 AddToWorklist(Shuf.getNode());
23250 return DAG.getBitcast(VT, Shuf);
23251}
23252
23253// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23254// possible and the new load will be quick. We use more loads but fewer shuffles
23255// and inserts.
23256SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23257 EVT VT = N->getValueType(0);
23258
23259 // InsIndex is expected to be the first or last lane.
23260 if (!VT.isFixedLengthVector() ||
23261 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23262 return SDValue();
23263
23264 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23265 // depending on the InsIndex.
23266 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23267 SDValue Scalar = N->getOperand(1);
23268 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23269 return InsIndex == P.index() || P.value() < 0 ||
23270 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23271 (InsIndex == VT.getVectorNumElements() - 1 &&
23272 P.value() == (int)P.index() + 1);
23273 }))
23274 return SDValue();
23275
23276 // We optionally skip over an extend so long as both loads are extended in the
23277 // same way from the same type.
23278 unsigned Extend = 0;
23279 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23280 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23281 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23282 Extend = Scalar.getOpcode();
23283 Scalar = Scalar.getOperand(0);
23284 }
23285
23286 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23287 if (!ScalarLoad)
23288 return SDValue();
23289
23290 SDValue Vec = Shuffle->getOperand(0);
23291 if (Extend) {
23292 if (Vec.getOpcode() != Extend)
23293 return SDValue();
23294 Vec = Vec.getOperand(0);
23295 }
23296 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23297 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23298 return SDValue();
23299
23300 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23301 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23302 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23303 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23304 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23305 return SDValue();
23306
23307 // Check that the offset between the pointers is such that the two loads form
23308 // a single contiguous load.
23309 if (InsIndex == 0) {
23310 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23311 -1))
23312 return SDValue();
23313 } else {
23314 if (!DAG.areNonVolatileConsecutiveLoads(
23315 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23316 return SDValue();
23317 }
23318
23319 // And that the new unaligned load will be fast.
23320 unsigned IsFast = 0;
23321 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23322 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23323 Vec.getValueType(), VecLoad->getAddressSpace(),
23324 NewAlign, VecLoad->getMemOperand()->getFlags(),
23325 &IsFast) ||
23326 !IsFast)
23327 return SDValue();
23328
23329 // Calculate the new Ptr and create the new load.
23330 SDLoc DL(N);
23331 SDValue Ptr = ScalarLoad->getBasePtr();
23332 if (InsIndex != 0)
23333 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23334 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23335 MachinePointerInfo PtrInfo =
23336 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23337 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23338
23339 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23340 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23341 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23342 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23343 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23344}
23345
23346SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23347 SDValue InVec = N->getOperand(0);
23348 SDValue InVal = N->getOperand(1);
23349 SDValue EltNo = N->getOperand(2);
23350 SDLoc DL(N);
23351
23352 EVT VT = InVec.getValueType();
23353 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23354
23355 // Insert into out-of-bounds element is undefined.
23356 if (IndexC && VT.isFixedLengthVector() &&
23357 IndexC->getZExtValue() >= VT.getVectorNumElements())
23358 return DAG.getUNDEF(VT);
23359
23360 // Remove redundant insertions:
23361 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23362 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23363 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23364 return InVec;
23365
23366 if (!IndexC) {
23367 // If this is variable insert to undef vector, it might be better to splat:
23368 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23369 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23370 return DAG.getSplat(VT, DL, InVal);
23371 return SDValue();
23372 }
23373
23374 if (VT.isScalableVector())
23375 return SDValue();
23376
23377 unsigned NumElts = VT.getVectorNumElements();
23378
23379 // We must know which element is being inserted for folds below here.
23380 unsigned Elt = IndexC->getZExtValue();
23381
23382 // Handle <1 x ???> vector insertion special cases.
23383 if (NumElts == 1) {
23384 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23385 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23386 InVal.getOperand(0).getValueType() == VT &&
23387 isNullConstant(InVal.getOperand(1)))
23388 return InVal.getOperand(0);
23389 }
23390
23391 // Canonicalize insert_vector_elt dag nodes.
23392 // Example:
23393 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23394 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23395 //
23396 // Do this only if the child insert_vector node has one use; also
23397 // do this only if indices are both constants and Idx1 < Idx0.
23398 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23399 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23400 unsigned OtherElt = InVec.getConstantOperandVal(2);
23401 if (Elt < OtherElt) {
23402 // Swap nodes.
23403 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23404 InVec.getOperand(0), InVal, EltNo);
23405 AddToWorklist(NewOp.getNode());
23406 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23407 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23408 }
23409 }
23410
23411 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23412 return Shuf;
23413
23414 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23415 return Shuf;
23416
23417 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23418 return Shuf;
23419
23420 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23421 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23422 // vXi1 vector - we don't need to recurse.
23423 if (NumElts == 1)
23424 return DAG.getBuildVector(VT, DL, {InVal});
23425
23426 // If we haven't already collected the element, insert into the op list.
23427 EVT MaxEltVT = InVal.getValueType();
23428 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23429 unsigned Idx) {
23430 if (!Ops[Idx]) {
23431 Ops[Idx] = Elt;
23432 if (VT.isInteger()) {
23433 EVT EltVT = Elt.getValueType();
23434 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23435 }
23436 }
23437 };
23438
23439 // Ensure all the operands are the same value type, fill any missing
23440 // operands with UNDEF and create the BUILD_VECTOR.
23441 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23442 bool FreezeUndef = false) {
23443 assert(Ops.size() == NumElts && "Unexpected vector size");
23444 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23445 : DAG.getUNDEF(MaxEltVT);
23446 for (SDValue &Op : Ops) {
23447 if (Op)
23448 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23449 else
23450 Op = UndefOp;
23451 }
23452 return DAG.getBuildVector(VT, DL, Ops);
23453 };
23454
23455 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
23456 Ops[Elt] = InVal;
23457
23458 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
23459 for (SDValue CurVec = InVec; CurVec;) {
23460 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23461 if (CurVec.isUndef())
23462 return CanonicalizeBuildVector(Ops);
23463
23464 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23465 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23466 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23467
23468 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23469 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23470 for (unsigned I = 0; I != NumElts; ++I)
23471 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23472 return CanonicalizeBuildVector(Ops);
23473 }
23474
23475 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23476 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23477 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23478 return CanonicalizeBuildVector(Ops);
23479 }
23480
23481 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23482 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23483 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23484 if (CurIdx->getAPIntValue().ult(NumElts)) {
23485 unsigned Idx = CurIdx->getZExtValue();
23486 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23487
23488 // Found entire BUILD_VECTOR.
23489 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23490 return CanonicalizeBuildVector(Ops);
23491
23492 CurVec = CurVec->getOperand(0);
23493 continue;
23494 }
23495
23496 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23497 // update the shuffle mask (and second operand if we started with unary
23498 // shuffle) and create a new legal shuffle.
23499 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23500 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23501 SDValue LHS = SVN->getOperand(0);
23502 SDValue RHS = SVN->getOperand(1);
23503 SmallVector<int, 16> Mask(SVN->getMask());
23504 bool Merged = true;
23505 for (auto I : enumerate(Ops)) {
23506 SDValue &Op = I.value();
23507 if (Op) {
23508 SmallVector<int, 16> NewMask;
23509 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23510 Merged = false;
23511 break;
23512 }
23513 Mask = std::move(NewMask);
23514 }
23515 }
23516 if (Merged)
23517 if (SDValue NewShuffle =
23518 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23519 return NewShuffle;
23520 }
23521
23522 if (!LegalOperations) {
23523 bool IsNull = llvm::isNullConstant(InVal);
23524 // We can convert to AND/OR mask if all insertions are zero or -1
23525 // respectively.
23526 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23527 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23528 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23529 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23530 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23531 SmallVector<SDValue, 8> Mask(NumElts);
23532
23533 // Build the mask and return the corresponding DAG node.
23534 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23535 unsigned MaskOpcode) {
23536 for (unsigned I = 0; I != NumElts; ++I)
23537 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23538 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23539 DAG.getBuildVector(VT, DL, Mask));
23540 };
23541
23542 // If all elements are zero, we can use AND with all ones.
23543 if (IsNull)
23544 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23545
23546 // If all elements are -1, we can use OR with zero.
23547 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23548 }
23549 }
23550
23551 // Failed to find a match in the chain - bail.
23552 break;
23553 }
23554
23555 // See if we can fill in the missing constant elements as zeros.
23556 // TODO: Should we do this for any constant?
23557 APInt DemandedZeroElts = APInt::getZero(NumElts);
23558 for (unsigned I = 0; I != NumElts; ++I)
23559 if (!Ops[I])
23560 DemandedZeroElts.setBit(I);
23561
23562 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
23563 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
23564 : DAG.getConstantFP(0, DL, MaxEltVT);
23565 for (unsigned I = 0; I != NumElts; ++I)
23566 if (!Ops[I])
23567 Ops[I] = Zero;
23568
23569 return CanonicalizeBuildVector(Ops);
23570 }
23571 }
23572
23573 return SDValue();
23574}
23575
23576/// Transform a vector binary operation into a scalar binary operation by moving
23577/// the math/logic after an extract element of a vector.
23578static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
23579 const SDLoc &DL, bool LegalTypes) {
23580 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23581 SDValue Vec = ExtElt->getOperand(0);
23582 SDValue Index = ExtElt->getOperand(1);
23583 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23584 unsigned Opc = Vec.getOpcode();
23585 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
23586 Vec->getNumValues() != 1)
23587 return SDValue();
23588
23589 // Targets may want to avoid this to prevent an expensive register transfer.
23590 if (!TLI.shouldScalarizeBinop(Vec))
23591 return SDValue();
23592
23593 EVT ResVT = ExtElt->getValueType(0);
23594 if (Opc == ISD::SETCC &&
23595 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
23596 return SDValue();
23597
23598 // Extracting an element of a vector constant is constant-folded, so this
23599 // transform is just replacing a vector op with a scalar op while moving the
23600 // extract.
23601 SDValue Op0 = Vec.getOperand(0);
23602 SDValue Op1 = Vec.getOperand(1);
23603 APInt SplatVal;
23604 if (!isAnyConstantBuildVector(Op0, true) &&
23605 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
23606 !isAnyConstantBuildVector(Op1, true) &&
23607 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
23608 return SDValue();
23609
23610 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
23611 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
23612 if (Opc == ISD::SETCC) {
23613 EVT OpVT = Op0.getValueType().getVectorElementType();
23614 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
23615 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
23616 SDValue NewVal = DAG.getSetCC(
23617 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
23618 // We may need to sign- or zero-extend the result to match the same
23619 // behaviour as the vector version of SETCC.
23620 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
23621 if (ResVT != MVT::i1 &&
23622 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
23623 VecBoolContents != TLI.getBooleanContents(ResVT)) {
23624 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
23625 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
23626 DAG.getValueType(MVT::i1));
23627 else
23628 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
23629 }
23630 return NewVal;
23631 }
23632 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
23633 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
23634 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
23635}
23636
23637// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
23638// recursively analyse all of its users and try to model them as
23639// bit sequence extractions. If all of them agree on the new, narrower element
23640// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
23641// new element type, do so now.
23642// This is mainly useful to recover from legalization that scalarized
23643// the vector as wide elements, but tries to rebuild it with narrower elements.
23644//
23645// Some more nodes could be modelled if that helps cover interesting patterns.
23646bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
23647 SDNode *N) {
23648 // We perform this optimization post type-legalization because
23649 // the type-legalizer often scalarizes integer-promoted vectors.
23650 // Performing this optimization earlier may cause legalization cycles.
23651 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23652 return false;
23653
23654 // TODO: Add support for big-endian.
23655 if (DAG.getDataLayout().isBigEndian())
23656 return false;
23657
23658 SDValue VecOp = N->getOperand(0);
23659 EVT VecVT = VecOp.getValueType();
23660 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
23661
23662 // We must start with a constant extraction index.
23663 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
23664 if (!IndexC)
23665 return false;
23666
23667 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
23668 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
23669
23670 // TODO: deal with the case of implicit anyext of the extraction.
23671 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23672 EVT ScalarVT = N->getValueType(0);
23673 if (VecVT.getScalarType() != ScalarVT)
23674 return false;
23675
23676 // TODO: deal with the cases other than everything being integer-typed.
23677 if (!ScalarVT.isScalarInteger())
23678 return false;
23679
23680 struct Entry {
23681 SDNode *Producer;
23682
23683 // Which bits of VecOp does it contain?
23684 unsigned BitPos;
23685 int NumBits;
23686 // NOTE: the actual width of \p Producer may be wider than NumBits!
23687
23688 Entry(Entry &&) = default;
23689 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
23690 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
23691
23692 Entry() = delete;
23693 Entry(const Entry &) = delete;
23694 Entry &operator=(const Entry &) = delete;
23695 Entry &operator=(Entry &&) = delete;
23696 };
23697 SmallVector<Entry, 32> Worklist;
23698 SmallVector<Entry, 32> Leafs;
23699
23700 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
23701 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
23702 /*NumBits=*/VecEltBitWidth);
23703
23704 while (!Worklist.empty()) {
23705 Entry E = Worklist.pop_back_val();
23706 // Does the node not even use any of the VecOp bits?
23707 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
23708 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
23709 return false; // Let's allow the other combines clean this up first.
23710 // Did we fail to model any of the users of the Producer?
23711 bool ProducerIsLeaf = false;
23712 // Look at each user of this Producer.
23713 for (SDNode *User : E.Producer->users()) {
23714 switch (User->getOpcode()) {
23715 // TODO: support ISD::BITCAST
23716 // TODO: support ISD::ANY_EXTEND
23717 // TODO: support ISD::ZERO_EXTEND
23718 // TODO: support ISD::SIGN_EXTEND
23719 case ISD::TRUNCATE:
23720 // Truncation simply means we keep the position, but extract fewer bits.
23721 Worklist.emplace_back(User, E.BitPos,
23722 /*NumBits=*/User->getValueSizeInBits(0));
23723 break;
23724 // TODO: support ISD::SRA
23725 // TODO: support ISD::SHL
23726 case ISD::SRL:
23727 // We should be shifting the Producer by a constant amount.
23728 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
23729 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
23730 // Logical right-shift means that we start extraction later,
23731 // but stop it at the same position we did previously.
23732 unsigned ShAmt = ShAmtC->getZExtValue();
23733 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
23734 break;
23735 }
23736 [[fallthrough]];
23737 default:
23738 // We can not model this user of the Producer.
23739 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
23740 ProducerIsLeaf = true;
23741 // Profitability check: all users that we can not model
23742 // must be ISD::BUILD_VECTOR's.
23743 if (User->getOpcode() != ISD::BUILD_VECTOR)
23744 return false;
23745 break;
23746 }
23747 }
23748 if (ProducerIsLeaf)
23749 Leafs.emplace_back(std::move(E));
23750 }
23751
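// Candidate new element width, taken from the first leaf; all leaves must
// agree on it below.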
23752 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
23753
23754 // If we are still at the same element granularity, give up.
23755 if (NewVecEltBitWidth == VecEltBitWidth)
23756 return false;
23757
23758 // The vector width must be a multiple of the new element width.
23759 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
23760 return false;
23761
23762 // All leafs must agree on the new element width.
23763 // All leafs must not expect any "padding" bits on top of that width.
23764 // All leafs must start extraction from multiple of that width.
23765 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
23766 return (unsigned)E.NumBits == NewVecEltBitWidth &&
23767 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23768 E.BitPos % NewVecEltBitWidth == 0;
23769 }))
23770 return false;
23771
23772 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
23773 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
23774 VecVT.getSizeInBits() / NewVecEltBitWidth);
23775
23776 if (LegalTypes &&
23777 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23778 return false;
23779
23780 if (LegalOperations &&
23781 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23782 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23783 return false;
23784
23785 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23786 for (const Entry &E : Leafs) {
23787 SDLoc DL(E.Producer);
23788 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23789 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23790 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23791 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23792 DAG.getVectorIdxConstant(NewIndex, DL));
23793 CombineTo(E.Producer, V);
23794 }
23795
23796 return true;
23797}
23798
23799SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23800 SDValue VecOp = N->getOperand(0);
23801 SDValue Index = N->getOperand(1);
23802 EVT ScalarVT = N->getValueType(0);
23803 EVT VecVT = VecOp.getValueType();
23804 if (VecOp.isUndef())
23805 return DAG.getUNDEF(ScalarVT);
23806
23807 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23808 //
23809 // This only really matters if the index is non-constant since other combines
23810 // on the constant elements already work.
23811 SDLoc DL(N);
23812 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23813 Index == VecOp.getOperand(2)) {
23814 SDValue Elt = VecOp.getOperand(1);
23815 AddUsersToWorklist(VecOp.getNode());
23816 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23817 }
23818
23819 // (vextract (scalar_to_vector val, 0) -> val
23820 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23821 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23822 if (DAG.isKnownNeverZero(Index))
23823 return DAG.getUNDEF(ScalarVT);
23824
23825 // Check if the result type doesn't match the inserted element type.
23826 // The inserted element and extracted element may have mismatched bitwidth.
23827 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23828 SDValue InOp = VecOp.getOperand(0);
23829 if (InOp.getValueType() != ScalarVT) {
23830 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23831 if (InOp.getValueType().bitsGT(ScalarVT))
23832 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23833 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23834 }
23835 return InOp;
23836 }
23837
23838 // extract_vector_elt of out-of-bounds element -> UNDEF
23839 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23840 if (IndexC && VecVT.isFixedLengthVector() &&
23841 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23842 return DAG.getUNDEF(ScalarVT);
23843
23844 // extract_vector_elt (build_vector x, y), 1 -> y
23845 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23846 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23847 TLI.isTypeLegal(VecVT)) {
23848 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23849 VecVT.isFixedLengthVector()) &&
23850 "BUILD_VECTOR used for scalable vectors");
23851 unsigned IndexVal =
23852 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23853 SDValue Elt = VecOp.getOperand(IndexVal);
23854 EVT InEltVT = Elt.getValueType();
23855
23856 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23857 isNullConstant(Elt)) {
23858 // Sometimes build_vector's scalar input types do not match result type.
23859 if (ScalarVT == InEltVT)
23860 return Elt;
23861
23862 // TODO: It may be useful to truncate if free if the build_vector
23863 // implicitly converts.
23864 }
23865 }
23866
23867 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23868 return BO;
23869
23870 if (VecVT.isScalableVector())
23871 return SDValue();
23872
23873 // All the code from this point onwards assumes fixed width vectors, but it's
23874 // possible that some of the combinations could be made to work for scalable
23875 // vectors too.
23876 unsigned NumElts = VecVT.getVectorNumElements();
23877 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23878
23879 // See if the extracted element is constant, in which case fold it if it's
23880 // a legal fp immediate.
23881 if (IndexC && ScalarVT.isFloatingPoint()) {
23882 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23883 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23884 if (KnownElt.isConstant()) {
23885 APFloat CstFP =
23886 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23887 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23888 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23889 }
23890 }
23891
23892 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23893 // there are regressions on multiple targets without it. We can end up with a
23894 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23895 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23896 VecOp.hasOneUse()) {
23897 // The vector index of the LSBs of the source depends on the endianness.
23898 bool IsLE = DAG.getDataLayout().isLittleEndian();
23899 unsigned ExtractIndex = IndexC->getZExtValue();
23900 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23901 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23902 SDValue BCSrc = VecOp.getOperand(0);
23903 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23904 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23905
23906 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23907 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23908 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23909 BCSrc.getScalarValueSizeInBits() ==
23910 BCSrc.getOperand(0).getValueSizeInBits()) {
23911 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23912 // trunc i64 X to i32
23913 SDValue X = BCSrc.getOperand(0);
23914 EVT XVT = X.getValueType();
23915 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23916 "Extract element and scalar to vector can't change element type "
23917 "from FP to integer.");
23918 unsigned XBitWidth = X.getValueSizeInBits();
23919 unsigned Scale = XBitWidth / VecEltBitWidth;
23920 BCTruncElt = IsLE ? 0 : Scale - 1;
23921
23922 // An extract element return value type can be wider than its vector
23923 // operand element type. In that case, the high bits are undefined, so
23924 // it's possible that we may need to extend rather than truncate.
23925 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23926 assert(XBitWidth % VecEltBitWidth == 0 &&
23927 "Scalar bitwidth must be a multiple of vector element bitwidth");
23928
23929 if (ExtractIndex != BCTruncElt) {
23930 unsigned ShiftIndex =
23931 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23932 X = DAG.getNode(
23933 ISD::SRL, DL, XVT, X,
23934 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23935 }
23936
23937 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23938 }
23939 }
23940 }
23941
23942 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23943 // We only perform this optimization before the op legalization phase because
23944 // we may introduce new vector instructions which are not backed by TD
23945 // patterns. For example on AVX, extracting elements from a wide vector
23946 // without using extract_subvector. However, if we can find an underlying
23947 // scalar value, then we can always use that.
23948 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23949 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23950 // Find the new index to extract from.
23951 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23952
23953 // Extracting an undef index is undef.
23954 if (OrigElt == -1)
23955 return DAG.getUNDEF(ScalarVT);
23956
23957 // Select the right vector half to extract from.
23958 SDValue SVInVec;
23959 if (OrigElt < (int)NumElts) {
23960 SVInVec = VecOp.getOperand(0);
23961 } else {
23962 SVInVec = VecOp.getOperand(1);
23963 OrigElt -= NumElts;
23964 }
23965
23966 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23967 // TODO: Check if shuffle mask is legal?
23968 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
23969 !VecOp.hasOneUse())
23970 return SDValue();
23971
23972 SDValue InOp = SVInVec.getOperand(OrigElt);
23973 if (InOp.getValueType() != ScalarVT) {
23974 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23975 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23976 }
23977
23978 return InOp;
23979 }
23980
23981 // FIXME: We should handle recursing on other vector shuffles and
23982 // scalar_to_vector here as well.
23983
23984 if (!LegalOperations ||
23985 // FIXME: Should really be just isOperationLegalOrCustom.
23986 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23987 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT))
23988 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23989 DAG.getVectorIdxConstant(OrigElt, DL));
23990 }
23991 }
23992
23993 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23994 // simplify it based on the (valid) extraction indices.
23995 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23996 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23997 Use->getOperand(0) == VecOp &&
23998 isa<ConstantSDNode>(Use->getOperand(1));
23999 })) {
24000 APInt DemandedElts = APInt::getZero(NumElts);
24001 for (SDNode *User : VecOp->users()) {
24002 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
24003 if (CstElt->getAPIntValue().ult(NumElts))
24004 DemandedElts.setBit(CstElt->getZExtValue());
24005 }
24006 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
24007 // We simplified the vector operand of this extract element. If this
24008 // extract is not dead, visit it again so it is folded properly.
24009 if (N->getOpcode() != ISD::DELETED_NODE)
24010 AddToWorklist(N);
24011 return SDValue(N, 0);
24012 }
24013 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
24014 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
24015 // We simplified the vector operand of this extract element. If this
24016 // extract is not dead, visit it again so it is folded properly.
24017 if (N->getOpcode() != ISD::DELETED_NODE)
24018 AddToWorklist(N);
24019 return SDValue(N, 0);
24020 }
24021 }
24022
24023 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
24024 return SDValue(N, 0);
24025
24026 // Everything under here is trying to match an extract of a loaded value.
24027 // If the result of load has to be truncated, then it's not necessarily
24028 // profitable.
24029 bool BCNumEltsChanged = false;
24030 EVT ExtVT = VecVT.getVectorElementType();
24031 EVT LVT = ExtVT;
24032 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
24033 return SDValue();
24034
24035 if (VecOp.getOpcode() == ISD::BITCAST) {
24036 // Don't duplicate a load with other uses.
24037 if (!VecOp.hasOneUse())
24038 return SDValue();
24039
24040 EVT BCVT = VecOp.getOperand(0).getValueType();
24041 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
24042 return SDValue();
24043 if (NumElts != BCVT.getVectorNumElements())
24044 BCNumEltsChanged = true;
24045 VecOp = VecOp.getOperand(0);
24046 ExtVT = BCVT.getVectorElementType();
24047 }
24048
24049 // extract (vector load $addr), i --> load $addr + i * size
24050 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24051 ISD::isNormalLoad(VecOp.getNode()) &&
24052 !Index->hasPredecessor(VecOp.getNode())) {
24053 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
24054 if (VecLoad && VecLoad->isSimple()) {
24055 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24056 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
24057 ++OpsNarrowed;
24058 return Scalarized;
24059 }
24060 }
24061 }
24062
24063 // Perform only after legalization to ensure build_vector / vector_shuffle
24064 // optimizations have already been done.
24065 if (!LegalOperations || !IndexC)
24066 return SDValue();
24067
24068 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24069 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24070 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24071 int Elt = IndexC->getZExtValue();
24072 LoadSDNode *LN0 = nullptr;
24073 if (ISD::isNormalLoad(VecOp.getNode())) {
24074 LN0 = cast<LoadSDNode>(VecOp);
24075 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24076 VecOp.getOperand(0).getValueType() == ExtVT &&
24077 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24078 // Don't duplicate a load with other uses.
24079 if (!VecOp.hasOneUse())
24080 return SDValue();
24081
24082 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24083 }
24084 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24085 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24086 // =>
24087 // (load $addr+1*size)
24088
24089 // Don't duplicate a load with other uses.
24090 if (!VecOp.hasOneUse())
24091 return SDValue();
24092
24093 // If the bit convert changed the number of elements, it is unsafe
24094 // to examine the mask.
24095 if (BCNumEltsChanged)
24096 return SDValue();
24097
24098 // Select the input vector, guarding against out of range extract vector.
24099 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24100 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24101
24102 if (VecOp.getOpcode() == ISD::BITCAST) {
24103 // Don't duplicate a load with other uses.
24104 if (!VecOp.hasOneUse())
24105 return SDValue();
24106
24107 VecOp = VecOp.getOperand(0);
24108 }
24109 if (ISD::isNormalLoad(VecOp.getNode())) {
24110 LN0 = cast<LoadSDNode>(VecOp);
24111 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24112 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24113 }
24114 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24115 VecVT.getVectorElementType() == ScalarVT &&
24116 (!LegalTypes ||
24117 TLI.isTypeLegal(
24118 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24119 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24120 // -> extract_vector_elt a, 0
24121 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24122 // -> extract_vector_elt a, 1
24123 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24124 // -> extract_vector_elt b, 0
24125 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24126 // -> extract_vector_elt b, 1
24127 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24128 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24129 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24130 Index.getValueType());
24131
24132 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24133 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24134 ConcatVT.getVectorElementType(),
24135 ConcatOp, NewIdx);
24136 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24137 }
24138
24139 // Make sure we found a non-volatile load and the extractelement is
24140 // the only use.
24141 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24142 return SDValue();
24143
24144 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24145 if (Elt == -1)
24146 return DAG.getUNDEF(LVT);
24147
24148 if (SDValue Scalarized =
24149 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24150 ++OpsNarrowed;
24151 return Scalarized;
24152 }
24153
24154 return SDValue();
24155}
24156
24157// Simplify (build_vec (ext )) to (bitcast (build_vec ))
24158SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24159 // We perform this optimization post type-legalization because
24160 // the type-legalizer often scalarizes integer-promoted vectors.
24161 // Performing this optimization before may create bit-casts which
24162 // will be type-legalized to complex code sequences.
24163 // We perform this optimization only before the operation legalizer because we
24164 // may introduce illegal operations.
24165 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24166 return SDValue();
24167
24168 unsigned NumInScalars = N->getNumOperands();
24169 SDLoc DL(N);
24170 EVT VT = N->getValueType(0);
24171
24172 // Check to see if this is a BUILD_VECTOR of a bunch of values
24173 // which come from any_extend or zero_extend nodes. If so, we can create
24174 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24175 // optimizations. We do not handle sign-extend because we can't fill the sign
24176 // using shuffles.
24177 EVT SourceType = MVT::Other;
24178 bool AllAnyExt = true;
24179
24180 for (unsigned i = 0; i != NumInScalars; ++i) {
24181 SDValue In = N->getOperand(i);
24182 // Ignore undef inputs.
24183 if (In.isUndef()) continue;
24184
24185 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24186 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24187
24188 // Abort if the element is not an extension.
24189 if (!ZeroExt && !AnyExt) {
24190 SourceType = MVT::Other;
24191 break;
24192 }
24193
24194 // The input is a ZeroExt or AnyExt. Check the original type.
24195 EVT InTy = In.getOperand(0).getValueType();
24196
24197 // Check that all of the widened source types are the same.
24198 if (SourceType == MVT::Other)
24199 // First time.
24200 SourceType = InTy;
24201 else if (InTy != SourceType) {
24202 // Multiple incoming types. Abort.
24203 SourceType = MVT::Other;
24204 break;
24205 }
24206
24207 // Check if all of the extends are ANY_EXTENDs.
24208 AllAnyExt &= AnyExt;
24209 }
24210
24211 // In order to have valid types, all of the inputs must be extended from the
24212 // same source type and all of the inputs must be any or zero extend.
24213 // Scalar sizes must be a power of two.
24214 EVT OutScalarTy = VT.getScalarType();
24215 bool ValidTypes =
24216 SourceType != MVT::Other &&
24217 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24218 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24219 
24220 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24221 // turn into a single shuffle instruction.
24222 if (!ValidTypes)
24223 return SDValue();
24224
24225 // If we already have a splat buildvector, then don't fold it if it means
24226 // introducing zeros.
24227 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24228 return SDValue();
24229
24230 bool isLE = DAG.getDataLayout().isLittleEndian();
24231 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24232 assert(ElemRatio > 1 && "Invalid element size ratio");
24233 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24234 DAG.getConstant(0, DL, SourceType);
24235
24236 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24237 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24238
24239 // Populate the new build_vector
24240 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24241 SDValue Cast = N->getOperand(i);
24242 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24243 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24244 Cast.isUndef()) && "Invalid cast opcode");
24245 SDValue In;
24246 if (Cast.isUndef())
24247 In = DAG.getUNDEF(SourceType);
24248 else
24249 In = Cast->getOperand(0);
24250 unsigned Index = isLE ? (i * ElemRatio) :
24251 (i * ElemRatio + (ElemRatio - 1));
24252
24253 assert(Index < Ops.size() && "Invalid index");
24254 Ops[Index] = In;
24255 }
24256
24257 // The type of the new BUILD_VECTOR node.
24258 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24259 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24260 "Invalid vector size");
24261 // Check if the new vector type is legal.
24262 if (!isTypeLegal(VecVT) ||
24263 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24264 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
24265 return SDValue();
24266
24267 // Make the new BUILD_VECTOR.
24268 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24269
24270 // The new BUILD_VECTOR node has the potential to be further optimized.
24271 AddToWorklist(BV.getNode());
24272 // Bitcast to the desired type.
24273 return DAG.getBitcast(VT, BV);
24274}
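// Illustrative sketch of the transform above (hypothetical values, assuming a
// little-endian target and ZERO_EXTEND inputs):
//   v4i32 = BUILD_VECTOR (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d)
// is rebuilt at the narrower source width with zero filler elements and then
// bitcast back to the original type:
//   v4i32 = bitcast (v8i16 BUILD_VECTOR a, 0, b, 0, c, 0, d, 0)
// (undef is used as the filler instead when every input is an ANY_EXTEND).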
24275
24276// Simplify (build_vec (trunc $1)
24277// (trunc (srl $1 half-width))
24278// (trunc (srl $1 (2 * half-width))))
24279// to (bitcast $1)
24280SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24281 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24282
24283 EVT VT = N->getValueType(0);
24284
24285 // Don't run this before LegalizeTypes if VT is legal.
24286 // Targets may have other preferences.
24287 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24288 return SDValue();
24289
24290 // Only for little endian
24291 if (!DAG.getDataLayout().isLittleEndian())
24292 return SDValue();
24293
24294 EVT OutScalarTy = VT.getScalarType();
24295 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24296
24297 // Only for power of two types to be sure that bitcast works well
24298 if (!isPowerOf2_64(ScalarTypeBitsize))
24299 return SDValue();
24300
24301 unsigned NumInScalars = N->getNumOperands();
24302
24303 // Look through bitcasts
24304 auto PeekThroughBitcast = [](SDValue Op) {
24305 if (Op.getOpcode() == ISD::BITCAST)
24306 return Op.getOperand(0);
24307 return Op;
24308 };
24309
24310 // The source value where all the parts are extracted.
24311 SDValue Src;
24312 for (unsigned i = 0; i != NumInScalars; ++i) {
24313 SDValue In = PeekThroughBitcast(N->getOperand(i));
24314 // Ignore undef inputs.
24315 if (In.isUndef()) continue;
24316
24317 if (In.getOpcode() != ISD::TRUNCATE)
24318 return SDValue();
24319
24320 In = PeekThroughBitcast(In.getOperand(0));
24321
24322 if (In.getOpcode() != ISD::SRL) {
24323 // For now only build_vec without shuffling, handle shifts here in the
24324 // future.
24325 if (i != 0)
24326 return SDValue();
24327
24328 Src = In;
24329 } else {
24330 // In is SRL
24331 SDValue part = PeekThroughBitcast(In.getOperand(0));
24332
24333 if (!Src) {
24334 Src = part;
24335 } else if (Src != part) {
24336 // Vector parts do not stem from the same variable
24337 return SDValue();
24338 }
24339
24340 SDValue ShiftAmtVal = In.getOperand(1);
24341 if (!isa<ConstantSDNode>(ShiftAmtVal))
24342 return SDValue();
24343
24344 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24345
24346 // The extracted value is not extracted at the right position
24347 if (ShiftAmt != i * ScalarTypeBitsize)
24348 return SDValue();
24349 }
24350 }
24351
24352 // Only cast if the size is the same
24353 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24354 return SDValue();
24355
24356 return DAG.getBitcast(VT, Src);
24357}
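// Concrete instance of the pattern above (hypothetical, little-endian, $1: i64):
//   v4i16 = BUILD_VECTOR (trunc $1), (trunc (srl $1, 16)),
//                        (trunc (srl $1, 32)), (trunc (srl $1, 48))
// simplifies to:
//   v4i16 = bitcast $1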
24358
24359SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24360 ArrayRef<int> VectorMask,
24361 SDValue VecIn1, SDValue VecIn2,
24362 unsigned LeftIdx, bool DidSplitVec) {
24363 EVT VT = N->getValueType(0);
24364 EVT InVT1 = VecIn1.getValueType();
24365 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24366
24367 unsigned NumElems = VT.getVectorNumElements();
24368 unsigned ShuffleNumElems = NumElems;
24369
24370 // If we artificially split a vector in two already, then the offsets in the
24371 // operands will all be based off of VecIn1, even those in VecIn2.
24372 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24373
24374 uint64_t VTSize = VT.getFixedSizeInBits();
24375 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24376 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24377
24378 assert(InVT2Size <= InVT1Size &&
24379 "Inputs must be sorted to be in non-increasing vector size order.");
24380
24381 // We can't generate a shuffle node with mismatched input and output types.
24382 // Try to make the types match the type of the output.
24383 if (InVT1 != VT || InVT2 != VT) {
24384 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24385 // If the output vector length is a multiple of both input lengths,
24386 // we can concatenate them and pad the rest with undefs.
24387 unsigned NumConcats = VTSize / InVT1Size;
24388 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24389 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24390 ConcatOps[0] = VecIn1;
24391 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24392 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24393 VecIn2 = SDValue();
24394 } else if (InVT1Size == VTSize * 2) {
24395 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24396 return SDValue();
24397
24398 if (!VecIn2.getNode()) {
24399 // If we only have one input vector, and it's twice the size of the
24400 // output, split it in two.
24401 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24402 DAG.getVectorIdxConstant(NumElems, DL));
24403 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24404 // Since we now have shorter input vectors, adjust the offset of the
24405 // second vector's start.
24406 Vec2Offset = NumElems;
24407 } else {
24408 assert(InVT2Size <= InVT1Size &&
24409 "Second input is not going to be larger than the first one.");
24410
24411 // VecIn1 is wider than the output, and we have another, possibly
24412 // smaller input. Pad the smaller input with undefs, shuffle at the
24413 // input vector width, and extract the output.
24414 // The shuffle type is different than VT, so check legality again.
24415 if (LegalOperations &&
24416 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24417 return SDValue();
24418
24419 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24420 // lower it back into a BUILD_VECTOR. So if the inserted type is
24421 // illegal, don't even try.
24422 if (InVT1 != InVT2) {
24423 if (!TLI.isTypeLegal(InVT2))
24424 return SDValue();
24425 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24426 }
24427 ShuffleNumElems = NumElems * 2;
24428 }
24429 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24430 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24431 ConcatOps[0] = VecIn2;
24432 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24433 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24434 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24435 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24436 return SDValue();
24437 // If the dest vector has fewer than two elements, then using a shuffle and
24438 // extract from larger regs will cost even more.
24439 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24440 return SDValue();
24441 assert(InVT2Size <= InVT1Size &&
24442 "Second input is not going to be larger than the first one.");
24443
24444 // VecIn1 is wider than the output, and we have another, possibly
24445 // smaller input. Pad the smaller input with undefs, shuffle at the
24446 // input vector width, and extract the output.
24447 // The shuffle type is different than VT, so check legality again.
24448 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24449 return SDValue();
24450
24451 if (InVT1 != InVT2) {
24452 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24453 }
24454 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24455 } else {
24456 // TODO: Support cases where the length mismatch isn't exactly by a
24457 // factor of 2.
24458 // TODO: Move this check upwards, so that if we have bad type
24459 // mismatches, we don't create any DAG nodes.
24460 return SDValue();
24461 }
24462 }
24463
24464 // Initialize mask to undef.
24465 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24466
24467 // Only need to run up to the number of elements actually used, not the
24468 // total number of elements in the shuffle - if we are shuffling a wider
24469 // vector, the high lanes should be set to undef.
24470 for (unsigned i = 0; i != NumElems; ++i) {
24471 if (VectorMask[i] <= 0)
24472 continue;
24473
24474 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24475 if (VectorMask[i] == (int)LeftIdx) {
24476 Mask[i] = ExtIndex;
24477 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24478 Mask[i] = Vec2Offset + ExtIndex;
24479 }
24480 }
24481
24482 // The type the input vectors may have changed above.
24483 InVT1 = VecIn1.getValueType();
24484
24485 // If we already have a VecIn2, it should have the same type as VecIn1.
24486 // If we don't, get an undef/zero vector of the appropriate type.
24487 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24488 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24489
24490 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24491 if (ShuffleNumElems > NumElems)
24492 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24493
24494 return Shuffle;
24495}
24496 
24497static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24498 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24499
24500 // First, determine where the build vector is not undef.
24501 // TODO: We could extend this to handle zero elements as well as undefs.
24502 int NumBVOps = BV->getNumOperands();
24503 int ZextElt = -1;
24504 for (int i = 0; i != NumBVOps; ++i) {
24505 SDValue Op = BV->getOperand(i);
24506 if (Op.isUndef())
24507 continue;
24508 if (ZextElt == -1)
24509 ZextElt = i;
24510 else
24511 return SDValue();
24512 }
24513 // Bail out if there's no non-undef element.
24514 if (ZextElt == -1)
24515 return SDValue();
24516
24517 // The build vector contains some number of undef elements and exactly
24518 // one other element. That other element must be a zero-extended scalar
24519 // extracted from a vector at a constant index to turn this into a shuffle.
24520 // Also, require that the build vector does not implicitly truncate/extend
24521 // its elements.
24522 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
24523 EVT VT = BV->getValueType(0);
24524 SDValue Zext = BV->getOperand(ZextElt);
24525 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24526 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24527 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24528 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24529 return SDValue();
24530
24531 // The zero-extend must be a multiple of the source size, and we must be
24532 // building a vector of the same size as the source of the extract element.
24533 SDValue Extract = Zext.getOperand(0);
24534 unsigned DestSize = Zext.getValueSizeInBits();
24535 unsigned SrcSize = Extract.getValueSizeInBits();
24536 if (DestSize % SrcSize != 0 ||
24537 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24538 return SDValue();
24539
24540 // Create a shuffle mask that will combine the extracted element with zeros
24541 // and undefs.
24542 int ZextRatio = DestSize / SrcSize;
24543 int NumMaskElts = NumBVOps * ZextRatio;
24544 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
24545 for (int i = 0; i != NumMaskElts; ++i) {
24546 if (i / ZextRatio == ZextElt) {
24547 // The low bits of the (potentially translated) extracted element map to
24548 // the source vector. The high bits map to zero. We will use a zero vector
24549 // as the 2nd source operand of the shuffle, so use the 1st element of
24550 // that vector (mask value is number-of-elements) for the high bits.
24551 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
24552 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
24553 : NumMaskElts;
24554 }
24555
24556 // Undef elements of the build vector remain undef because we initialize
24557 // the shuffle mask with -1.
24558 }
24559
24560 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
24561 // bitcast (shuffle V, ZeroVec, VectorMask)
24562 SDLoc DL(BV);
24563 EVT VecVT = Extract.getOperand(0).getValueType();
24564 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
24565 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24566 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
24567 ZeroVec, ShufMask, DAG);
24568 if (!Shuf)
24569 return SDValue();
24570 return DAG.getBitcast(VT, Shuf);
24571}
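// Concrete instance of the fold above (hypothetical types, little-endian):
//   v2i64 = BUILD_VECTOR (zext i32 (extract_vector_elt v4i32:V, 2)), undef
// becomes a shuffle against an all-zeros vector, bitcast to the result type:
//   v2i64 = bitcast (v4i32 vector_shuffle<2,4,u,u> V, (v4i32 zero vector))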
24572
24573// FIXME: promote to STLExtras.
24574template <typename R, typename T>
24575static auto getFirstIndexOf(R &&Range, const T &Val) {
24576 auto I = find(Range, Val);
24577 if (I == Range.end())
24578 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
24579 return std::distance(Range.begin(), I);
24580}
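// Usage note: reduceBuildVecToShuffle below calls getFirstIndexOf(VecIn, V) to
// find the position of an already-recorded source vector, and gets -1 back when
// V has not been seen yet.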
24581
24582// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
24583// operations. If the types of the vectors we're extracting from allow it,
24584// turn this into a vector_shuffle node.
24585SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
24586 SDLoc DL(N);
24587 EVT VT = N->getValueType(0);
24588
24589 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
24590 if (!isTypeLegal(VT))
24591 return SDValue();
24592 
24593 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
24594 return V;
24595
24596 // May only combine to shuffle after legalize if shuffle is legal.
24597 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
24598 return SDValue();
24599
24600 bool UsesZeroVector = false;
24601 unsigned NumElems = N->getNumOperands();
24602
24603 // Record, for each element of the newly built vector, which input vector
24604 // that element comes from. -1 stands for undef, 0 for the zero vector,
24605 // and positive values for the input vectors.
24606 // VectorMask maps each element to its vector number, and VecIn maps vector
24607 // numbers to their initial SDValues.
24608
24609 SmallVector<int, 8> VectorMask(NumElems, -1);
24610 SmallVector<SDValue, 8> VecIn;
24611 VecIn.push_back(SDValue());
24612
24613 // If we have a single extract_element with a constant index, track the index
24614 // value.
24615 unsigned OneConstExtractIndex = ~0u;
24616
24617 // Count the number of extract_vector_elt sources (i.e. operands that are not constant or undef)
24618 unsigned NumExtracts = 0;
24619
24620 for (unsigned i = 0; i != NumElems; ++i) {
24621 SDValue Op = N->getOperand(i);
24622
24623 if (Op.isUndef())
24624 continue;
24625
24626 // See if we can use a blend with a zero vector.
24627 // TODO: Should we generalize this to a blend with an arbitrary constant
24628 // vector?
24629 if (isNullConstant(Op) || isNullFPConstant(Op)) {
24630 UsesZeroVector = true;
24631 VectorMask[i] = 0;
24632 continue;
24633 }
24634
24635 // Not an undef or zero. If the input is something other than an
24636 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
24637 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
24638 return SDValue();
24639
24640 SDValue ExtractedFromVec = Op.getOperand(0);
24641 if (ExtractedFromVec.getValueType().isScalableVector())
24642 return SDValue();
24643 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24644 if (!ExtractIdx)
24645 return SDValue();
24646
24647 if (ExtractIdx->getAsAPIntVal().uge(
24648 ExtractedFromVec.getValueType().getVectorNumElements()))
24649 return SDValue();
24650
24651 // All inputs must have the same element type as the output.
24652 if (VT.getVectorElementType() !=
24653 ExtractedFromVec.getValueType().getVectorElementType())
24654 return SDValue();
24655
24656 OneConstExtractIndex = ExtractIdx->getZExtValue();
24657 ++NumExtracts;
24658
24659 // Have we seen this input vector before?
24660 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
24661 // a map back from SDValues to numbers isn't worth it.
24662 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
24663 if (Idx == -1) { // A new source vector?
24664 Idx = VecIn.size();
24665 VecIn.push_back(ExtractedFromVec);
24666 }
24667
24668 VectorMask[i] = Idx;
24669 }
24670
24671 // If we didn't find at least one input vector, bail out.
24672 if (VecIn.size() < 2)
24673 return SDValue();
24674
24675 // If all the operands of the BUILD_VECTOR extract from the same
24676 // vector, then split the vector efficiently based on the maximum
24677 // vector access index and adjust the VectorMask and
24678 // VecIn accordingly.
24679 bool DidSplitVec = false;
24680 if (VecIn.size() == 2) {
24681 // If we only found a single constant indexed extract_vector_elt feeding the
24682 // build_vector, do not produce a more complicated shuffle if the extract is
24683 // cheap with other constant/undef elements. Skip broadcast patterns with
24684 // multiple uses in the build_vector.
24685
24686 // TODO: This should be more aggressive about skipping the shuffle
24687 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
24688 // index.
24689 if (NumExtracts == 1 &&
24690 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
24691 TLI.isTypeLegal(VT.getVectorElementType()) &&
24692 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
24693 return SDValue();
24694
24695 unsigned MaxIndex = 0;
24696 unsigned NearestPow2 = 0;
24697 SDValue Vec = VecIn.back();
24698 EVT InVT = Vec.getValueType();
24699 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
24700
24701 for (unsigned i = 0; i < NumElems; i++) {
24702 if (VectorMask[i] <= 0)
24703 continue;
24704 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
24705 IndexVec[i] = Index;
24706 MaxIndex = std::max(MaxIndex, Index);
24707 }
24708
24709 NearestPow2 = PowerOf2Ceil(MaxIndex);
24710 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
24711 NumElems * 2 < NearestPow2) {
24712 unsigned SplitSize = NearestPow2 / 2;
24713 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
24714 InVT.getVectorElementType(), SplitSize);
24715 if (TLI.isTypeLegal(SplitVT) &&
24716 SplitSize + SplitVT.getVectorNumElements() <=
24717 InVT.getVectorNumElements()) {
24718 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24719 DAG.getVectorIdxConstant(SplitSize, DL));
24720 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24721 DAG.getVectorIdxConstant(0, DL));
24722 VecIn.pop_back();
24723 VecIn.push_back(VecIn1);
24724 VecIn.push_back(VecIn2);
24725 DidSplitVec = true;
24726
24727 for (unsigned i = 0; i < NumElems; i++) {
24728 if (VectorMask[i] <= 0)
24729 continue;
24730 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
24731 }
24732 }
24733 }
24734 }
24735
24736 // Sort input vectors by decreasing vector element count,
24737 // while preserving the relative order of equally-sized vectors.
24738 // Note that we keep the first "implicit" zero vector as-is.
24739 SmallVector<SDValue, 8> SortedVecIn(VecIn);
24740 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
24741 [](const SDValue &a, const SDValue &b) {
24742 return a.getValueType().getVectorNumElements() >
24743 b.getValueType().getVectorNumElements();
24744 });
24745
24746 // We now also need to rebuild the VectorMask, because it referenced element
24747 // order in VecIn, and we just sorted them.
24748 for (int &SourceVectorIndex : VectorMask) {
24749 if (SourceVectorIndex <= 0)
24750 continue;
24751 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
24752 assert(Idx > 0 && Idx < SortedVecIn.size() &&
24753 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
24754 SourceVectorIndex = Idx;
24755 }
24756
24757 VecIn = std::move(SortedVecIn);
24758
24759 // TODO: Should this fire if some of the input vectors has illegal type (like
24760 // it does now), or should we let legalization run its course first?
24761
24762 // Shuffle phase:
24763 // Take pairs of vectors, and shuffle them so that the result has elements
24764 // from these vectors in the correct places.
24765 // For example, given:
24766 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
24767 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
24768 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
24769 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
24770 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
24771 // We will generate:
24772 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
24773 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
24774 SmallVector<SDValue, 4> Shuffles;
24775 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
24776 unsigned LeftIdx = 2 * In + 1;
24777 SDValue VecLeft = VecIn[LeftIdx];
24778 SDValue VecRight =
24779 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
24780
24781 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
24782 VecRight, LeftIdx, DidSplitVec))
24783 Shuffles.push_back(Shuffle);
24784 else
24785 return SDValue();
24786 }
24787
24788 // If we need the zero vector as an "ingredient" in the blend tree, add it
24789 // to the list of shuffles.
24790 if (UsesZeroVector)
24791 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24792 : DAG.getConstantFP(0.0, DL, VT));
24793
24794 // If we only have one shuffle, we're done.
24795 if (Shuffles.size() == 1)
24796 return Shuffles[0];
24797
24798 // Update the vector mask to point to the post-shuffle vectors.
24799 for (int &Vec : VectorMask)
24800 if (Vec == 0)
24801 Vec = Shuffles.size() - 1;
24802 else
24803 Vec = (Vec - 1) / 2;
24804
24805 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24806 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24807 // generate:
24808 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24809 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24810 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24811 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24812 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24813 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24814 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24815
24816 // Make sure the initial size of the shuffle list is even.
24817 if (Shuffles.size() % 2)
24818 Shuffles.push_back(DAG.getUNDEF(VT));
24819
24820 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24821 if (CurSize % 2) {
24822 Shuffles[CurSize] = DAG.getUNDEF(VT);
24823 CurSize++;
24824 }
24825 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24826 int Left = 2 * In;
24827 int Right = 2 * In + 1;
24828 SmallVector<int, 8> Mask(NumElems, -1);
24829 SDValue L = Shuffles[Left];
24830 ArrayRef<int> LMask;
24831 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24832 L.use_empty() && L.getOperand(1).isUndef() &&
24833 L.getOperand(0).getValueType() == L.getValueType();
24834 if (IsLeftShuffle) {
24835 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24836 L = L.getOperand(0);
24837 }
24838 SDValue R = Shuffles[Right];
24839 ArrayRef<int> RMask;
24840 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24841 R.use_empty() && R.getOperand(1).isUndef() &&
24842 R.getOperand(0).getValueType() == R.getValueType();
24843 if (IsRightShuffle) {
24844 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24845 R = R.getOperand(0);
24846 }
24847 for (unsigned I = 0; I != NumElems; ++I) {
24848 if (VectorMask[I] == Left) {
24849 Mask[I] = I;
24850 if (IsLeftShuffle)
24851 Mask[I] = LMask[I];
24852 VectorMask[I] = In;
24853 } else if (VectorMask[I] == Right) {
24854 Mask[I] = I + NumElems;
24855 if (IsRightShuffle)
24856 Mask[I] = RMask[I] + NumElems;
24857 VectorMask[I] = In;
24858 }
24859 }
24860
24861 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24862 }
24863 }
24864 return Shuffles[0];
24865}
24866
24867// Try to turn a build vector of zero extends of extract vector elts into a
24868// vector zero extend and possibly an extract subvector.
24869// TODO: Support sign extend?
24870// TODO: Allow undef elements?
24871SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24872 if (LegalOperations)
24873 return SDValue();
24874
24875 EVT VT = N->getValueType(0);
24876
24877 bool FoundZeroExtend = false;
24878 SDValue Op0 = N->getOperand(0);
24879 auto checkElem = [&](SDValue Op) -> int64_t {
24880 unsigned Opc = Op.getOpcode();
24881 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24882 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24883 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24884 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24885 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24886 return C->getZExtValue();
24887 return -1;
24888 };
24889
24890 // Make sure the first element matches
24891 // (zext (extract_vector_elt X, C))
24892 // Offset must be a constant multiple of the
24893 // known-minimum vector length of the result type.
24894 int64_t Offset = checkElem(Op0);
24895 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24896 return SDValue();
24897
24898 unsigned NumElems = N->getNumOperands();
24899 SDValue In = Op0.getOperand(0).getOperand(0);
24900 EVT InSVT = In.getValueType().getScalarType();
24901 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24902
24903 // Don't create an illegal input type after type legalization.
24904 if (LegalTypes && !TLI.isTypeLegal(InVT))
24905 return SDValue();
24906
24907 // Ensure all the elements come from the same vector and are adjacent.
24908 for (unsigned i = 1; i != NumElems; ++i) {
24909 if ((Offset + i) != checkElem(N->getOperand(i)))
24910 return SDValue();
24911 }
24912
24913 SDLoc DL(N);
24914 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24915 Op0.getOperand(0).getOperand(1));
24916 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24917 VT, In);
24918}
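// Illustrative sketch of the combine above (hypothetical types, X: v8i16):
//   v4i32 = BUILD_VECTOR (zext (extract_vector_elt X, 4)),
//                        (zext (extract_vector_elt X, 5)),
//                        (zext (extract_vector_elt X, 6)),
//                        (zext (extract_vector_elt X, 7))
// becomes a single vector zero extend of an extracted subvector:
//   v4i32 = zero_extend (v4i16 extract_subvector X, 4)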
24919
24920// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
24921// and all other elements being constant zeros, granularize the BUILD_VECTOR's
24922// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
24923// This pattern can appear during legalization.
24924//
24925// NOTE: This can be generalized to allow more than a single
24926// non-constant-zero op, UNDEF's, and to be KnownBits-based.
24927SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24928 // Don't run this after legalization. Targets may have other preferences.
24929 if (Level >= AfterLegalizeDAG)
24930 return SDValue();
24931
24932 // FIXME: support big-endian.
24933 if (DAG.getDataLayout().isBigEndian())
24934 return SDValue();
24935
24936 EVT VT = N->getValueType(0);
24937 EVT OpVT = N->getOperand(0).getValueType();
24938 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24939
24940 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24941
24942 if (!TLI.isTypeLegal(OpIntVT) ||
24943 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24944 return SDValue();
24945
24946 unsigned EltBitwidth = VT.getScalarSizeInBits();
24947 // NOTE: the actual width of operands may be wider than that!
24948
24949 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24950 // active bits they all have? We'll want to truncate them all to that width.
24951 unsigned ActiveBits = 0;
24952 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24953 for (auto I : enumerate(N->ops())) {
24954 SDValue Op = I.value();
24955 // FIXME: support UNDEF elements?
24956 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24957 unsigned OpActiveBits =
24958 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24959 if (OpActiveBits == 0) {
24960 KnownZeroOps.setBit(I.index());
24961 continue;
24962 }
24963 // Profitability check: don't allow non-zero constant operands.
24964 return SDValue();
24965 }
24966 // Profitability check: there must only be a single non-zero operand,
24967 // and it must be the first operand of the BUILD_VECTOR.
24968 if (I.index() != 0)
24969 return SDValue();
24970 // The operand must be a zero-extension itself.
24971 // FIXME: this could be generalized to known leading zeros check.
24972 if (Op.getOpcode() != ISD::ZERO_EXTEND)
24973 return SDValue();
24974 unsigned CurrActiveBits =
24975 Op.getOperand(0).getValueSizeInBits().getFixedValue();
24976 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24977 ActiveBits = CurrActiveBits;
24978 // We want to at least halve the element size.
24979 if (2 * ActiveBits > EltBitwidth)
24980 return SDValue();
24981 }
24982
24983 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24984 if (ActiveBits == 0)
24985 return SDValue();
24986
24987 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
24988 // into how many chunks can we split our element width?
24989 EVT NewScalarIntVT, NewIntVT;
24990 std::optional<unsigned> Factor;
24991 // We can split the element into at least two chunks, but not into more
24992 // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor
24993 // for which the element width is a multiple of it,
24994 // and the resulting types/operations on that chunk width are legal.
24995 assert(2 * ActiveBits <= EltBitwidth &&
24996 "We know that half or less bits of the element are active.");
24997 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24998 if (EltBitwidth % Scale != 0)
24999 continue;
25000 unsigned ChunkBitwidth = EltBitwidth / Scale;
25001 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
25002 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
25003 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
25004 Scale * N->getNumOperands());
25005 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
25006 (LegalOperations &&
25007 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
25008 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
25009 continue;
25010 Factor = Scale;
25011 break;
25012 }
25013 if (!Factor)
25014 return SDValue();
25015
25016 SDLoc DL(N);
25017 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
25018
25019 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
25020 SmallVector<SDValue, 16> NewOps;
25021 NewOps.reserve(NewIntVT.getVectorNumElements());
25022 for (auto I : enumerate(N->ops())) {
25023 SDValue Op = I.value();
25024 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
25025 unsigned SrcOpIdx = I.index();
25026 if (KnownZeroOps[SrcOpIdx]) {
25027 NewOps.append(*Factor, ZeroOp);
25028 continue;
25029 }
25030 Op = DAG.getBitcast(OpIntVT, Op);
25031 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
25032 NewOps.emplace_back(Op);
25033 NewOps.append(*Factor - 1, ZeroOp);
25034 }
25035 assert(NewOps.size() == NewIntVT.getVectorNumElements());
25036 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
25037 NewBV = DAG.getBitcast(VT, NewBV);
25038 return NewBV;
25039}
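// Illustrative sketch of the transform above (hypothetical types, little-endian):
//   v4i64 = BUILD_VECTOR (zero_extend i32:x), 0, 0, 0
// is rebuilt with half-width elements so the zero_extend becomes implicit:
//   v4i64 = bitcast (v8i32 BUILD_VECTOR x, 0, 0, 0, 0, 0, 0, 0)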
25040
25041SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
25042 EVT VT = N->getValueType(0);
25043
25044 // A vector built entirely of undefs is undef.
25045 if (ISD::allOperandsUndef(N))
25046 return DAG.getUNDEF(VT);
25047
25048 // If this is a splat of a bitcast from another vector, change to a
25049 // concat_vector.
25050 // For example:
25051 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25052 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25053 //
25054 // If X is a build_vector itself, the concat can become a larger build_vector.
25055 // TODO: Maybe this is useful for non-splat too?
25056 if (!LegalOperations) {
25057 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
25058 // Only change build_vector to a concat_vector if the splat value type is
25059 // same as the vector element type.
25060 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25061 Splat = peekThroughBitcasts(Splat);
25062 EVT SrcVT = Splat.getValueType();
25063 if (SrcVT.isVector()) {
25064 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25065 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25066 SrcVT.getVectorElementType(), NumElts);
25067 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25068 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25069 SDValue Concat =
25070 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25071 return DAG.getBitcast(VT, Concat);
25072 }
25073 }
25074 }
25075 }
25076
25077 // Check if we can express BUILD VECTOR via subvector extract.
25078 if (!LegalTypes && (N->getNumOperands() > 1)) {
25079 SDValue Op0 = N->getOperand(0);
25080 auto checkElem = [&](SDValue Op) -> uint64_t {
25081 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25082 (Op0.getOperand(0) == Op.getOperand(0)))
25083 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25084 return CNode->getZExtValue();
25085 return -1;
25086 };
25087
25088 int Offset = checkElem(Op0);
25089 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25090 if (Offset + i != checkElem(N->getOperand(i))) {
25091 Offset = -1;
25092 break;
25093 }
25094 }
25095
25096 if ((Offset == 0) &&
25097 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25098 return Op0.getOperand(0);
25099 if ((Offset != -1) &&
25100 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25101 0)) // IDX must be multiple of output size.
25102 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25103 Op0.getOperand(0), Op0.getOperand(1));
25104 }
25105
25106 if (SDValue V = convertBuildVecZextToZext(N))
25107 return V;
25108
25109 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25110 return V;
25111
25112 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25113 return V;
25114
25115 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25116 return V;
25117
25118 if (SDValue V = reduceBuildVecToShuffle(N))
25119 return V;
25120
25121 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25122 // Do this late as some of the above may replace the splat.
25123 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
25124 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25125 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25126 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25127 }
25128
25129 return SDValue();
25130}
25131 
25132static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25133 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25134 EVT OpVT = N->getOperand(0).getValueType();
25135
25136 // If the operands are legal vectors, leave them alone.
25137 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25138 return SDValue();
25139
25140 SDLoc DL(N);
25141 EVT VT = N->getValueType(0);
25142 SmallVector<SDValue, 8> Ops;
25143 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25144
25145 // Keep track of what we encounter.
25146 EVT AnyFPVT;
25147
25148 for (const SDValue &Op : N->ops()) {
25149 if (ISD::BITCAST == Op.getOpcode() &&
25150 !Op.getOperand(0).getValueType().isVector())
25151 Ops.push_back(Op.getOperand(0));
25152 else if (Op.isUndef())
25153 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
25154 else
25155 return SDValue();
25156
25157 // Note whether we encounter an integer or floating point scalar.
25158 // If it's neither, bail out, it could be something weird like x86mmx.
25159 EVT LastOpVT = Ops.back().getValueType();
25160 if (LastOpVT.isFloatingPoint())
25161 AnyFPVT = LastOpVT;
25162 else if (!LastOpVT.isInteger())
25163 return SDValue();
25164 }
25165
25166 // If any of the operands is a floating point scalar bitcast to a vector,
25167 // use floating point types throughout, and bitcast everything.
25168 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25169 if (AnyFPVT != EVT()) {
25170 SVT = AnyFPVT;
25171 for (SDValue &Op : Ops) {
25172 if (Op.getValueType() == SVT)
25173 continue;
25174 if (Op.isUndef())
25175 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
25176 else
25177 Op = DAG.getBitcast(SVT, Op);
25178 }
25179 }
25180
25181 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25182 VT.getSizeInBits() / SVT.getSizeInBits());
25183 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25184}
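// Illustrative sketch (hypothetical types; the v1i64 operand type is assumed to
// be illegal on the target):
//   v2i64 = concat_vectors (v1i64 bitcast i64:a), (v1i64 bitcast i64:b)
// is rebuilt directly from the scalars:
//   v2i64 = bitcast (v2i64 BUILD_VECTOR a, b)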
25185
25186// Attempt to merge nested concat_vectors/undefs.
25187// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25188// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25189static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25190 SelectionDAG &DAG) {
25191 EVT VT = N->getValueType(0);
25192
25193 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25194 EVT SubVT;
25195 SDValue FirstConcat;
25196 for (const SDValue &Op : N->ops()) {
25197 if (Op.isUndef())
25198 continue;
25199 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25200 return SDValue();
25201 if (!FirstConcat) {
25202 SubVT = Op.getOperand(0).getValueType();
25203 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25204 return SDValue();
25205 FirstConcat = Op;
25206 continue;
25207 }
25208 if (SubVT != Op.getOperand(0).getValueType())
25209 return SDValue();
25210 }
25211 assert(FirstConcat && "Concat of all-undefs found");
25212
25213 SmallVector<SDValue> ConcatOps;
25214 for (const SDValue &Op : N->ops()) {
25215 if (Op.isUndef()) {
25216 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25217 continue;
25218 }
25219 ConcatOps.append(Op->op_begin(), Op->op_end());
25220 }
25221 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25222}
25223
25224// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25225// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25226// most two distinct vectors the same size as the result, attempt to turn this
25227// into a legal shuffle.
25228static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25229 EVT VT = N->getValueType(0);
25230 EVT OpVT = N->getOperand(0).getValueType();
25231
25232 // We currently can't generate an appropriate shuffle for a scalable vector.
25233 if (VT.isScalableVector())
25234 return SDValue();
25235
25236 int NumElts = VT.getVectorNumElements();
25237 int NumOpElts = OpVT.getVectorNumElements();
25238
25239 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25240 SmallVector<int, 8> Mask;
25241 
25242 for (SDValue Op : N->ops()) {
25243 Op = peekThroughBitcasts(Op);
25244 
25245 // UNDEF nodes convert to UNDEF shuffle mask values.
25246 if (Op.isUndef()) {
25247 Mask.append((unsigned)NumOpElts, -1);
25248 continue;
25249 }
25250
25251 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25252 return SDValue();
25253
25254 // What vector are we extracting the subvector from and at what index?
25255 SDValue ExtVec = Op.getOperand(0);
25256 int ExtIdx = Op.getConstantOperandVal(1);
25257
25258 // We want the EVT of the original extraction to correctly scale the
25259 // extraction index.
25260 EVT ExtVT = ExtVec.getValueType();
25261 ExtVec = peekThroughBitcasts(ExtVec);
25262
25263 // UNDEF nodes convert to UNDEF shuffle mask values.
25264 if (ExtVec.isUndef()) {
25265 Mask.append((unsigned)NumOpElts, -1);
25266 continue;
25267 }
25268
25269 // Ensure that we are extracting a subvector from a vector the same
25270 // size as the result.
25271 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25272 return SDValue();
25273
25274 // Scale the subvector index to account for any bitcast.
25275 int NumExtElts = ExtVT.getVectorNumElements();
25276 if (0 == (NumExtElts % NumElts))
25277 ExtIdx /= (NumExtElts / NumElts);
25278 else if (0 == (NumElts % NumExtElts))
25279 ExtIdx *= (NumElts / NumExtElts);
25280 else
25281 return SDValue();
25282
25283 // At most we can reference 2 inputs in the final shuffle.
25284 if (SV0.isUndef() || SV0 == ExtVec) {
25285 SV0 = ExtVec;
25286 for (int i = 0; i != NumOpElts; ++i)
25287 Mask.push_back(i + ExtIdx);
25288 } else if (SV1.isUndef() || SV1 == ExtVec) {
25289 SV1 = ExtVec;
25290 for (int i = 0; i != NumOpElts; ++i)
25291 Mask.push_back(i + ExtIdx + NumElts);
25292 } else {
25293 return SDValue();
25294 }
25295 }
25296
25297 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25298 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25299 DAG.getBitcast(VT, SV1), Mask, DAG);
25300}
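// Illustrative sketch of the combine above (hypothetical types, a/b: v4i32):
//   v4i32 = concat_vectors (v2i32 extract_subvector a, 2),
//                          (v2i32 extract_subvector b, 0)
// can be expressed as one shuffle of the full-width source vectors:
//   v4i32 = vector_shuffle<2,3,4,5> a, b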
25301 
25302static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25303 unsigned CastOpcode = N->getOperand(0).getOpcode();
25304 switch (CastOpcode) {
25305 case ISD::SINT_TO_FP:
25306 case ISD::UINT_TO_FP:
25307 case ISD::FP_TO_SINT:
25308 case ISD::FP_TO_UINT:
25309 // TODO: Allow more opcodes?
25310 // case ISD::BITCAST:
25311 // case ISD::TRUNCATE:
25312 // case ISD::ZERO_EXTEND:
25313 // case ISD::SIGN_EXTEND:
25314 // case ISD::FP_EXTEND:
25315 break;
25316 default:
25317 return SDValue();
25318 }
25319
25320 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25321 if (!SrcVT.isVector())
25322 return SDValue();
25323
25324 // All operands of the concat must be the same kind of cast from the same
25325 // source type.
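// e.g. concat (v4f32 sint_to_fp X:v4i32), (v4f32 sint_to_fp Y:v4i32)
//        --> v8f32 sint_to_fp (v8i32 concat X, Y)
// (subject to the legality checks below)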
25326 SmallVector<SDValue, 4> SrcOps;
25327 for (SDValue Op : N->ops()) {
25328 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25329 Op.getOperand(0).getValueType() != SrcVT)
25330 return SDValue();
25331 SrcOps.push_back(Op.getOperand(0));
25332 }
25333
25334 // The wider cast must be supported by the target. This is unusual because
25335 // the type that determines operation support depends on the opcode. In addition,
25336 // check the other type in the cast to make sure this is really legal.
25337 EVT VT = N->getValueType(0);
25338 EVT SrcEltVT = SrcVT.getVectorElementType();
25339 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25340 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25341 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25342 switch (CastOpcode) {
25343 case ISD::SINT_TO_FP:
25344 case ISD::UINT_TO_FP:
25345 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25346 !TLI.isTypeLegal(VT))
25347 return SDValue();
25348 break;
25349 case ISD::FP_TO_SINT:
25350 case ISD::FP_TO_UINT:
25351 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25352 !TLI.isTypeLegal(ConcatSrcVT))
25353 return SDValue();
25354 break;
25355 default:
25356 llvm_unreachable("Unexpected cast opcode");
25357 }
25358
25359 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
25360 SDLoc DL(N);
25361 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25362 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25363}
25364
25365// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
25366// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
25367 // to that SHUFFLE_VECTOR; if so, create a wider SHUFFLE_VECTOR.
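// For example, with v4i32 X and a v8i32 result (assuming the shuffle has no
// other users):
//   concat (shuffle X, undef, <3,2,1,0>), X
//     --> shuffle (concat X, undef), undef, <3,2,1,0,0,1,2,3>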
25368static SDValue combineConcatVectorOfShuffleAndItsOperands(
25369 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25370 bool LegalOperations) {
25371 EVT VT = N->getValueType(0);
25372 EVT OpVT = N->getOperand(0).getValueType();
25373 if (VT.isScalableVector())
25374 return SDValue();
25375
25376 // For now, only allow simple 2-operand concatenations.
25377 if (N->getNumOperands() != 2)
25378 return SDValue();
25379
25380 // Don't create illegal types/shuffles when not allowed to.
25381 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25382 (LegalOperations &&
25383 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
25384 return SDValue();
25385
25386 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25387 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25388 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25389 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25390 // (4) and for now, the SHUFFLE_VECTOR must be unary.
25391 ShuffleVectorSDNode *SVN = nullptr;
25392 for (SDValue Op : N->ops()) {
25393 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25394 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25395 all_of(N->ops(), [CurSVN](SDValue Op) {
25396 // FIXME: can we allow UNDEF operands?
25397 return !Op.isUndef() &&
25398 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25399 })) {
25400 SVN = CurSVN;
25401 break;
25402 }
25403 }
25404 if (!SVN)
25405 return SDValue();
25406
25407 // We are going to pad the shuffle operands, so any index that was picking
25408 // from the second operand must be adjusted.
25409 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25410 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25411
25412 // Identity masks for the operands of the (padded) shuffle.
25413 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25414 MutableArrayRef<int> FirstShufOpIdentityMask =
25415 MutableArrayRef<int>(IdentityMask)
25416 .take_front(OpVT.getVectorNumElements());
25417 MutableArrayRef<int> SecondShufOpIdentityMask =
25418 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
25419 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25420 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25421 OpVT.getVectorNumElements());
25422
25423 // New combined shuffle mask.
25424 SmallVector<int, 32> Mask;
25425 Mask.reserve(VT.getVectorNumElements());
25426 for (SDValue Op : N->ops()) {
25427 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25428 if (Op.getNode() == SVN) {
25429 append_range(Mask, AdjustedMask);
25430 continue;
25431 }
25432 if (Op == SVN->getOperand(0)) {
25433 append_range(Mask, FirstShufOpIdentityMask);
25434 continue;
25435 }
25436 if (Op == SVN->getOperand(1)) {
25437 append_range(Mask, SecondShufOpIdentityMask);
25438 continue;
25439 }
25440 llvm_unreachable("Unexpected operand!");
25441 }
25442
25443 // Don't create illegal shuffle masks.
25444 if (!TLI.isShuffleMaskLegal(Mask, VT))
25445 return SDValue();
25446
25447 // Pad the shuffle operands with UNDEF.
25448 SDLoc dl(N);
25449 std::array<SDValue, 2> ShufOps;
25450 for (auto I : zip(SVN->ops(), ShufOps)) {
25451 SDValue ShufOp = std::get<0>(I);
25452 SDValue &NewShufOp = std::get<1>(I);
25453 if (ShufOp.isUndef())
25454 NewShufOp = DAG.getUNDEF(VT);
25455 else {
25456 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25457 DAG.getUNDEF(OpVT));
25458 ShufOpParts[0] = ShufOp;
25459 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25460 }
25461 }
25462 // Finally, create the new wide shuffle.
25463 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25464}
25465
25466static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25467 const TargetLowering &TLI,
25468 bool LegalTypes,
25469 bool LegalOperations) {
25470 EVT VT = N->getValueType(0);
25471
25472 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25473 // the type and operation are legal. The Hexagon target has custom
25474 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25475 // concatenates them. Therefore, custom lowering must also be rejected in
25476 // order to avoid an infinite loop.
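// e.g. (nxv8i32 concat (nxv4i32 splat_vector X), (nxv4i32 splat_vector X))
//        --> nxv8i32 splat_vector X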
25477 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25478 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25479 return SDValue();
25480
25481 SDValue Op0 = N->getOperand(0);
25482 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25483 return SDValue();
25484
25485 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25486}
25487
25488SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25489 // If we only have one input vector, we don't need to do any concatenation.
25490 if (N->getNumOperands() == 1)
25491 return N->getOperand(0);
25492
25493 // Check if all of the operands are undefs.
25494 EVT VT = N->getValueType(0);
25495 if (ISD::allOperandsUndef(N))
25496 return DAG.getUNDEF(VT);
25497
25498 // Optimize concat_vectors where all but the first of the vectors are undef.
25499 if (all_of(drop_begin(N->ops()),
25500 [](const SDValue &Op) { return Op.isUndef(); })) {
25501 SDValue In = N->getOperand(0);
25502 assert(In.getValueType().isVector() && "Must concat vectors");
25503
25504 // If the input is a concat_vectors, just make a larger concat by padding
25505 // with smaller undefs.
25506 //
25507 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25508 // here could cause an infinite loop. That legalizing happens when LegalDAG
25509 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25510 // scalable.
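// e.g. (v8i32 concat (v4i32 concat A, B), undef), with v2i32 A and B,
//        --> (v8i32 concat A, B, undef, undef)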
25511 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25512 !(LegalDAG && In.getValueType().isScalableVector())) {
25513 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25514 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
25515 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25516 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25517 }
25518
25519 SDValue Scalar = peekThroughOneUseBitcasts(In);
25520
25521 // concat_vectors(scalar_to_vector(scalar), undef) ->
25522 // scalar_to_vector(scalar)
25523 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25524 Scalar.hasOneUse()) {
25525 EVT SVT = Scalar.getValueType().getVectorElementType();
25526 if (SVT == Scalar.getOperand(0).getValueType())
25527 Scalar = Scalar.getOperand(0);
25528 }
25529
25530 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25531 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25532 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25533 // look through the trunc so we can still do the transform:
25534 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25535 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25536 !TLI.isTypeLegal(Scalar.getValueType()) &&
25537 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25538 Scalar = Scalar->getOperand(0);
25539
25540 EVT SclTy = Scalar.getValueType();
25541
25542 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
25543 return SDValue();
25544
25545 // Bail out if the vector size is not a multiple of the scalar size.
25546 if (VT.getSizeInBits() % SclTy.getSizeInBits())
25547 return SDValue();
25548
25549 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
25550 if (VNTNumElms < 2)
25551 return SDValue();
25552
25553 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
25554 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
25555 return SDValue();
25556
25557 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
25558 return DAG.getBitcast(VT, Res);
25559 }
25560 }
25561
25562 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
25563 // We have already tested above for an UNDEF only concatenation.
25564 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
25565 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
25566 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
25567 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
25568 };
25569 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
25570 SmallVector<SDValue, 8> Opnds;
25571 EVT SVT = VT.getScalarType();
25572
25573 EVT MinVT = SVT;
25574 if (!SVT.isFloatingPoint()) {
25575 // If the BUILD_VECTORs are built from integers, they may have different
25576 // operand types. Get the smallest type and truncate all operands to it.
25577 bool FoundMinVT = false;
25578 for (const SDValue &Op : N->ops())
25579 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25580 EVT OpSVT = Op.getOperand(0).getValueType();
25581 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
25582 FoundMinVT = true;
25583 }
25584 assert(FoundMinVT && "Concat vector type mismatch");
25585 }
25586
25587 for (const SDValue &Op : N->ops()) {
25588 EVT OpVT = Op.getValueType();
25589 unsigned NumElts = OpVT.getVectorNumElements();
25590
25591 if (Op.isUndef())
25592 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
25593
25594 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25595 if (SVT.isFloatingPoint()) {
25596 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
25597 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
25598 } else {
25599 for (unsigned i = 0; i != NumElts; ++i)
25600 Opnds.push_back(
25601 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
25602 }
25603 }
25604 }
25605
25606 assert(VT.getVectorNumElements() == Opnds.size() &&
25607 "Concat vector type mismatch");
25608 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
25609 }
25610
25611 if (SDValue V =
25612 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25613 return V;
25614
25615 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
25616 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
25617 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
25618 return V;
25619
25620 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
25621 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
25622 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
25623 return V;
25624
25625 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
25626 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
25627 return V;
25628 }
25629
25630 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
25631 return V;
25632
25633 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
25634 N, DAG, TLI, LegalTypes, LegalOperations))
25635 return V;
25636
25637 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
25638 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
25639 // operands and look for CONCAT operations that place the incoming vectors
25640 // at the exact same location.
25641 //
25642 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
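// e.g. (v8i32 concat (v4i32 extract_subvector X, 0),
//                    (v4i32 extract_subvector X, 4)) is just X when X is v8i32.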
25643 SDValue SingleSource = SDValue();
25644 unsigned PartNumElem =
25645 N->getOperand(0).getValueType().getVectorMinNumElements();
25646
25647 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25648 SDValue Op = N->getOperand(i);
25649
25650 if (Op.isUndef())
25651 continue;
25652
25653 // Check if this is the identity extract:
25654 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25655 return SDValue();
25656
25657 // Find the single incoming vector for the extract_subvector.
25658 if (SingleSource.getNode()) {
25659 if (Op.getOperand(0) != SingleSource)
25660 return SDValue();
25661 } else {
25662 SingleSource = Op.getOperand(0);
25663
25664 // Check the source type is the same as the type of the result.
25665 // If not, this concat may extend the vector, so we can not
25666 // optimize it away.
25667 if (SingleSource.getValueType() != N->getValueType(0))
25668 return SDValue();
25669 }
25670
25671 // Check that we are reading from the identity index.
25672 unsigned IdentityIndex = i * PartNumElem;
25673 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
25674 return SDValue();
25675 }
25676
25677 if (SingleSource.getNode())
25678 return SingleSource;
25679
25680 return SDValue();
25681}
25682
25683SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25684 // Check to see if all operands are identical.
25685 if (!llvm::all_equal(N->op_values()))
25686 return SDValue();
25687
25688 // Check to see if the identical operand is a splat.
25689 if (!DAG.isSplatValue(N->getOperand(0)))
25690 return SDValue();
25691
25692 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
25693 SmallVector<SDValue, 4> Ops;
25694 Ops.append(N->op_values().begin(), N->op_values().end());
25695 return CombineTo(N, &Ops);
25696}
25697
25698// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
25699// if the subvector can be sourced for free.
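// e.g. with v4i32 SubVT and Index == 4:
//   (insert_subvector ?, Y:v4i32, 4)        --> Y
//   (concat_vectors A:v4i32, B:v4i32, ...)  --> B  (operand 4/4 == 1)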
25700static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
25701 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25702 V.getOperand(1).getValueType() == SubVT &&
25703 V.getConstantOperandAPInt(2) == Index) {
25704 return V.getOperand(1);
25705 }
25706 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
25707 V.getOperand(0).getValueType() == SubVT &&
25708 (Index % SubVT.getVectorMinNumElements()) == 0) {
25709 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
25710 return V.getOperand(SubIdx);
25711 }
25712 return SDValue();
25713}
25714
25715static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25716 unsigned Index, const SDLoc &DL,
25717 SelectionDAG &DAG,
25718 bool LegalOperations) {
25719 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25720 unsigned BinOpcode = BinOp.getOpcode();
25721 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
25722 return SDValue();
25723
25724 EVT VecVT = BinOp.getValueType();
25725 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
25726 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
25727 return SDValue();
25728 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
25729 return SDValue();
25730
25731 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
25732 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
25733
25734 // TODO: We could handle the case where only 1 operand is being inserted by
25735 // creating an extract of the other operand, but that requires checking
25736 // number of uses and/or costs.
25737 if (!Sub0 || !Sub1)
25738 return SDValue();
25739
25740 // We are inserting both operands of the wide binop only to extract back
25741 // to the narrow vector size. Eliminate all of the insert/extract:
25742 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25743 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
25744}
25745
25746/// If we are extracting a subvector produced by a wide binary operator try
25747/// to use a narrow binary operator and/or avoid concatenation and extraction.
25748static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25749 const SDLoc &DL, SelectionDAG &DAG,
25750 bool LegalOperations) {
25751 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
25752 // some of these bailouts with other transforms.
25753
25754 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
25755 LegalOperations))
25756 return V;
25757
25758 // We are looking for an optionally bitcasted wide vector binary operator
25759 // feeding an extract subvector.
25760 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25761 SDValue BinOp = peekThroughBitcasts(Src);
25762 unsigned BOpcode = BinOp.getOpcode();
25763 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
25764 return SDValue();
25765
25766 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
25767 // reduced to the unary fneg when it is visited, and we probably want to deal
25768 // with fneg in a target-specific way.
25769 if (BOpcode == ISD::FSUB) {
25770 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
25771 if (C && C->getValueAPF().isNegZero())
25772 return SDValue();
25773 }
25774
25775 // The binop must be a vector type, so we can extract some fraction of it.
25776 EVT WideBVT = BinOp.getValueType();
25777 // The optimisations below currently assume we are dealing with fixed length
25778 // vectors. It is possible to add support for scalable vectors, but at the
25779 // moment we've done no analysis to prove whether they are profitable or not.
25780 if (!WideBVT.isFixedLengthVector())
25781 return SDValue();
25782
25783 assert((Index % VT.getVectorNumElements()) == 0 &&
25784 "Extract index is not a multiple of the vector length.");
25785
25786 // Bail out if this is not a proper multiple width extraction.
25787 unsigned WideWidth = WideBVT.getSizeInBits();
25788 unsigned NarrowWidth = VT.getSizeInBits();
25789 if (WideWidth % NarrowWidth != 0)
25790 return SDValue();
25791
25792 // Bail out if we are extracting a fraction of a single operation. This can
25793 // occur because we potentially looked through a bitcast of the binop.
25794 unsigned NarrowingRatio = WideWidth / NarrowWidth;
25795 unsigned WideNumElts = WideBVT.getVectorNumElements();
25796 if (WideNumElts % NarrowingRatio != 0)
25797 return SDValue();
25798
25799 // Bail out if the target does not support a narrower version of the binop.
25800 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
25801 WideNumElts / NarrowingRatio);
25802 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
25803 LegalOperations))
25804 return SDValue();
25805
25806 // If extraction is cheap, we don't need to look at the binop operands
25807 // for concat ops. The narrow binop alone makes this transform profitable.
25808 // We can't just reuse the original extract index operand because we may have
25809 // bitcasted.
25810 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
25811 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
25812 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
25813 BinOp.hasOneUse() && Src->hasOneUse()) {
25814 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25815 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25816 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25817 BinOp.getOperand(0), NewExtIndex);
25818 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25819 BinOp.getOperand(1), NewExtIndex);
25820 SDValue NarrowBinOp =
25821 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25822 return DAG.getBitcast(VT, NarrowBinOp);
25823 }
25824
25825 // Only handle the case where we are doubling and then halving. A larger ratio
25826 // may require more than two narrow binops to replace the wide binop.
25827 if (NarrowingRatio != 2)
25828 return SDValue();
25829
25830 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25831 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25832 // flavors, but no other 256-bit integer support. This could be extended to
25833 // handle any binop, but that may require fixing/adding other folds to avoid
25834 // codegen regressions.
25835 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25836 return SDValue();
25837
25838 // We need at least one concatenation operation of a binop operand to make
25839 // this transform worthwhile. The concat must double the input vector sizes.
25840 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25841 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25842 return V.getOperand(ConcatOpNum);
25843 return SDValue();
25844 };
25845 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25846 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25847
25848 if (SubVecL || SubVecR) {
25849 // If a binop operand was not the result of a concat, we must extract a
25850 // half-sized operand for our new narrow binop:
25851 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25852 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25853 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25854 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25855 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25856 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25857 BinOp.getOperand(0), IndexC);
25858
25859 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25860 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25861 BinOp.getOperand(1), IndexC);
25862
25863 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25864 return DAG.getBitcast(VT, NarrowBinOp);
25865 }
25866
25867 return SDValue();
25868}
25869
25870/// If we are extracting a subvector from a wide vector load, convert to a
25871/// narrow load to eliminate the extraction:
25872/// (extract_subvector (load wide vector)) --> (load narrow vector)
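/// For example, extracting the high v2f64 half of a v4f64 load becomes a
/// v2f64 load from the original base address plus 16 bytes
/// (VT.getStoreSize() * (Index / NumElts)), when the target allows reducing
/// the load width.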
25873static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
25874 const SDLoc &DL, SelectionDAG &DAG) {
25875 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25876 if (DAG.getDataLayout().isBigEndian())
25877 return SDValue();
25878
25879 auto *Ld = dyn_cast<LoadSDNode>(Src);
25880 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
25881 return SDValue();
25882
25883 // We can only create byte sized loads.
25884 if (!VT.isByteSized())
25885 return SDValue();
25886
25887 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25888 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, VT))
25889 return SDValue();
25890
25891 unsigned NumElts = VT.getVectorMinNumElements();
25892 // A fixed length vector being extracted from a scalable vector
25893 // may not be any *smaller* than the scalable one.
25894 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25895 return SDValue();
25896
25897 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25898 // multiple of the minimum number of elements in the result type.
25899 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25900 "multiple of the result's element count");
25901
25902 // It's fine to use TypeSize here as we know the offset will not be negative.
25903 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25904 std::optional<unsigned> ByteOffset;
25905 if (Offset.isFixed())
25906 ByteOffset = Offset.getFixedValue();
25907
25908 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
25909 return SDValue();
25910
25911 // The narrow load will be offset from the base address of the old load if
25912 // we are extracting from something besides index 0 (little-endian).
25913 // TODO: Use "BaseIndexOffset" to make this more effective.
25914 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25915 MachineFunction &MF = DAG.getMachineFunction();
25915
25917 MachineMemOperand *MMO;
25918 if (Offset.isScalable()) {
25919 MachinePointerInfo MPI =
25920 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25921 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
25922 } else
25923 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25924 VT.getStoreSize());
25925
25926 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25927 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25928 return NewLd;
25929}
25930
25931/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25932/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25933/// EXTRACT_SUBVECTOR(Op?, ?),
25934/// Mask'))
25935/// iff it is legal and profitable to do so. Notably, the trimmed mask
25936/// (containing only the elements that are extracted)
25937/// must reference at most two subvectors.
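/// For example, extracting the low v4i32 half of a v8i32 shuffle:
///   (extract_subvector (shuffle X, Y, <0,1,8,9,u,u,u,u>), 0)
///     --> shuffle (extract_subvector X, 0), (extract_subvector Y, 0), <0,1,4,5>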
25938static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
25939 unsigned Index,
25940 const SDLoc &DL,
25941 SelectionDAG &DAG,
25942 bool LegalOperations) {
25943 // Only deal with non-scalable vectors.
25944 EVT WideVT = Src.getValueType();
25945 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25946 return SDValue();
25947
25948 // The operand must be a shufflevector.
25949 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
25950 if (!WideShuffleVector)
25951 return SDValue();
25952
25953 // The old shuffle needs to go away.
25954 if (!WideShuffleVector->hasOneUse())
25955 return SDValue();
25956
25957 // And the narrow shufflevector that we'll form must be legal.
25958 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25959 if (LegalOperations &&
25960 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
25961 return SDValue();
25962
25963 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25964 assert((Index % NumEltsExtracted) == 0 &&
25965 "Extract index is not a multiple of the output vector length.");
25966
25967 int WideNumElts = WideVT.getVectorNumElements();
25968
25969 SmallVector<int, 16> NewMask;
25970 NewMask.reserve(NumEltsExtracted);
25971 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25972 DemandedSubvectors;
25973
25974 // Try to decode the wide mask into narrow mask from at most two subvectors.
25975 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
25976 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25977 "Out-of-bounds shuffle mask?");
25978
25979 if (M < 0) {
25980 // Does not depend on operands, does not require adjustment.
25981 NewMask.emplace_back(M);
25982 continue;
25983 }
25984
25985 // From which operand of the shuffle does this shuffle mask element pick?
25986 int WideShufOpIdx = M / WideNumElts;
25987 // Which element of that operand is picked?
25988 int OpEltIdx = M % WideNumElts;
25989
25990 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25991 "Shuffle mask vector decomposition failure.");
25992
25993 // And which NumEltsExtracted-sized subvector of that operand is that?
25994 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25995 // And which element within that subvector of that operand is that?
25996 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25997
25998 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25999 "Shuffle mask subvector decomposition failure.");
26000
26001 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
26002 WideShufOpIdx * WideNumElts) == M &&
26003 "Shuffle mask full decomposition failure.");
26004
26005 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
26006
26007 if (Op.isUndef()) {
26008 // Picking from an undef operand. Let's adjust mask instead.
26009 NewMask.emplace_back(-1);
26010 continue;
26011 }
26012
26013 const std::pair<SDValue, int> DemandedSubvector =
26014 std::make_pair(Op, OpSubvecIdx);
26015
26016 if (DemandedSubvectors.insert(DemandedSubvector)) {
26017 if (DemandedSubvectors.size() > 2)
26018 return SDValue(); // We can't handle more than two subvectors.
26019 // How many elements into the WideVT does this subvector start?
26020 int Index = NumEltsExtracted * OpSubvecIdx;
26021 // Bail out if the extraction isn't going to be cheap.
26022 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
26023 return SDValue();
26024 }
26025
26026 // Ok, but from which operand of the new shuffle will this element pick?
26027 int NewOpIdx =
26028 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
26029 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
26030
26031 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
26032 NewMask.emplace_back(AdjM);
26033 }
26034 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
26035 assert(DemandedSubvectors.size() <= 2 &&
26036 "Should have ended up demanding at most two subvectors.");
26037
26038 // Did we discover that the shuffle does not actually depend on operands?
26039 if (DemandedSubvectors.empty())
26040 return DAG.getUNDEF(NarrowVT);
26041
26042 // Profitability check: only deal with extractions from the first subvector
26043 // unless the mask becomes an identity mask.
26044 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
26045 any_of(NewMask, [](int M) { return M < 0; }))
26046 for (auto &DemandedSubvector : DemandedSubvectors)
26047 if (DemandedSubvector.second != 0)
26048 return SDValue();
26049
26050 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26051 // operand[s]/index[es], so there is no point in checking for its legality.
26052
26053 // Do not turn a legal shuffle into an illegal one.
26054 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26055 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26056 return SDValue();
26057
26058 SmallVector<SDValue, 2> NewOps;
26059 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26060 &DemandedSubvector : DemandedSubvectors) {
26061 // How many elements into the WideVT does this subvector start?
26062 int Index = NumEltsExtracted * DemandedSubvector.second;
26063 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26064 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26065 DemandedSubvector.first, IndexC));
26066 }
26067 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26068 "Should end up with either one or two ops");
26069
26070 // If we ended up with only one operand, pad with an undef.
26071 if (NewOps.size() == 1)
26072 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26073
26074 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26075}
26076
26077SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26078 EVT NVT = N->getValueType(0);
26079 SDValue V = N->getOperand(0);
26080 uint64_t ExtIdx = N->getConstantOperandVal(1);
26081 SDLoc DL(N);
26082
26083 // Extract from UNDEF is UNDEF.
26084 if (V.isUndef())
26085 return DAG.getUNDEF(NVT);
26086
26087 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26088 return NarrowLoad;
26089
26090 // Combine an extract of an extract into a single extract_subvector.
26091 // ext (ext X, C), 0 --> ext X, C
26092 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26093 // The index has to be a multiple of the new result type's known minimum
26094 // vector length.
26095 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26096 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26097 V.getConstantOperandVal(1)) &&
26099 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26100 V.getOperand(1));
26101 }
26102 }
26103
26104 // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
26105 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26106 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26107 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26108 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26109
26110 // extract_subvector(insert_subvector(x,y,c1),c2)
26111 // --> extract_subvector(y,c2-c1)
26112 // iff we're just extracting from the inserted subvector.
26113 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26114 SDValue InsSub = V.getOperand(1);
26115 EVT InsSubVT = InsSub.getValueType();
26116 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26117 unsigned InsIdx = V.getConstantOperandVal(2);
26118 unsigned NumSubElts = NVT.getVectorMinNumElements();
26119 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26120 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26121 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26122 V.getValueType().isFixedLengthVector())
26123 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26124 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26125 }
26126
26127 // Try to move vector bitcast after extract_subv by scaling extraction index:
26128 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
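// e.g. (v2i64 extract_subvector (v4i64 bitcast X:v8i32), 2)
//        --> v2i64 bitcast (v4i32 extract_subvector X, 4)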
26129 if (V.getOpcode() == ISD::BITCAST &&
26130 V.getOperand(0).getValueType().isVector() &&
26131 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26132 SDValue SrcOp = V.getOperand(0);
26133 EVT SrcVT = SrcOp.getValueType();
26134 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26135 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26136 if ((SrcNumElts % DestNumElts) == 0) {
26137 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26138 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26139 EVT NewExtVT =
26140 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26141 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26142 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26143 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26144 V.getOperand(0), NewIndex);
26145 return DAG.getBitcast(NVT, NewExtract);
26146 }
26147 }
26148 if ((DestNumElts % SrcNumElts) == 0) {
26149 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26150 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26151 ElementCount NewExtEC =
26152 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26153 EVT ScalarVT = SrcVT.getScalarType();
26154 if ((ExtIdx % DestSrcRatio) == 0) {
26155 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26156 EVT NewExtVT =
26157 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26158 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26159 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26160 SDValue NewExtract =
26161 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26162 V.getOperand(0), NewIndex);
26163 return DAG.getBitcast(NVT, NewExtract);
26164 }
26165 if (NewExtEC.isScalar() &&
26166 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
26167 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26168 SDValue NewExtract =
26169 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26170 V.getOperand(0), NewIndex);
26171 return DAG.getBitcast(NVT, NewExtract);
26172 }
26173 }
26174 }
26175 }
26176 }
26177
26178 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26179 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26180 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26181 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26182 "Concat and extract subvector do not change element type");
26183
26184 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26185 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26186
26187 // If the concatenated source types match this extract, it's a direct
26188 // simplification:
26189 // extract_subvec (concat V1, V2, ...), i --> Vi
26190 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26191 return V.getOperand(ConcatOpIdx);
26192
26193 // If the concatenated source vectors are a multiple length of this extract,
26194 // then extract a fraction of one of those source vectors directly from a
26195 // concat operand. Example:
26196 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26197 // v2i8 extract_subvec v8i8 Y, 6
26198 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26199 ConcatSrcNumElts % ExtNumElts == 0) {
26200 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26201 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26202 "Trying to extract from >1 concat operand?");
26203 assert(NewExtIdx % ExtNumElts == 0 &&
26204 "Extract index is not a multiple of the input vector length.");
26205 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26206 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26207 V.getOperand(ConcatOpIdx), NewIndexC);
26208 }
26209 }
26210
26211 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26212 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26213 return Shuffle;
26214
26215 if (SDValue NarrowBOp =
26216 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26217 return NarrowBOp;
26218
26220
26221 // If the input is a build vector, try to make a smaller build vector.
26222 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26223 EVT InVT = V.getValueType();
26224 unsigned ExtractSize = NVT.getSizeInBits();
26225 unsigned EltSize = InVT.getScalarSizeInBits();
26226 // Only do this if we won't split any elements.
26227 if (ExtractSize % EltSize == 0) {
26228 unsigned NumElems = ExtractSize / EltSize;
26229 EVT EltVT = InVT.getVectorElementType();
26230 EVT ExtractVT =
26231 NumElems == 1 ? EltVT
26232 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26233 if ((Level < AfterLegalizeDAG ||
26234 (NumElems == 1 ||
26235 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26236 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26237 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26238
26239 if (NumElems == 1) {
26240 SDValue Src = V->getOperand(IdxVal);
26241 if (EltVT != Src.getValueType())
26242 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26243 return DAG.getBitcast(NVT, Src);
26244 }
26245
26246 // Extract the pieces from the original build_vector.
26247 SDValue BuildVec =
26248 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26249 return DAG.getBitcast(NVT, BuildVec);
26250 }
26251 }
26252 }
26253
26254 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26255 // Handle only simple case where vector being inserted and vector
26256 // being extracted are of same size.
26257 EVT SmallVT = V.getOperand(1).getValueType();
26258 if (NVT.bitsEq(SmallVT)) {
26259 // Combine:
26260 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26261 // Into:
26262 // indices are equal or bit offsets are equal => V2
26263 // otherwise => (extract_subvec V1, ExtIdx)
26264 uint64_t InsIdx = V.getConstantOperandVal(2);
26265 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26266 ExtIdx * NVT.getScalarSizeInBits()) {
26267 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26268 return DAG.getBitcast(NVT, V.getOperand(1));
26269 } else {
26270 return DAG.getNode(
26271 ISD::EXTRACT_SUBVECTOR, DL, NVT,
26272 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26273 N->getOperand(1));
26274 }
26275 }
26276 }
26277
26278 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26279 // simplify it based on the (valid) extractions.
26280 if (!V.getValueType().isScalableVector() &&
26281 llvm::all_of(V->users(), [&](SDNode *Use) {
26282 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26283 Use->getOperand(0) == V;
26284 })) {
26285 unsigned NumElts = V.getValueType().getVectorNumElements();
26286 APInt DemandedElts = APInt::getZero(NumElts);
26287 for (SDNode *User : V->users()) {
26288 unsigned ExtIdx = User->getConstantOperandVal(1);
26289 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26290 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26291 }
26292 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26293 // We simplified the vector operand of this extract subvector. If this
26294 // extract is not dead, visit it again so it is folded properly.
26295 if (N->getOpcode() != ISD::DELETED_NODE)
26296 AddToWorklist(N);
26297 return SDValue(N, 0);
26298 }
26299 } else {
26300 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26301 return SDValue(N, 0);
26302 }
26303
26304 return SDValue();
26305}
26306
26307/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26308/// followed by concatenation. Narrow vector ops may have better performance
26309/// than wide ops, and this can unlock further narrowing of other vector ops.
26310/// Targets can invert this transform later if it is not profitable.
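/// For example, with v4i32 X and Y and a v8i32 shuffle:
///   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,u,u,u,u>
///     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <u,u,u,u>)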
26311static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26312 SelectionDAG &DAG) {
26313 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26314 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26315 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26316 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26317 return SDValue();
26318
26319 // Split the wide shuffle mask into halves. Any mask element that is accessing
26320 // operand 1 is offset down to account for narrowing of the vectors.
26321 ArrayRef<int> Mask = Shuf->getMask();
26322 EVT VT = Shuf->getValueType(0);
26323 unsigned NumElts = VT.getVectorNumElements();
26324 unsigned HalfNumElts = NumElts / 2;
26325 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26326 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26327 for (unsigned i = 0; i != NumElts; ++i) {
26328 if (Mask[i] == -1)
26329 continue;
26330 // If we reference the upper (undef) subvector then the element is undef.
26331 if ((Mask[i] % NumElts) >= HalfNumElts)
26332 continue;
26333 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26334 if (i < HalfNumElts)
26335 Mask0[i] = M;
26336 else
26337 Mask1[i - HalfNumElts] = M;
26338 }
26339
26340 // Ask the target if this is a valid transform.
26341 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26342 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26343 HalfNumElts);
26344 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26345 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26346 return SDValue();
26347
26348 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26349 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26350 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26351 SDLoc DL(Shuf);
26352 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26353 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26354 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26355}
26356
26357// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26358// or turn a shuffle of a single concat into simpler shuffle then concat.
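// e.g. with v4i32 A, B, C, D:
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11>
//     --> concat B, C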
26359static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26360 EVT VT = N->getValueType(0);
26361 unsigned NumElts = VT.getVectorNumElements();
26362
26363 SDValue N0 = N->getOperand(0);
26364 SDValue N1 = N->getOperand(1);
26365 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26366 ArrayRef<int> Mask = SVN->getMask();
26367 SmallVector<SDValue, 4> Ops;
26367
26369 EVT ConcatVT = N0.getOperand(0).getValueType();
26370 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26371 unsigned NumConcats = NumElts / NumElemsPerConcat;
26372
26373 auto IsUndefMaskElt = [](int i) { return i == -1; };
26374
26375 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26376 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26377 // half vector elements.
26378 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26379 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26380 IsUndefMaskElt)) {
26381 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26382 N0.getOperand(1),
26383 Mask.slice(0, NumElemsPerConcat));
26384 N1 = DAG.getUNDEF(ConcatVT);
26385 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26386 }
26387
26388 // Look at every vector that's inserted. We're looking for exact
26389 // subvector-sized copies from a concatenated vector
26390 for (unsigned I = 0; I != NumConcats; ++I) {
26391 unsigned Begin = I * NumElemsPerConcat;
26392 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26393
26394 // Make sure we're dealing with a copy.
26395 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26396 Ops.push_back(DAG.getUNDEF(ConcatVT));
26397 continue;
26398 }
26399
26400 int OpIdx = -1;
26401 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26402 if (IsUndefMaskElt(SubMask[i]))
26403 continue;
26404 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26405 return SDValue();
26406 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26407 if (0 <= OpIdx && EltOpIdx != OpIdx)
26408 return SDValue();
26409 OpIdx = EltOpIdx;
26410 }
26411 assert(0 <= OpIdx && "Unknown concat_vectors op");
26412
26413 if (OpIdx < (int)N0.getNumOperands())
26414 Ops.push_back(N0.getOperand(OpIdx));
26415 else
26416 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26417 }
26418
26419 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26420}
26421
26422// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26423// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26424//
26425// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26426// a simplification in some sense, but it isn't appropriate in general: some
26427// BUILD_VECTORs are substantially cheaper than others. The general case
26428// of a BUILD_VECTOR requires inserting each element individually (or
26429// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26430// all constants is a single constant pool load. A BUILD_VECTOR where each
26431// element is identical is a splat. A BUILD_VECTOR where most of the operands
26432// are undef lowers to a small number of element insertions.
26433//
26434// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26435// We don't fold shuffles where one side is a non-zero constant, and we don't
26436// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26437// non-constant operands. This seems to work out reasonably well in practice.
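// e.g. shuffle (build_vector A, B, C, D), (build_vector E, F, G, H), <0,4,1,5>
//        --> build_vector A, E, B, F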
26438static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26439 SelectionDAG &DAG,
26440 const TargetLowering &TLI) {
26441 EVT VT = SVN->getValueType(0);
26442 unsigned NumElts = VT.getVectorNumElements();
26443 SDValue N0 = SVN->getOperand(0);
26444 SDValue N1 = SVN->getOperand(1);
26445
26446 if (!N0->hasOneUse())
26447 return SDValue();
26448
26449 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
26450 // discussed above.
26451 if (!N1.isUndef()) {
26452 if (!N1->hasOneUse())
26453 return SDValue();
26454
26455 bool N0AnyConst = isAnyConstantBuildVector(N0);
26456 bool N1AnyConst = isAnyConstantBuildVector(N1);
26457 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26458 return SDValue();
26459 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26460 return SDValue();
26461 }
26462
26463 // If both inputs are splats of the same value then we can safely merge this
26464 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26465 bool IsSplat = false;
26466 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26467 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26468 if (BV0 && BV1)
26469 if (SDValue Splat0 = BV0->getSplatValue())
26470 IsSplat = (Splat0 == BV1->getSplatValue());
26471
26472 SmallVector<SDValue, 16> Ops;
26473 SmallSet<SDValue, 16> DuplicateOps;
26474 for (int M : SVN->getMask()) {
26475 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26476 if (M >= 0) {
26477 int Idx = M < (int)NumElts ? M : M - NumElts;
26478 SDValue &S = (M < (int)NumElts ? N0 : N1);
26479 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26480 Op = S.getOperand(Idx);
26481 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26482 SDValue Op0 = S.getOperand(0);
26483 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26484 } else {
26485 // Operand can't be combined - bail out.
26486 return SDValue();
26487 }
26488 }
26489
26490 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26491 // generating a splat; semantically, this is fine, but it's likely to
26492 // generate low-quality code if the target can't reconstruct an appropriate
26493 // shuffle.
26494 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26495 if (!IsSplat && !DuplicateOps.insert(Op).second)
26496 return SDValue();
26497
26498 Ops.push_back(Op);
26499 }
26500
26501 // BUILD_VECTOR requires all inputs to be of the same type; find the
26502 // maximum type and extend them all.
26503 EVT SVT = VT.getScalarType();
26504 if (SVT.isInteger())
26505 for (SDValue &Op : Ops)
26506 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26507 if (SVT != VT.getScalarType())
26508 for (SDValue &Op : Ops)
26509 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26510 : (TLI.isZExtFree(Op.getValueType(), SVT)
26511 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26512 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26513 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26514}
26515
26516// Match shuffles that can be converted to *_vector_extend_in_reg.
26517// This is often generated during legalization.
26518// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26519// and returns the EVT to which the extension should be performed.
26520// NOTE: this assumes that the src is the first operand of the shuffle.
26521static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26522 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26523 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26524 bool LegalOperations) {
26525 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26526
26527 // TODO Add support for big-endian when we have a test case.
26528 if (!VT.isInteger() || IsBigEndian)
26529 return std::nullopt;
26530
26531 unsigned NumElts = VT.getVectorNumElements();
26532 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26533
26534 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
26535 // power-of-2 extensions as they are the most likely.
26536 // FIXME: should try Scale == NumElts case too,
26537 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26538 // The vector width must be a multiple of Scale.
26539 if (NumElts % Scale != 0)
26540 continue;
26541
26542 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
26543 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
26544
26545 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
26546 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
26547 continue;
26548
26549 if (Match(Scale))
26550 return OutVT;
26551 }
26552
26553 return std::nullopt;
26554}
26555
26556// Match shuffles that can be converted to any_vector_extend_in_reg.
26557// This is often generated during legalization.
26558// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
26559static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
26560 SelectionDAG &DAG,
26561 const TargetLowering &TLI,
26562 bool LegalOperations) {
26563 EVT VT = SVN->getValueType(0);
26564 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26565
26566 // TODO Add support for big-endian when we have a test case.
26567 if (!VT.isInteger() || IsBigEndian)
26568 return SDValue();
26569
26570 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
26571 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
26572 Mask = SVN->getMask()](unsigned Scale) {
26573 for (unsigned i = 0; i != NumElts; ++i) {
26574 if (Mask[i] < 0)
26575 continue;
26576 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
26577 continue;
26578 return false;
26579 }
26580 return true;
26581 };
26582
26583 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
26584 SDValue N0 = SVN->getOperand(0);
26585 // Never create an illegal type. Only create unsupported operations if we
26586 // are pre-legalization.
26587 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26588 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
26589 if (!OutVT)
26590 return SDValue();
26591 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
26592}
26593
26594// Match shuffles that can be converted to zero_extend_vector_inreg.
26595// This is often generated during legalization.
26596// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
26597static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
26598 SelectionDAG &DAG,
26599 const TargetLowering &TLI,
26600 bool LegalOperations) {
26601 bool LegalTypes = true;
26602 EVT VT = SVN->getValueType(0);
26603 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
26604 unsigned NumElts = VT.getVectorNumElements();
26605 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26606
26607 // TODO: add support for big-endian when we have a test case.
26608 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26609 if (!VT.isInteger() || IsBigEndian)
26610 return SDValue();
26611
26612 SmallVector<int, 16> Mask(SVN->getMask());
26613 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
26614 for (int &Indice : Mask) {
26615 if (Indice < 0)
26616 continue;
26617 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
26618 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
26619 Fn(Indice, OpIdx, OpEltIdx);
26620 }
26621 };
26622
26623 // Which elements of which operand does this shuffle demand?
26624 std::array<APInt, 2> OpsDemandedElts;
26625 for (APInt &OpDemandedElts : OpsDemandedElts)
26626 OpDemandedElts = APInt::getZero(NumElts);
26627 ForEachDecomposedIndice(
26628 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
26629 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
26630 });
26631
26632 // Element-wise(!), which of these demanded elements are known to be zero?
26633 std::array<APInt, 2> OpsKnownZeroElts;
26634 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
26635 std::get<2>(I) =
26636 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
26637
26638 // Manifest zeroable element knowledge in the shuffle mask.
26639 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
26640 // this is a local invention, but it won't leak into DAG.
26641 // FIXME: should we not manifest them, but just check when matching?
26642 bool HadZeroableElts = false;
26643 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
26644 int &Indice, int OpIdx, int OpEltIdx) {
26645 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
26646 Indice = -2; // Zeroable element.
26647 HadZeroableElts = true;
26648 }
26649 });
26650
26651 // Don't proceed unless we've refined at least one zeroable mask index.
26652 // If we didn't, then we are still trying to match the same shuffle mask
26653 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
26654 // and evidently failed. Proceeding will lead to endless combine loops.
26655 if (!HadZeroableElts)
26656 return SDValue();
26657
26658 // The shuffle may be more fine-grained than we want. Widen elements first.
26659 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
26660 SmallVector<int, 16> ScaledMask;
26661 getShuffleMaskWithWidestElts(Mask, ScaledMask);
26662 assert(Mask.size() >= ScaledMask.size() &&
26663 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
26664 int Prescale = Mask.size() / ScaledMask.size();
26665
26666 NumElts = ScaledMask.size();
26667 EltSizeInBits *= Prescale;
26668
26669 EVT PrescaledVT = EVT::getVectorVT(
26670 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
26671 NumElts);
26672
26673 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
26674 return SDValue();
26675
26676 // For example,
26677 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
26678 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
26679 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
26680 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
26681 "Unexpected mask scaling factor.");
26682 ArrayRef<int> Mask = ScaledMask;
26683 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
26684 SrcElt != NumSrcElts; ++SrcElt) {
26685 // Analyze the shuffle mask in Scale-sized chunks.
26686 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
26687 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
26688 Mask = Mask.drop_front(MaskChunk.size());
26689 // The first index in this chunk must be SrcElt, but not zero!
26690 // FIXME: undef should be fine, but that results in more-defined result.
26691 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
26692 return false;
26693 // The rest of the indices in this chunk must be zeros.
26694 // FIXME: undef should be fine, but that results in more-defined result.
26695 if (!all_of(MaskChunk.drop_front(1),
26696 [](int Indice) { return Indice == -2; }))
26697 return false;
26698 }
26699 assert(Mask.empty() && "Did not process the whole mask?");
26700 return true;
26701 };
26702
26703 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
26704 for (bool Commuted : {false, true}) {
26705 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
26706 if (Commuted)
26707 ShuffleVectorSDNode::commuteMask(ScaledMask);
26708 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26709 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
26710 LegalOperations);
26711 if (OutVT)
26712 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
26713 DAG.getBitcast(PrescaledVT, Op)));
26714 }
26715 return SDValue();
26716}
26717
26718// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
26719// each source element of a large type into the lowest elements of a smaller
26720// destination type. This is often generated during legalization.
26721// If the source node itself was a '*_extend_vector_inreg' node then we should
26722// then be able to remove it.
26723 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
26724 SelectionDAG &DAG) {
26725 EVT VT = SVN->getValueType(0);
26726 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26727
26728 // TODO Add support for big-endian when we have a test case.
26729 if (!VT.isInteger() || IsBigEndian)
26730 return SDValue();
26731
26732 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
26733
26734 unsigned Opcode = N0.getOpcode();
26735 if (!ISD::isExtVecInRegOpcode(Opcode))
26736 return SDValue();
26737
26738 SDValue N00 = N0.getOperand(0);
26739 ArrayRef<int> Mask = SVN->getMask();
26740 unsigned NumElts = VT.getVectorNumElements();
26741 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26742 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
26743 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
26744
26745 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
26746 return SDValue();
26747 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
26748
26749 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
26750 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
26751 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
26752 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
26753 for (unsigned i = 0; i != NumElts; ++i) {
26754 if (Mask[i] < 0)
26755 continue;
26756 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
26757 continue;
26758 return false;
26759 }
26760 return true;
26761 };
26762
26763 // At the moment we just handle the case where we've truncated back to the
26764 // same size as before the extension.
26765 // TODO: handle more extension/truncation cases as cases arise.
26766 if (EltSizeInBits != ExtSrcSizeInBits)
26767 return SDValue();
26768
26769 // We can remove *extend_vector_inreg only if the truncation happens at
26770 // the same scale as the extension.
26771 if (isTruncate(ExtScale))
26772 return DAG.getBitcast(VT, N00);
26773
26774 return SDValue();
26775}
26776
26777// Combine shuffles of splat-shuffles of the form:
26778// shuffle (shuffle V, undef, splat-mask), undef, M
26779// If splat-mask contains undef elements, we need to be careful about
26780 // introducing undefs in the folded mask which are not the result of composing
26781// the masks of the shuffles.
26782 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
26783 SelectionDAG &DAG) {
26784 EVT VT = Shuf->getValueType(0);
26785 unsigned NumElts = VT.getVectorNumElements();
26786
26787 if (!Shuf->getOperand(1).isUndef())
26788 return SDValue();
26789
26790 // See if this unary non-splat shuffle actually *is* a splat shuffle,
26791 // in disguise, with all demanded elements being identical.
26792 // FIXME: this can be done per-operand.
26793 if (!Shuf->isSplat()) {
26794 APInt DemandedElts(NumElts, 0);
26795 for (int Idx : Shuf->getMask()) {
26796 if (Idx < 0)
26797 continue; // Ignore sentinel indices.
26798 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
26799 DemandedElts.setBit(Idx);
26800 }
26801 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
26802 APInt UndefElts;
26803 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
26804 // Even if all demanded elements are splat, some of them could be undef.
26805 // Which lowest demanded element is *not* known-undef?
26806 std::optional<unsigned> MinNonUndefIdx;
26807 for (int Idx : Shuf->getMask()) {
26808 if (Idx < 0 || UndefElts[Idx])
26809 continue; // Ignore sentinel indices, and undef elements.
26810 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
26811 }
26812 if (!MinNonUndefIdx)
26813 return DAG.getUNDEF(VT); // All undef - result is undef.
26814 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
26815 SmallVector<int, 8> SplatMask(Shuf->getMask());
26816 for (int &Idx : SplatMask) {
26817 if (Idx < 0)
26818 continue; // Passthrough sentinel indices.
26819 // Otherwise, just pick the lowest demanded non-undef element.
26820 // Or sentinel undef, if we know we'd pick a known-undef element.
26821 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
26822 }
26823 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
26824 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
26825 Shuf->getOperand(1), SplatMask);
26826 }
26827 }
26828
26829 // If the inner operand is a known splat with no undefs, just return that directly.
26830 // TODO: Create DemandedElts mask from Shuf's mask.
26831 // TODO: Allow undef elements and merge with the shuffle code below.
26832 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26833 return Shuf->getOperand(0);
26834
26835 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26836 if (!Splat || !Splat->isSplat())
26837 return SDValue();
26838
26839 ArrayRef<int> ShufMask = Shuf->getMask();
26840 ArrayRef<int> SplatMask = Splat->getMask();
26841 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26842
26843 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26844 // every undef mask element in the splat-shuffle has a corresponding undef
26845 // element in the user-shuffle's mask or if the composition of mask elements
26846 // would result in undef.
26847 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26848 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26849 // In this case it is not legal to simplify to the splat-shuffle because we
26850 // may be exposing to the users of the shuffle an undef element at index 1
26851 // which was not there before the combine.
26852 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26853 // In this case the composition of masks yields SplatMask, so it's ok to
26854 // simplify to the splat-shuffle.
26855 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26856 // In this case the composed mask includes all undef elements of SplatMask
26857 // and in addition sets element zero to undef. It is safe to simplify to
26858 // the splat-shuffle.
26859 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26860 ArrayRef<int> SplatMask) {
26861 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26862 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26863 SplatMask[UserMask[i]] != -1)
26864 return false;
26865 return true;
26866 };
26867 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26868 return Shuf->getOperand(0);
26869
26870 // Create a new shuffle with a mask that is composed of the two shuffles'
26871 // masks.
26872 SmallVector<int, 32> NewMask;
26873 for (int Idx : ShufMask)
26874 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26875
26876 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26877 Splat->getOperand(0), Splat->getOperand(1),
26878 NewMask);
26879}
26880
26881// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26882// the mask can be treated as a larger type.
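// e.g. (v4i32 shuffle<2,3,0,1>(bitcast(v2i64 X), bitcast(v2i64 Y)))
//        --> bitcast(v2i64 shuffle<1,0>(X, Y)), assuming the widened mask is legal.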
26883 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26884 SelectionDAG &DAG,
26885 const TargetLowering &TLI,
26886 bool LegalOperations) {
26887 SDValue Op0 = SVN->getOperand(0);
26888 SDValue Op1 = SVN->getOperand(1);
26889 EVT VT = SVN->getValueType(0);
26890 if (Op0.getOpcode() != ISD::BITCAST)
26891 return SDValue();
26892 EVT InVT = Op0.getOperand(0).getValueType();
26893 if (!InVT.isVector() ||
26894 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26895 Op1.getOperand(0).getValueType() != InVT)))
26896 return SDValue();
26897 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26898 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26899 return SDValue();
26900
26901 int VTLanes = VT.getVectorNumElements();
26902 int InLanes = InVT.getVectorNumElements();
26903 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26904 (LegalOperations &&
26905 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
26906 return SDValue();
26907 int Factor = VTLanes / InLanes;
26908
26909 // Check that each group of lanes in the mask is either undef or makes a valid
26910 // mask for the wider lane type.
26911 ArrayRef<int> Mask = SVN->getMask();
26912 SmallVector<int> NewMask;
26913 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26914 return SDValue();
26915
26916 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26917 return SDValue();
26918
26919 // Create the new shuffle with the new mask and bitcast it back to the
26920 // original type.
26921 SDLoc DL(SVN);
26922 Op0 = Op0.getOperand(0);
26923 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26924 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26925 return DAG.getBitcast(VT, NewShuf);
26926}
26927
26928/// Combine shuffle of shuffle of the form:
26929/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
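/// e.g. InnerMask = <0,-1,0,-1> and OuterMask = <0,0,2,2> compose to the splat
/// mask <0,0,0,0>, assuming that mask is legal for the target.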
26930 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26931 SelectionDAG &DAG) {
26932 if (!OuterShuf->getOperand(1).isUndef())
26933 return SDValue();
26934 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26935 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26936 return SDValue();
26937
26938 ArrayRef<int> OuterMask = OuterShuf->getMask();
26939 ArrayRef<int> InnerMask = InnerShuf->getMask();
26940 unsigned NumElts = OuterMask.size();
26941 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26942 SmallVector<int, 32> CombinedMask(NumElts, -1);
26943 int SplatIndex = -1;
26944 for (unsigned i = 0; i != NumElts; ++i) {
26945 // Undef lanes remain undef.
26946 int OuterMaskElt = OuterMask[i];
26947 if (OuterMaskElt == -1)
26948 continue;
26949
26950 // Peek through the shuffle masks to get the underlying source element.
26951 int InnerMaskElt = InnerMask[OuterMaskElt];
26952 if (InnerMaskElt == -1)
26953 continue;
26954
26955 // Initialize the splatted element.
26956 if (SplatIndex == -1)
26957 SplatIndex = InnerMaskElt;
26958
26959 // Non-matching index - this is not a splat.
26960 if (SplatIndex != InnerMaskElt)
26961 return SDValue();
26962
26963 CombinedMask[i] = InnerMaskElt;
26964 }
26965 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26966 getSplatIndex(CombinedMask) != -1) &&
26967 "Expected a splat mask");
26968
26969 // TODO: The transform may be a win even if the mask is not legal.
26970 EVT VT = OuterShuf->getValueType(0);
26971 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26972 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26973 return SDValue();
26974
26975 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26976 InnerShuf->getOperand(1), CombinedMask);
26977}
26978
26979/// If the shuffle mask is taking exactly one element from the first vector
26980/// operand and passing through all other elements from the second vector
26981/// operand, return the index of the mask element that is choosing an element
26982/// from the first operand. Otherwise, return -1.
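/// e.g. Mask <4,5,0,7> returns 2 (only element 2 takes its value from operand 0),
/// while Mask <0,5,2,7> returns -1 (two elements come from operand 0).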
26983 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26984 int MaskSize = Mask.size();
26985 int EltFromOp0 = -1;
26986 // TODO: This does not match if there are undef elements in the shuffle mask.
26987 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26988 // removing an instruction (a shuffle), but losing the knowledge that some
26989 // vector lanes are not needed.
26990 for (int i = 0; i != MaskSize; ++i) {
26991 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26992 // We're looking for a shuffle of exactly one element from operand 0.
26993 if (EltFromOp0 != -1)
26994 return -1;
26995 EltFromOp0 = i;
26996 } else if (Mask[i] != i + MaskSize) {
26997 // Nothing from operand 1 can change lanes.
26998 return -1;
26999 }
27000 }
27001 return EltFromOp0;
27002}
27003
27004/// If a shuffle inserts exactly one element from a source vector operand into
27005/// another vector operand and we can access the specified element as a scalar,
27006/// then we can eliminate the shuffle.
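/// e.g. shuffle (insertelt v1, x, 0), v2, <4,5,0,7> --> insertelt v2, x, 2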
27007SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
27008 // First, check if we are taking one element of a vector and shuffling that
27009 // element into another vector.
27010 ArrayRef<int> Mask = Shuf->getMask();
27011 SmallVector<int, 16> CommutedMask(Mask);
27012 SDValue Op0 = Shuf->getOperand(0);
27013 SDValue Op1 = Shuf->getOperand(1);
27014 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
27015 if (ShufOp0Index == -1) {
27016 // Commute mask and check again.
27017 ShuffleVectorSDNode::commuteMask(CommutedMask);
27018 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
27019 if (ShufOp0Index == -1)
27020 return SDValue();
27021 // Commute operands to match the commuted shuffle mask.
27022 std::swap(Op0, Op1);
27023 Mask = CommutedMask;
27024 }
27025
27026 // The shuffle inserts exactly one element from operand 0 into operand 1.
27027 // Now see if we can access that element as a scalar via a real insert element
27028 // instruction.
27029 // TODO: We can try harder to locate the element as a scalar. Examples: it
27030 // could be an operand of BUILD_VECTOR, or a constant.
27031 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
27032 "Shuffle mask value must be from operand 0");
27033
27034 SDValue Elt;
27035 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
27036 m_SpecificInt(Mask[ShufOp0Index])))) {
27037 // There's an existing insertelement with constant insertion index, so we
27038 // don't need to check the legality/profitability of a replacement operation
27039 // that differs at most in the constant value. The target should be able to
27040 // lower any of those in a similar way. If not, legalization will expand
27041 // this to a scalar-to-vector plus shuffle.
27042 //
27043 // Note that the shuffle may move the scalar from the position that the
27044 // insert element used. Therefore, our new insert element occurs at the
27045 // shuffle's mask index value, not the insert's index value.
27046 //
27047 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
27048 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27049 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27050 Op1, Elt, NewInsIndex);
27051 }
27052
27053 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
27054 return SDValue();
27055
27057 Mask[ShufOp0Index] == 0) {
27058 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27059 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27060 Op1, Elt, NewInsIndex);
27061 }
27062
27063 return SDValue();
27064}
27065
27066/// If we have a unary shuffle of a shuffle, see if it can be folded away
27067/// completely. This has the potential to lose undef knowledge because the first
27068/// shuffle may not have an undef mask element where the second one does. So
27069/// only call this after doing simplifications based on demanded elements.
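/// e.g. shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2> --> shuf0 X, Y, <0,0,2,2>,
/// since every outer lane selects an inner lane holding the same source element.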
27070 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27071 // shuf (shuf0 X, Y, Mask0), undef, Mask
27072 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27073 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27074 return SDValue();
27075
27076 ArrayRef<int> Mask = Shuf->getMask();
27077 ArrayRef<int> Mask0 = Shuf0->getMask();
27078 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27079 // Ignore undef elements.
27080 if (Mask[i] == -1)
27081 continue;
27082 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27083
27084 // Is the element of the shuffle operand chosen by this shuffle the same as
27085 // the element chosen by the shuffle operand itself?
27086 if (Mask0[Mask[i]] != Mask0[i])
27087 return SDValue();
27088 }
27089 // Every element of this shuffle is identical to the result of the previous
27090 // shuffle, so we can replace this value.
27091 return Shuf->getOperand(0);
27092}
27093
27094SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27095 EVT VT = N->getValueType(0);
27096 unsigned NumElts = VT.getVectorNumElements();
27097
27098 SDValue N0 = N->getOperand(0);
27099 SDValue N1 = N->getOperand(1);
27100
27101 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27102
27103 // Canonicalize shuffle undef, undef -> undef
27104 if (N0.isUndef() && N1.isUndef())
27105 return DAG.getUNDEF(VT);
27106
27107 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27108
27109 // Canonicalize shuffle v, v -> v, undef
27110 if (N0 == N1)
27111 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27112 createUnaryMask(SVN->getMask(), NumElts));
27113
27114 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27115 if (N0.isUndef())
27116 return DAG.getCommutedVectorShuffle(*SVN);
27117
27118 // Remove references to rhs if it is undef
27119 if (N1.isUndef()) {
27120 bool Changed = false;
27121 SmallVector<int, 8> NewMask;
27122 for (unsigned i = 0; i != NumElts; ++i) {
27123 int Idx = SVN->getMaskElt(i);
27124 if (Idx >= (int)NumElts) {
27125 Idx = -1;
27126 Changed = true;
27127 }
27128 NewMask.push_back(Idx);
27129 }
27130 if (Changed)
27131 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27132 }
27133
27134 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27135 return InsElt;
27136
27137 // A shuffle of a single vector that is a splatted value can always be folded.
27138 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27139 return V;
27140
27141 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27142 return V;
27143
27144 // If it is a splat, check if the argument vector is another splat or a
27145 // build_vector.
27146 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27147 int SplatIndex = SVN->getSplatIndex();
27148 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27149 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27150 // splat (vector_bo L, R), Index -->
27151 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27152 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27153 SDLoc DL(N);
27154 EVT EltVT = VT.getScalarType();
27155 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27156 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27157 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27158 SDValue NewBO =
27159 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27160 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27161 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27162 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27163 }
27164
27165 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27166 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27167 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27168 N0.hasOneUse()) {
27169 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27170 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27171
27172 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27173 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27174 if (Idx->getAPIntValue() == SplatIndex)
27175 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27176
27177 // Look through a bitcast if LE and splatting lane 0, through to a
27178 // scalar_to_vector or a build_vector.
27179 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27180 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27181 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27182 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27183 EVT N00VT = N0.getOperand(0).getValueType();
27184 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27185 VT.isInteger() && N00VT.isInteger()) {
27186 EVT InVT =
27187 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
27188 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
27189 SDLoc(N), InVT);
27190 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27191 }
27192 }
27193 }
27194
27195 // If this is a bit convert that changes the element type of the vector but
27196 // not the number of vector elements, look through it. Be careful not to
27197 // look through conversions that change things like v4f32 to v2f64.
27198 SDNode *V = N0.getNode();
27199 if (V->getOpcode() == ISD::BITCAST) {
27200 SDValue ConvInput = V->getOperand(0);
27201 if (ConvInput.getValueType().isVector() &&
27202 ConvInput.getValueType().getVectorNumElements() == NumElts)
27203 V = ConvInput.getNode();
27204 }
27205
27206 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27207 assert(V->getNumOperands() == NumElts &&
27208 "BUILD_VECTOR has wrong number of operands");
27209 SDValue Base;
27210 bool AllSame = true;
27211 for (unsigned i = 0; i != NumElts; ++i) {
27212 if (!V->getOperand(i).isUndef()) {
27213 Base = V->getOperand(i);
27214 break;
27215 }
27216 }
27217 // Splat of <u, u, u, u>, return <u, u, u, u>
27218 if (!Base.getNode())
27219 return N0;
27220 for (unsigned i = 0; i != NumElts; ++i) {
27221 if (V->getOperand(i) != Base) {
27222 AllSame = false;
27223 break;
27224 }
27225 }
27226 // Splat of <x, x, x, x>, return <x, x, x, x>
27227 if (AllSame)
27228 return N0;
27229
27230 // Canonicalize any other splat as a build_vector, but avoid defining any
27231 // undefined elements in the mask.
27232 SDValue Splatted = V->getOperand(SplatIndex);
27233 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27234 EVT EltVT = Splatted.getValueType();
27235
27236 for (unsigned i = 0; i != NumElts; ++i) {
27237 if (SVN->getMaskElt(i) < 0)
27238 Ops[i] = DAG.getUNDEF(EltVT);
27239 }
27240
27241 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27242
27243 // We may have jumped through bitcasts, so the type of the
27244 // BUILD_VECTOR may not match the type of the shuffle.
27245 if (V->getValueType(0) != VT)
27246 NewBV = DAG.getBitcast(VT, NewBV);
27247 return NewBV;
27248 }
27249 }
27250
27251 // Simplify source operands based on shuffle mask.
27252 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27253 return SDValue(N, 0);
27254
27255 // This is intentionally placed after demanded elements simplification because
27256 // it could eliminate knowledge of undef elements created by this shuffle.
27257 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27258 return ShufOp;
27259
27260 // Match shuffles that can be converted to any_vector_extend_in_reg.
27261 if (SDValue V =
27262 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27263 return V;
27264
27265 // Combine "truncate_vector_in_reg" style shuffles.
27266 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27267 return V;
27268
27269 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27270 Level < AfterLegalizeVectorOps &&
27271 (N1.isUndef() ||
27272 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27273 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27274 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27275 return V;
27276 }
27277
27278 // A shuffle of a concat of the same narrow vector can be reduced to use
27279 // only low-half elements of a concat with undef:
27280 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
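// e.g. with X of type v2, shuf (concat X, X), undef, <2,3,0,1>
//        --> shuf (concat X, undef), undef, <0,1,0,1>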
27281 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27282 N0.getNumOperands() == 2 &&
27283 N0.getOperand(0) == N0.getOperand(1)) {
27284 int HalfNumElts = (int)NumElts / 2;
27285 SmallVector<int, 8> NewMask;
27286 for (unsigned i = 0; i != NumElts; ++i) {
27287 int Idx = SVN->getMaskElt(i);
27288 if (Idx >= HalfNumElts) {
27289 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27290 Idx -= HalfNumElts;
27291 }
27292 NewMask.push_back(Idx);
27293 }
27294 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27295 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27296 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27297 N0.getOperand(0), UndefVec);
27298 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27299 }
27300 }
27301
27302 // See if we can replace a shuffle with an insert_subvector.
27303 // e.g. v2i32 into v8i32:
27304 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27305 // --> insert_subvector(lhs,rhs1,4).
27306 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27307 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
27308 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27309 // Ensure RHS subvectors are legal.
27310 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27311 EVT SubVT = RHS.getOperand(0).getValueType();
27312 int NumSubVecs = RHS.getNumOperands();
27313 int NumSubElts = SubVT.getVectorNumElements();
27314 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27315 if (!TLI.isTypeLegal(SubVT))
27316 return SDValue();
27317
27318 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27319 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27320 return SDValue();
27321
27322 // Search [NumSubElts] spans for RHS sequence.
27323 // TODO: Can we avoid nested loops to increase performance?
27324 SmallVector<int> InsertionMask(NumElts);
27325 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27326 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27327 // Reset mask to identity.
27328 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27329
27330 // Add subvector insertion.
27331 std::iota(InsertionMask.begin() + SubIdx,
27332 InsertionMask.begin() + SubIdx + NumSubElts,
27333 NumElts + (SubVec * NumSubElts));
27334
27335 // See if the shuffle mask matches the reference insertion mask.
27336 bool MatchingShuffle = true;
27337 for (int i = 0; i != (int)NumElts; ++i) {
27338 int ExpectIdx = InsertionMask[i];
27339 int ActualIdx = Mask[i];
27340 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27341 MatchingShuffle = false;
27342 break;
27343 }
27344 }
27345
27346 if (MatchingShuffle)
27347 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27348 SubIdx);
27349 }
27350 }
27351 return SDValue();
27352 };
27353 ArrayRef<int> Mask = SVN->getMask();
27354 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27355 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27356 return InsertN1;
27357 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27358 SmallVector<int> CommuteMask(Mask);
27359 ShuffleVectorSDNode::commuteMask(CommuteMask);
27360 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27361 return InsertN0;
27362 }
27363 }
27364
27365 // If we're not performing a select/blend shuffle, see if we can convert the
27366 // shuffle into an AND node, with all the out-of-lane elements known to be zero.
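// e.g. if element 2 of the RHS is known zero, shuffle X, Y, <0,6,2,-1>
//        --> and X, <-1,0,-1,undef> (performed on a bitcast integer vector type).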
27367 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27368 bool IsInLaneMask = true;
27369 ArrayRef<int> Mask = SVN->getMask();
27370 SmallVector<int, 16> ClearMask(NumElts, -1);
27371 APInt DemandedLHS = APInt::getZero(NumElts);
27372 APInt DemandedRHS = APInt::getZero(NumElts);
27373 for (int I = 0; I != (int)NumElts; ++I) {
27374 int M = Mask[I];
27375 if (M < 0)
27376 continue;
27377 ClearMask[I] = M == I ? I : (I + NumElts);
27378 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27379 if (M != I) {
27380 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27381 Demanded.setBit(M % NumElts);
27382 }
27383 }
27384 // TODO: Should we try to mask with N1 as well?
27385 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27386 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27387 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27388 SDLoc DL(N);
27389 EVT IntVT = VT.changeVectorElementTypeToInteger();
27390 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27391 // Transform the type to a legal type so that the buildvector constant
27392 // elements are not illegal. Make sure that the result is larger than the
27393 // original type, in case the value is split into two (e.g. i64->i32).
27394 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27395 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27396 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27397 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27398 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27399 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27400 for (int I = 0; I != (int)NumElts; ++I)
27401 if (0 <= Mask[I])
27402 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27403
27404 // See if a clear mask is legal instead of going via
27405 // XformToShuffleWithZero which loses UNDEF mask elements.
27406 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27407 return DAG.getBitcast(
27408 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27409 DAG.getConstant(0, DL, IntVT), ClearMask));
27410
27411 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27412 return DAG.getBitcast(
27413 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27414 DAG.getBuildVector(IntVT, DL, AndMask)));
27415 }
27416 }
27417 }
27418
27419 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27420 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27421 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27422 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27423 return Res;
27424
27425 // If this shuffle only has a single input that is a bitcasted shuffle,
27426 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27427 // back to their original types.
27428 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27429 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27430 TLI.isTypeLegal(VT)) {
27431
27432 SDValue BC0 = peekThroughOneUseBitcasts(N0);
27433 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27434 EVT SVT = VT.getScalarType();
27435 EVT InnerVT = BC0->getValueType(0);
27436 EVT InnerSVT = InnerVT.getScalarType();
27437
27438 // Determine which shuffle works with the smaller scalar type.
27439 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27440 EVT ScaleSVT = ScaleVT.getScalarType();
27441
27442 if (TLI.isTypeLegal(ScaleVT) &&
27443 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27444 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27445 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27446 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27447
27448 // Scale the shuffle masks to the smaller scalar type.
27449 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27450 SmallVector<int, 8> InnerMask;
27451 SmallVector<int, 8> OuterMask;
27452 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27453 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27454
27455 // Merge the shuffle masks.
27456 SmallVector<int, 8> NewMask;
27457 for (int M : OuterMask)
27458 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27459
27460 // Test for shuffle mask legality over both commutations.
27461 SDValue SV0 = BC0->getOperand(0);
27462 SDValue SV1 = BC0->getOperand(1);
27463 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27464 if (!LegalMask) {
27465 std::swap(SV0, SV1);
27466 ShuffleVectorSDNode::commuteMask(NewMask);
27467 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27468 }
27469
27470 if (LegalMask) {
27471 SV0 = DAG.getBitcast(ScaleVT, SV0);
27472 SV1 = DAG.getBitcast(ScaleVT, SV1);
27473 return DAG.getBitcast(
27474 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27475 }
27476 }
27477 }
27478 }
27479
27480 // Match shuffles of bitcasts, so long as the mask can be treated as the
27481 // larger type.
27482 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27483 return V;
27484
27485 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27486 // operand, and SV1 as the second operand.
27487 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27488 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
27489 auto MergeInnerShuffle =
27490 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27491 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27492 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27493 SmallVectorImpl<int> &Mask) -> bool {
27494 // Don't try to fold splats; they're likely to simplify somehow, or they
27495 // might be free.
27496 if (OtherSVN->isSplat())
27497 return false;
27498
27499 SV0 = SV1 = SDValue();
27500 Mask.clear();
27501
27502 for (unsigned i = 0; i != NumElts; ++i) {
27503 int Idx = SVN->getMaskElt(i);
27504 if (Idx < 0) {
27505 // Propagate Undef.
27506 Mask.push_back(Idx);
27507 continue;
27508 }
27509
27510 if (Commute)
27511 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27512
27513 SDValue CurrentVec;
27514 if (Idx < (int)NumElts) {
27515 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27516 // shuffle mask to identify which vector is actually referenced.
27517 Idx = OtherSVN->getMaskElt(Idx);
27518 if (Idx < 0) {
27519 // Propagate Undef.
27520 Mask.push_back(Idx);
27521 continue;
27522 }
27523 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27524 : OtherSVN->getOperand(1);
27525 } else {
27526 // This shuffle index references an element within N1.
27527 CurrentVec = N1;
27528 }
27529
27530 // Simple case where 'CurrentVec' is UNDEF.
27531 if (CurrentVec.isUndef()) {
27532 Mask.push_back(-1);
27533 continue;
27534 }
27535
27536 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27537 // will be the first or second operand of the combined shuffle.
27538 Idx = Idx % NumElts;
27539 if (!SV0.getNode() || SV0 == CurrentVec) {
27540 // Ok. CurrentVec is the left hand side.
27541 // Update the mask accordingly.
27542 SV0 = CurrentVec;
27543 Mask.push_back(Idx);
27544 continue;
27545 }
27546 if (!SV1.getNode() || SV1 == CurrentVec) {
27547 // Ok. CurrentVec is the right hand side.
27548 // Update the mask accordingly.
27549 SV1 = CurrentVec;
27550 Mask.push_back(Idx + NumElts);
27551 continue;
27552 }
27553
27554 // Last chance - see if the vector is another shuffle and if it
27555 // uses one of the existing candidate shuffle ops.
27556 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
27557 int InnerIdx = CurrentSVN->getMaskElt(Idx);
27558 if (InnerIdx < 0) {
27559 Mask.push_back(-1);
27560 continue;
27561 }
27562 SDValue InnerVec = (InnerIdx < (int)NumElts)
27563 ? CurrentSVN->getOperand(0)
27564 : CurrentSVN->getOperand(1);
27565 if (InnerVec.isUndef()) {
27566 Mask.push_back(-1);
27567 continue;
27568 }
27569 InnerIdx %= NumElts;
27570 if (InnerVec == SV0) {
27571 Mask.push_back(InnerIdx);
27572 continue;
27573 }
27574 if (InnerVec == SV1) {
27575 Mask.push_back(InnerIdx + NumElts);
27576 continue;
27577 }
27578 }
27579
27580 // Bail out if we cannot convert the shuffle pair into a single shuffle.
27581 return false;
27582 }
27583
27584 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27585 return true;
27586
27587 // Avoid introducing shuffles with illegal mask.
27588 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27589 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27590 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27591 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
27592 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
27593 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
27594 if (TLI.isShuffleMaskLegal(Mask, VT))
27595 return true;
27596
27597 std::swap(SV0, SV1);
27598 ShuffleVectorSDNode::commuteMask(Mask);
27599 return TLI.isShuffleMaskLegal(Mask, VT);
27600 };
27601
27602 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27603 // Canonicalize shuffles according to rules:
27604 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
27605 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
27606 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
27607 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27608 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
27609 // The incoming shuffle must be of the same type as the result of the
27610 // current shuffle.
27611 assert(N1->getOperand(0).getValueType() == VT &&
27612 "Shuffle types don't match");
27613
27614 SDValue SV0 = N1->getOperand(0);
27615 SDValue SV1 = N1->getOperand(1);
27616 bool HasSameOp0 = N0 == SV0;
27617 bool IsSV1Undef = SV1.isUndef();
27618 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
27619 // Commute the operands of this shuffle so merging below will trigger.
27620 return DAG.getCommutedVectorShuffle(*SVN);
27621 }
27622
27623 // Canonicalize splat shuffles to the RHS to improve merging below.
27624 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
27625 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
27626 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27627 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
27628 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
27629 return DAG.getCommutedVectorShuffle(*SVN);
27630 }
27631
27632 // Try to fold according to rules:
27633 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27634 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27635 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27636 // Don't try to fold shuffles with illegal type.
27637 // Only fold if this shuffle is the only user of the other shuffle.
27638 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
27639 for (int i = 0; i != 2; ++i) {
27640 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
27641 N->isOnlyUserOf(N->getOperand(i).getNode())) {
27642 // The incoming shuffle must be of the same type as the result of the
27643 // current shuffle.
27644 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
27645 assert(OtherSV->getOperand(0).getValueType() == VT &&
27646 "Shuffle types don't match");
27647
27648 SDValue SV0, SV1;
27649 SmallVector<int, 4> Mask;
27650 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
27651 SV0, SV1, Mask)) {
27652 // Check if all indices in Mask are Undef. If so, propagate Undef.
27653 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27654 return DAG.getUNDEF(VT);
27655
27656 return DAG.getVectorShuffle(VT, SDLoc(N),
27657 SV0 ? SV0 : DAG.getUNDEF(VT),
27658 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
27659 }
27660 }
27661 }
27662
27663 // Merge shuffles through binops if we are able to merge the shuffle with at
27664 // least one other shuffle.
27665 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
27666 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
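//   e.g. the first form can become bop(shuffle'(x,y), shuffle'(z,w)), folding the
//   outer shuffle into the binop operands without increasing the shuffle count.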
27667 unsigned SrcOpcode = N0.getOpcode();
27668 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
27669 (N1.isUndef() ||
27670 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
27671 // Get binop source ops, or just pass on the undef.
27672 SDValue Op00 = N0.getOperand(0);
27673 SDValue Op01 = N0.getOperand(1);
27674 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
27675 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
27676 // TODO: We might be able to relax the VT check but we don't currently
27677 // have any isBinOp() that has different result/ops VTs so play safe until
27678 // we have test coverage.
27679 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
27680 Op01.getValueType() == VT && Op11.getValueType() == VT &&
27681 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
27682 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
27683 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
27684 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
27685 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
27686 SmallVectorImpl<int> &Mask, bool LeftOp,
27687 bool Commute) {
27688 SDValue InnerN = Commute ? N1 : N0;
27689 SDValue Op0 = LeftOp ? Op00 : Op01;
27690 SDValue Op1 = LeftOp ? Op10 : Op11;
27691 if (Commute)
27692 std::swap(Op0, Op1);
27693 // Only accept the merged shuffle if we don't introduce undef elements,
27694 // or the inner shuffle already contained undef elements.
27695 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
27696 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
27697 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
27698 Mask) &&
27699 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
27700 llvm::none_of(Mask, [](int M) { return M < 0; }));
27701 };
27702
27703 // Ensure we don't increase the number of shuffles - we must merge a
27704 // shuffle from at least one of the LHS and RHS ops.
27705 bool MergedLeft = false;
27706 SDValue LeftSV0, LeftSV1;
27707 SmallVector<int, 4> LeftMask;
27708 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
27709 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
27710 MergedLeft = true;
27711 } else {
27712 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27713 LeftSV0 = Op00, LeftSV1 = Op10;
27714 }
27715
27716 bool MergedRight = false;
27717 SDValue RightSV0, RightSV1;
27718 SmallVector<int, 4> RightMask;
27719 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
27720 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
27721 MergedRight = true;
27722 } else {
27723 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27724 RightSV0 = Op01, RightSV1 = Op11;
27725 }
27726
27727 if (MergedLeft || MergedRight) {
27728 SDLoc DL(N);
27729 SDValue LHS = DAG.getVectorShuffle(
27730 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
27731 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
27732 SDValue RHS = DAG.getVectorShuffle(
27733 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
27734 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
27735 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
27736 }
27737 }
27738 }
27739 }
27740
27741 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
27742 return V;
27743
27744 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
27745 // Perform this really late, because it could eliminate knowledge
27746 // of undef elements created by this shuffle.
27747 if (Level < AfterLegalizeTypes)
27748 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
27749 LegalOperations))
27750 return V;
27751
27752 return SDValue();
27753}
27754
27755SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
27756 EVT VT = N->getValueType(0);
27757 if (!VT.isFixedLengthVector())
27758 return SDValue();
27759
27760 // Try to convert a scalar binop with an extracted vector element to a vector
27761 // binop. This is intended to reduce potentially expensive register moves.
27762 // TODO: Check if both operands are extracted.
27763 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
27764 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
27765 SDValue Scalar = N->getOperand(0);
27766 unsigned Opcode = Scalar.getOpcode();
27767 EVT VecEltVT = VT.getScalarType();
27768 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
27769 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
27770 Scalar.getOperand(0).getValueType() == VecEltVT &&
27771 Scalar.getOperand(1).getValueType() == VecEltVT &&
27772 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
27773 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
27774 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
27775 // Match an extract element and get a shuffle mask equivalent.
27776 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
27777
27778 for (int i : {0, 1}) {
27779 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
27780 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
27781 SDValue EE = Scalar.getOperand(i);
27782 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
27783 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
27784 EE.getOperand(0).getValueType() == VT &&
27785 isa<ConstantSDNode>(EE.getOperand(1))) {
27786 // Mask = {ExtractIndex, undef, undef....}
27787 ShufMask[0] = EE.getConstantOperandVal(1);
27788 // Make sure the shuffle is legal if we are crossing lanes.
27789 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
27790 SDLoc DL(N);
27791 SDValue V[] = {EE.getOperand(0),
27792 DAG.getConstant(C->getAPIntValue(), DL, VT)};
27793 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
27794 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
27795 ShufMask);
27796 }
27797 }
27798 }
27799 }
27800
27801 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
27802 // with a VECTOR_SHUFFLE and possible truncate.
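// e.g. scalar_to_vector(extract_vector_elt(v4i32 V, 2))
//        --> shuffle V, undef, <2,-1,-1,-1> (when the source and result types match).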
27803 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
27804 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
27805 return SDValue();
27806
27807 // If we have an implicit truncate, truncate here if it is legal.
27808 if (VecEltVT != Scalar.getValueType() &&
27809 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
27810 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
27811 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
27812 }
27813
27814 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
27815 if (!ExtIndexC)
27816 return SDValue();
27817
27818 SDValue SrcVec = Scalar.getOperand(0);
27819 EVT SrcVT = SrcVec.getValueType();
27820 unsigned SrcNumElts = SrcVT.getVectorNumElements();
27821 unsigned VTNumElts = VT.getVectorNumElements();
27822 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
27823 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
27824 SmallVector<int, 8> Mask(SrcNumElts, -1);
27825 Mask[0] = ExtIndexC->getZExtValue();
27826 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
27827 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
27828 if (!LegalShuffle)
27829 return SDValue();
27830
27831 // If the initial vector is the same size, the shuffle is the result.
27832 if (VT == SrcVT)
27833 return LegalShuffle;
27834
27835 // If not, shorten the shuffled vector.
27836 if (VTNumElts != SrcNumElts) {
27837 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
27838 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27839 SrcVT.getVectorElementType(), VTNumElts);
27840 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27841 ZeroIdx);
27842 }
27843 }
27844
27845 return SDValue();
27846}
27847
27848SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27849 EVT VT = N->getValueType(0);
27850 SDValue N0 = N->getOperand(0);
27851 SDValue N1 = N->getOperand(1);
27852 SDValue N2 = N->getOperand(2);
27853 uint64_t InsIdx = N->getConstantOperandVal(2);
27854
27855 // If inserting an UNDEF, just return the original vector.
27856 if (N1.isUndef())
27857 return N0;
27858
27859 // If this is an insert of an extracted vector into an undef vector, we can
27860 // just use the input to the extract if the types match, and can simplify
27861 // in some cases even if they don't.
27862 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27863 N1.getOperand(1) == N2) {
27864 EVT SrcVT = N1.getOperand(0).getValueType();
27865 if (SrcVT == VT)
27866 return N1.getOperand(0);
27867 // TODO: To remove the zero check, need to adjust the offset to
27868 // a multiple of the new src type.
27869 if (isNullConstant(N2)) {
27870 if (VT.knownBitsGE(SrcVT) &&
27871 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27872 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27873 VT, N0, N1.getOperand(0), N2);
27874 else if (VT.knownBitsLE(SrcVT) &&
27875 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27876 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27877 VT, N1.getOperand(0), N2);
27878 }
27879 }
27880
27881 // Handle the case where we've ended up inserting back into the source vector
27882 // we extracted the subvector from.
27883 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27884 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27885 N1.getOperand(1) == N2)
27886 return N0;
27887
27888 // Simplify scalar inserts into an undef vector:
27889 // insert_subvector undef, (splat X), N2 -> splat X
27890 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27891 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27892 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27893
27894 // insert_subvector (splat X), (splat X), N2 -> splat X
27895 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27896 N0.getOperand(0) == N1.getOperand(0))
27897 return N0;
27898
27899 // If we are inserting a bitcast value into an undef, with the same
27900 // number of elements, just use the bitcast input of the extract.
27901 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27902 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27903 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27904 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27905 N1.getOperand(0).getOperand(1) == N2 &&
27906 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27907 VT.getVectorElementCount() &&
27908 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27909 VT.getSizeInBits()) {
27910 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27911 }
27912
27913 // If both N0 and N1 are bitcast values on which insert_subvector
27914 // would make sense, pull the bitcast through.
27915 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27916 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27917 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27918 SDValue CN0 = N0.getOperand(0);
27919 SDValue CN1 = N1.getOperand(0);
27920 EVT CN0VT = CN0.getValueType();
27921 EVT CN1VT = CN1.getValueType();
27922 if (CN0VT.isVector() && CN1VT.isVector() &&
27923 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27924 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
27925 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27926 CN0.getValueType(), CN0, CN1, N2);
27927 return DAG.getBitcast(VT, NewINSERT);
27928 }
27929 }
27930
27931 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27932 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27933 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27934 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27935 N0.getOperand(1).getValueType() == N1.getValueType() &&
27936 N0.getOperand(2) == N2)
27937 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27938 N1, N2);
27939
27940 // Eliminate an intermediate insert into an undef vector:
27941 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27942 // insert_subvector undef, X, 0
27943 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27944 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27945 isNullConstant(N2))
27946 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27947 N1.getOperand(1), N2);
27948
27949 // Push subvector bitcasts to the output, adjusting the index as we go.
27950 // insert_subvector(bitcast(v), bitcast(s), c1)
27951 // -> bitcast(insert_subvector(v, s, c2))
27952 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27953 N1.getOpcode() == ISD::BITCAST) {
27954 SDValue N0Src = peekThroughBitcasts(N0);
27955 SDValue N1Src = peekThroughBitcasts(N1);
27956 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27957 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27958 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27959 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27960 EVT NewVT;
27961 SDLoc DL(N);
27962 SDValue NewIdx;
27963 LLVMContext &Ctx = *DAG.getContext();
27964 ElementCount NumElts = VT.getVectorElementCount();
27965 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27966 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27967 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27968 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27969 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27970 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27971 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27972 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27973 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27974 NumElts.divideCoefficientBy(Scale));
27975 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27976 }
27977 }
27978 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27979 SDValue Res = DAG.getBitcast(NewVT, N0Src);
27980 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27981 return DAG.getBitcast(VT, Res);
27982 }
27983 }
27984 }
27985
27986 // Canonicalize insert_subvector dag nodes.
27987 // Example:
27988 // (insert_subvector (insert_subvector A, Idx0), Idx1)
27989 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
27990 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27991 N1.getValueType() == N0.getOperand(1).getValueType()) {
27992 unsigned OtherIdx = N0.getConstantOperandVal(2);
27993 if (InsIdx < OtherIdx) {
27994 // Swap nodes.
27995 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27996 N0.getOperand(0), N1, N2);
27997 AddToWorklist(NewOp.getNode());
27998 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27999 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
28000 }
28001 }
28002
28003 // If the input vector is a concatenation, and the insert replaces
28004 // one of the pieces, we can optimize into a single concat_vectors.
28005 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
28006 N0.getOperand(0).getValueType() == N1.getValueType() &&
28009 unsigned Factor = N1.getValueType().getVectorMinNumElements();
28010 SmallVector<SDValue, 8> Ops(N0->ops());
28011 Ops[InsIdx / Factor] = N1;
28012 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
28013 }
28014
28015 // Simplify source operands based on insertion.
28016 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
28017 return SDValue(N, 0);
28018
28019 return SDValue();
28020}
28021
28022SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
28023 SDValue N0 = N->getOperand(0);
28024
28025 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
28026 if (N0->getOpcode() == ISD::FP16_TO_FP)
28027 return N0->getOperand(0);
28028
28029 return SDValue();
28030}
28031
28032SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
28033 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28034 auto Op = N->getOpcode();
28035 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
28036 "opcode should be FP16_TO_FP or BF16_TO_FP.");
28037 SDValue N0 = N->getOperand(0);
28038
28039 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
28040 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28041 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
28042 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
28043 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
28044 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
28045 }
28046 }
28047
28048 if (SDValue CastEliminated = eliminateFPCastPair(N))
28049 return CastEliminated;
28050
28051 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28052 // because they are wrapped inside the <1 x f16> type. Try one last time to
28053 // get rid of them.
28054 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
28055 N->getValueType(0), {N0});
28056 return Folded;
28057}
28058
28059SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28060 SDValue N0 = N->getOperand(0);
28061
28062 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28063 if (N0->getOpcode() == ISD::BF16_TO_FP)
28064 return N0->getOperand(0);
28065
28066 return SDValue();
28067}
28068
28069SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28070 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28071 return visitFP16_TO_FP(N);
28072}
28073
28074SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28075 SDValue N0 = N->getOperand(0);
28076 EVT VT = N0.getValueType();
28077 unsigned Opcode = N->getOpcode();
28078
28079 // VECREDUCE over 1-element vector is just an extract.
28080 if (VT.getVectorElementCount().isScalar()) {
28081 SDLoc dl(N);
28082 SDValue Res =
28083 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28084 DAG.getVectorIdxConstant(0, dl));
28085 if (Res.getValueType() != N->getValueType(0))
28086 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28087 return Res;
28088 }
28089
28090 // On a boolean vector an and/or reduction is the same as a umin/umax
28091 // reduction. Convert them if the latter is legal while the former isn't.
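// e.g. vecreduce_and(v4i1 X) computes the same value as vecreduce_umin(v4i1 X).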
28092 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28093 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28094 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28095 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28096 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28097 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28098 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28099 }
28100
28101 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28102 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28103 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28104 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28105 SDValue Vec = N0.getOperand(0);
28106 SDValue Subvec = N0.getOperand(1);
28107 if ((Opcode == ISD::VECREDUCE_OR &&
28108 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28109 (Opcode == ISD::VECREDUCE_AND &&
28110 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28111 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28112 }
28113
28114 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28115 // Same for zext and anyext, and for and/or/xor reductions.
28116 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28117 Opcode == ISD::VECREDUCE_XOR) &&
28118 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28119 N0.getOpcode() == ISD::ZERO_EXTEND ||
28120 N0.getOpcode() == ISD::ANY_EXTEND) &&
28121 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28122 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28123 N0.getOperand(0).getValueType(),
28124 N0.getOperand(0));
28125 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28126 }
28127 return SDValue();
28128}
28129
28130SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28131 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28132
28133 // FSUB -> FMA combines:
28134 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28135 AddToWorklist(Fused.getNode());
28136 return Fused;
28137 }
28138 return SDValue();
28139}
28140
28141SDValue DAGCombiner::visitVPOp(SDNode *N) {
28142
28143 if (N->getOpcode() == ISD::VP_GATHER)
28144 if (SDValue SD = visitVPGATHER(N))
28145 return SD;
28146
28147 if (N->getOpcode() == ISD::VP_SCATTER)
28148 if (SDValue SD = visitVPSCATTER(N))
28149 return SD;
28150
28151 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28152 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28153 return SD;
28154
28155 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28156 if (SDValue SD = visitVP_STRIDED_STORE(N))
28157 return SD;
28158
28159 // VP operations in which all vector elements are disabled - either by
28160 // determining that the mask is all false or that the EVL is 0 - can be
28161 // eliminated.
28162 bool AreAllEltsDisabled = false;
28163 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28164 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28165 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28166 AreAllEltsDisabled |=
28167 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28168
28169 // This is the only generic VP combine we support for now.
28170 if (!AreAllEltsDisabled) {
28171 switch (N->getOpcode()) {
28172 case ISD::VP_FADD:
28173 return visitVP_FADD(N);
28174 case ISD::VP_FSUB:
28175 return visitVP_FSUB(N);
28176 case ISD::VP_FMA:
28177 return visitFMA<VPMatchContext>(N);
28178 case ISD::VP_SELECT:
28179 return visitVP_SELECT(N);
28180 case ISD::VP_MUL:
28181 return visitMUL<VPMatchContext>(N);
28182 case ISD::VP_SUB:
28183 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28184 default:
28185 break;
28186 }
28187 return SDValue();
28188 }
28189
28190 // Binary operations can be replaced by UNDEF.
28191 if (ISD::isVPBinaryOp(N->getOpcode()))
28192 return DAG.getUNDEF(N->getValueType(0));
28193
28194 // VP Memory operations can be replaced by either the chain (stores) or the
28195 // chain + undef (loads).
28196 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28197 if (MemSD->writeMem())
28198 return MemSD->getChain();
28199 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28200 }
28201
28202 // Reduction operations return the start operand when no elements are active.
28203 if (ISD::isVPReduction(N->getOpcode()))
28204 return N->getOperand(0);
28205
28206 return SDValue();
28207}
28208
28209SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28210 SDValue Chain = N->getOperand(0);
28211 SDValue Ptr = N->getOperand(1);
28212 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28213
28214 // Check that the memory the FP state is written to is used only in a single
28215 // load operation.
28216 LoadSDNode *LdNode = nullptr;
28217 for (auto *U : Ptr->users()) {
28218 if (U == N)
28219 continue;
28220 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28221 if (LdNode && LdNode != Ld)
28222 return SDValue();
28223 LdNode = Ld;
28224 continue;
28225 }
28226 return SDValue();
28227 }
28228 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28229 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28230 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28231 return SDValue();
28232
28233 // Check if the loaded value is used only in a store operation.
28234 StoreSDNode *StNode = nullptr;
28235 for (SDUse &U : LdNode->uses()) {
28236 if (U.getResNo() == 0) {
28237 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28238 if (StNode)
28239 return SDValue();
28240 StNode = St;
28241 } else {
28242 return SDValue();
28243 }
28244 }
28245 }
28246 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28247 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28248 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28249 return SDValue();
28250
28251 // Create new node GET_FPENV_MEM, which uses the store address to write FP
28252 // environment.
28253 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28254 StNode->getMemOperand());
28255 CombineTo(StNode, Res, false);
28256 return Res;
28257}
28258
28259SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28260 SDValue Chain = N->getOperand(0);
28261 SDValue Ptr = N->getOperand(1);
28262 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28263
28264 // Check that, apart from this node, the FP state address is used only in a
28265 // single store operation.
28265 StoreSDNode *StNode = nullptr;
28266 for (auto *U : Ptr->users()) {
28267 if (U == N)
28268 continue;
28269 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28270 if (StNode && StNode != St)
28271 return SDValue();
28272 StNode = St;
28273 continue;
28274 }
28275 return SDValue();
28276 }
28277 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28278 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28279 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28280 return SDValue();
28281
28282 // Check if the stored value is loaded from some location and the loaded
28283 // value is used only in the store operation.
28284 SDValue StValue = StNode->getValue();
28285 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28286 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28287 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28288 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28289 return SDValue();
28290
28291 // Create new node SET_FPENV_MEM, which uses the load address to read FP
28292 // environment.
28293 SDValue Res =
28294 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28295 LdNode->getMemOperand());
28296 return Res;
28297}
28298
28299/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
28300/// with the destination vector and a zero vector.
28301/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28302/// vector_shuffle V, Zero, <0, 4, 2, 4>
28303SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28304 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28305
28306 EVT VT = N->getValueType(0);
28307 SDValue LHS = N->getOperand(0);
28308 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28309 SDLoc DL(N);
28310
28311 // Make sure we're not running after operation legalization where it
28312 // may have custom lowered the vector shuffles.
28313 if (LegalOperations)
28314 return SDValue();
28315
28316 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28317 return SDValue();
28318
28319 EVT RVT = RHS.getValueType();
28320 unsigned NumElts = RHS.getNumOperands();
28321
28322 // Attempt to create a valid clear mask, splitting the mask into
28323 // sub elements and checking to see if each is
28324 // all zeros or all ones - suitable for shuffle masking.
28325 auto BuildClearMask = [&](int Split) {
28326 int NumSubElts = NumElts * Split;
28327 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28328
28329 SmallVector<int, 8> Indices;
28330 for (int i = 0; i != NumSubElts; ++i) {
28331 int EltIdx = i / Split;
28332 int SubIdx = i % Split;
28333 SDValue Elt = RHS.getOperand(EltIdx);
28334 // X & undef --> 0 (not undef). So this lane must be converted to choose
28335 // from the zero constant vector (same as if the element had all 0-bits).
28336 if (Elt.isUndef()) {
28337 Indices.push_back(i + NumSubElts);
28338 continue;
28339 }
28340
28341 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28342 if (!Bits)
28343 return SDValue();
28344
28345 // Extract the sub element from the constant bit mask.
28346 if (DAG.getDataLayout().isBigEndian())
28347 *Bits =
28348 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28349 else
28350 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28351
28352 if (Bits->isAllOnes())
28353 Indices.push_back(i);
28354 else if (*Bits == 0)
28355 Indices.push_back(i + NumSubElts);
28356 else
28357 return SDValue();
28358 }
28359
28360 // Let's see if the target supports this vector_shuffle.
28361 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28362 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28363 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28364 return SDValue();
28365
28366 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28367 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28368 DAG.getBitcast(ClearVT, LHS),
28369 Zero, Indices));
28370 };
28371
28372 // Determine maximum split level (byte level masking).
28373 int MaxSplit = 1;
28374 if (RVT.getScalarSizeInBits() % 8 == 0)
28375 MaxSplit = RVT.getScalarSizeInBits() / 8;
28376
28377 for (int Split = 1; Split <= MaxSplit; ++Split)
28378 if (RVT.getScalarSizeInBits() % Split == 0)
28379 if (SDValue S = BuildClearMask(Split))
28380 return S;
28381
28382 return SDValue();
28383}
28384
28385/// If a vector binop is performed on splat values, it may be profitable to
28386/// extract, scalarize, and insert/splat.
28387static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28388 const SDLoc &DL, bool LegalTypes) {
28389 SDValue N0 = N->getOperand(0);
28390 SDValue N1 = N->getOperand(1);
28391 unsigned Opcode = N->getOpcode();
28392 EVT VT = N->getValueType(0);
28393 EVT EltVT = VT.getVectorElementType();
28394 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28395
28396 // TODO: Remove/replace the extract cost check? If the elements are available
28397 // as scalars, then there may be no extract cost. Should we ask if
28398 // inserting a scalar back into a vector is cheap instead?
28399 int Index0, Index1;
28400 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28401 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28402 // Extract element from splat_vector should be free.
28403 // TODO: use DAG.isSplatValue instead?
28404 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28405 N1.getOpcode() == ISD::SPLAT_VECTOR;
28406 if (!Src0 || !Src1 || Index0 != Index1 ||
28407 Src0.getValueType().getVectorElementType() != EltVT ||
28408 Src1.getValueType().getVectorElementType() != EltVT ||
28409 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28410 // If before type legalization, allow scalar types that will eventually be
28411 // made legal.
28412 !TLI.isOperationLegalOrCustom(
28413 Opcode, LegalTypes
28414 ? EltVT
28415 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28416 return SDValue();
28417
28418 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28419 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28420 return SDValue();
28421
28422 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28423 // All but one element should have an undef input, which will fold to a
28424 // constant or undef. Avoid splatting which would over-define potentially
28425 // undefined elements.
28426
28427 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28428 // build_vec ..undef, (bo X, Y), undef...
28429 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28430 DAG.ExtractVectorElements(Src0, EltsX);
28431 DAG.ExtractVectorElements(Src1, EltsY);
28432
28433 for (auto [X, Y] : zip(EltsX, EltsY))
28434 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28435 return DAG.getBuildVector(VT, DL, EltsResult);
28436 }
28437
28438 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28439 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28440 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28441 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28442
28443 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28444 return DAG.getSplat(VT, DL, ScalarBO);
28445}
28446
28447/// Visit a vector cast operation, like FP_EXTEND.
28448SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28449 EVT VT = N->getValueType(0);
28450 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28451 EVT EltVT = VT.getVectorElementType();
28452 unsigned Opcode = N->getOpcode();
28453
28454 SDValue N0 = N->getOperand(0);
28455 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28456
28457 // TODO: promoting the operation might also be good here?
28458 int Index0;
28459 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28460 if (Src0 &&
28461 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28462 TLI.isExtractVecEltCheap(VT, Index0)) &&
28463 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28464 TLI.preferScalarizeSplat(N)) {
28465 EVT SrcVT = N0.getValueType();
28466 EVT SrcEltVT = SrcVT.getVectorElementType();
28467 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28468 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28469 SDValue Elt =
28470 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28471 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28472 if (VT.isScalableVector())
28473 return DAG.getSplatVector(VT, DL, ScalarBO);
28474 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28475 return DAG.getBuildVector(VT, DL, Ops);
28476 }
28477 }
28478
28479 return SDValue();
28480}
28481
28482/// Visit a binary vector operation, like ADD.
28483SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28484 EVT VT = N->getValueType(0);
28485 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28486
28487 SDValue LHS = N->getOperand(0);
28488 SDValue RHS = N->getOperand(1);
28489 unsigned Opcode = N->getOpcode();
28490 SDNodeFlags Flags = N->getFlags();
28491
28492 // Move unary shuffles with identical masks after a vector binop:
28493 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28494 // --> shuffle (VBinOp A, B), Undef, Mask
28495 // This does not require type legality checks because we are creating the
28496 // same types of operations that are in the original sequence. We do have to
28497 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28498 // though. This code is adapted from the identical transform in instcombine.
28499 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28500 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28501 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28502 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28503 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28504 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28505 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28506 RHS.getOperand(0), Flags);
28507 SDValue UndefV = LHS.getOperand(1);
28508 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28509 }
28510
28511 // Try to sink a splat shuffle after a binop with a uniform constant.
28512 // This is limited to cases where neither the shuffle nor the constant have
28513 // undefined elements because that could be poison-unsafe or inhibit
28514 // demanded elements analysis. It is further limited to not change a splat
28515 // of an inserted scalar because that may be optimized better by
28516 // load-folding or other target-specific behaviors.
28517 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28518 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28519 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28520 // binop (splat X), (splat C) --> splat (binop X, C)
28521 SDValue X = Shuf0->getOperand(0);
28522 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28523 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28524 Shuf0->getMask());
28525 }
28526 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
28527 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
28528 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28529 // binop (splat C), (splat X) --> splat (binop C, X)
28530 SDValue X = Shuf1->getOperand(0);
28531 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
28532 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28533 Shuf1->getMask());
28534 }
28535 }
28536
28537 // The following pattern is likely to emerge with vector reduction ops. Moving
28538 // the binary operation ahead of insertion may allow using a narrower vector
28539 // instruction that has better performance than the wide version of the op:
28540 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
28541 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
28542 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
28543 LHS.getOperand(2) == RHS.getOperand(2) &&
28544 (LHS.hasOneUse() || RHS.hasOneUse())) {
28545 SDValue X = LHS.getOperand(1);
28546 SDValue Y = RHS.getOperand(1);
28547 SDValue Z = LHS.getOperand(2);
28548 EVT NarrowVT = X.getValueType();
28549 if (NarrowVT == Y.getValueType() &&
28550 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
28551 LegalOperations)) {
28552 // (binop undef, undef) may not return undef, so compute that result.
28553 SDValue VecC =
28554 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
28555 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
28556 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
28557 }
28558 }
28559
28560 // Make sure all but the first op are undef or constant.
28561 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
28562 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
28563 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
28564 return Op.isUndef() ||
28565 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
28566 });
28567 };
28568
28569 // The following pattern is likely to emerge with vector reduction ops. Moving
28570 // the binary operation ahead of the concat may allow using a narrower vector
28571 // instruction that has better performance than the wide version of the op:
28572 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
28573 // concat (VBinOp X, Y), VecC
28574 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
28575 (LHS.hasOneUse() || RHS.hasOneUse())) {
28576 EVT NarrowVT = LHS.getOperand(0).getValueType();
28577 if (NarrowVT == RHS.getOperand(0).getValueType() &&
28578 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
28579 unsigned NumOperands = LHS.getNumOperands();
28580 SmallVector<SDValue, 4> ConcatOps;
28581 for (unsigned i = 0; i != NumOperands; ++i) {
28582 // This constant-folds for operands 1 and up.
28583 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
28584 RHS.getOperand(i)));
28585 }
28586
28587 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
28588 }
28589 }
28590
28591 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
28592 return V;
28593
28594 return SDValue();
28595}
28596
28597SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
28598 SDValue N2) {
28599 assert(N0.getOpcode() == ISD::SETCC &&
28600 "First argument must be a SetCC node!");
28601
28602 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
28603 cast<CondCodeSDNode>(N0.getOperand(2))->get());
28604
28605 // If we got a simplified select_cc node back from SimplifySelectCC, then
28606 // break it down into a new SETCC node, and a new SELECT node, and then return
28607 // the SELECT node, since we were called with a SELECT node.
28608 if (SCC.getNode()) {
28609 // Check to see if we got a select_cc back (to turn into setcc/select).
28610 // Otherwise, just return whatever node we got back, like fabs.
28611 if (SCC.getOpcode() == ISD::SELECT_CC) {
28612 const SDNodeFlags Flags = N0->getFlags();
28613 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
28614 N0.getValueType(),
28615 SCC.getOperand(0), SCC.getOperand(1),
28616 SCC.getOperand(4), Flags);
28617 AddToWorklist(SETCC.getNode());
28618 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
28619 SCC.getOperand(2), SCC.getOperand(3), Flags);
28620 }
28621
28622 return SCC;
28623 }
28624 return SDValue();
28625}
28626
28627/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
28628/// being selected between, see if we can simplify the select. Callers of this
28629/// should assume that TheSelect is deleted if this returns true. As such, they
28630/// should return the appropriate thing (e.g. the node) back to the top-level of
28631/// the DAG combiner loop to avoid it being looked at.
28632bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
28633 SDValue RHS) {
28634 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28635 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
28636 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
28637 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
28638 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
28639 SDValue Sqrt = RHS;
28640 ISD::CondCode CC;
28641 SDValue CmpLHS;
28642 const ConstantFPSDNode *Zero = nullptr;
28643
28644 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
28645 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
28646 CmpLHS = TheSelect->getOperand(0);
28647 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
28648 } else {
28649 // SELECT or VSELECT
28650 SDValue Cmp = TheSelect->getOperand(0);
28651 if (Cmp.getOpcode() == ISD::SETCC) {
28652 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
28653 CmpLHS = Cmp.getOperand(0);
28654 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
28655 }
28656 }
28657 if (Zero && Zero->isZero() &&
28658 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
28659 CC == ISD::SETULT || CC == ISD::SETLT)) {
28660 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28661 CombineTo(TheSelect, Sqrt);
28662 return true;
28663 }
28664 }
28665 }
28666 // Cannot simplify select with vector condition
28667 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
28668
28669 // If this is a select from two identical things, try to pull the operation
28670 // through the select.
28671 if (LHS.getOpcode() != RHS.getOpcode() ||
28672 !LHS.hasOneUse() || !RHS.hasOneUse())
28673 return false;
28674
28675 // If this is a load and the token chain is identical, replace the select
28676 // of two loads with a load through a select of the address to load from.
28677 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
28678 // constants have been dropped into the constant pool.
28679 if (LHS.getOpcode() == ISD::LOAD) {
28680 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
28681 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
28682
28683 // Token chains must be identical.
28684 if (LHS.getOperand(0) != RHS.getOperand(0) ||
28685 // Do not let this transformation reduce the number of volatile loads.
28686 // Be conservative for atomics for the moment
28687 // TODO: This does appear to be legal for unordered atomics (see D66309)
28688 !LLD->isSimple() || !RLD->isSimple() ||
28689 // FIXME: If either is a pre/post inc/dec load,
28690 // we'd need to split out the address adjustment.
28691 LLD->isIndexed() || RLD->isIndexed() ||
28692 // If this is an EXTLOAD, the VT's must match.
28693 LLD->getMemoryVT() != RLD->getMemoryVT() ||
28694 // If this is an EXTLOAD, the kind of extension must match.
28695 (LLD->getExtensionType() != RLD->getExtensionType() &&
28696 // The only exception is if one of the extensions is anyext.
28697 LLD->getExtensionType() != ISD::EXTLOAD &&
28698 RLD->getExtensionType() != ISD::EXTLOAD) ||
28699 // FIXME: this discards src value information. This is
28700 // over-conservative. It would be beneficial to be able to remember
28701 // both potential memory locations. Since we are discarding
28702 // src value info, don't do the transformation if the memory
28703 // locations are not in the default address space.
28704 LLD->getPointerInfo().getAddrSpace() != 0 ||
28705 RLD->getPointerInfo().getAddrSpace() != 0 ||
28706 // We can't produce a CMOV of a TargetFrameIndex since we won't
28707 // generate the address generation required.
28708 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28709 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28710 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
28711 LLD->getBasePtr().getValueType()))
28712 return false;
28713
28714 // The loads must not depend on one another.
28715 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
28716 return false;
28717
28718 // Check that the select condition doesn't reach either load. If so,
28719 // folding this will induce a cycle into the DAG. If not, this is safe to
28720 // xform, so create a select of the addresses.
28721
28722 SmallPtrSet<const SDNode *, 32> Visited;
28723 SmallVector<const SDNode *, 16> Worklist;
28724
28725 // Always fail if LLD and RLD are not independent. TheSelect is a
28726 // predecessor to all Nodes in question so we need not search past it.
28727
28728 Visited.insert(TheSelect);
28729 Worklist.push_back(LLD);
28730 Worklist.push_back(RLD);
28731
28732 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
28733 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
28734 return false;
28735
28736 SDValue Addr;
28737 if (TheSelect->getOpcode() == ISD::SELECT) {
28738 // We cannot do this optimization if any pair of {RLD, LLD} is a
28739 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
28740 // Loads, we only need to check if CondNode is a successor to one of the
28741 // loads. We can further avoid this if there's no use of their chain
28742 // value.
28743 SDNode *CondNode = TheSelect->getOperand(0).getNode();
28744 Worklist.push_back(CondNode);
28745
28746 if ((LLD->hasAnyUseOfValue(1) &&
28747 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28748 (RLD->hasAnyUseOfValue(1) &&
28749 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28750 return false;
28751
28752 Addr = DAG.getSelect(SDLoc(TheSelect),
28753 LLD->getBasePtr().getValueType(),
28754 TheSelect->getOperand(0), LLD->getBasePtr(),
28755 RLD->getBasePtr());
28756 } else { // Otherwise SELECT_CC
28757 // We cannot do this optimization if any pair of {RLD, LLD} is a
28758 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
28759 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
28760 // one of the loads. We can further avoid this if there's no use of their
28761 // chain value.
28762
28763 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
28764 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
28765 Worklist.push_back(CondLHS);
28766 Worklist.push_back(CondRHS);
28767
28768 if ((LLD->hasAnyUseOfValue(1) &&
28769 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28770 (RLD->hasAnyUseOfValue(1) &&
28771 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28772 return false;
28773
28774 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
28775 LLD->getBasePtr().getValueType(),
28776 TheSelect->getOperand(0),
28777 TheSelect->getOperand(1),
28778 LLD->getBasePtr(), RLD->getBasePtr(),
28779 TheSelect->getOperand(4));
28780 }
28781
28782 SDValue Load;
28783 // It is safe to replace the two loads if they have different alignments,
28784 // but the new load must be the minimum (most restrictive) alignment of the
28785 // inputs.
28786 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
28787 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
28788 if (!RLD->isInvariant())
28789 MMOFlags &= ~MachineMemOperand::MOInvariant;
28790 if (!RLD->isDereferenceable())
28791 MMOFlags &= ~MachineMemOperand::MODereferenceable;
28792 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
28793 // FIXME: Discards pointer and AA info.
28794 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
28795 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
28796 MMOFlags);
28797 } else {
28798 // FIXME: Discards pointer and AA info.
28799 Load = DAG.getExtLoad(
28800 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
28801 : LLD->getExtensionType(),
28802 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
28803 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
28804 }
28805
28806 // Users of the select now use the result of the load.
28807 CombineTo(TheSelect, Load);
28808
28809 // Users of the old loads now use the new load's chain. We know the
28810 // old-load value is dead now.
28811 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
28812 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
28813 return true;
28814 }
28815
28816 return false;
28817}
28818
28819/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
28820/// bitwise 'and'.
28821SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
28822 SDValue N1, SDValue N2, SDValue N3,
28823 ISD::CondCode CC) {
28824 // If this is a select where the false operand is zero and the compare is a
28825 // check of the sign bit, see if we can perform the "gzip trick":
28826 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
28827 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
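 // For example, for i32: (X < 0) ? A : 0 becomes (sra X, 31) & A; the
 // arithmetic shift yields all-ones when X is negative and zero otherwise,
 // so the 'and' picks A or 0 without a select.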
28828 EVT XType = N0.getValueType();
28829 EVT AType = N2.getValueType();
28830 if (!isNullConstant(N3) || !XType.bitsGE(AType))
28831 return SDValue();
28832
28833 // If the comparison is testing for a positive value, we have to invert
28834 // the sign bit mask, so only do that transform if the target has a bitwise
28835 // 'and not' instruction (the invert is free).
28836 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
28837 // (X > -1) ? A : 0
28838 // (X > 0) ? X : 0 <-- This is canonical signed max.
28839 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
28840 return SDValue();
28841 } else if (CC == ISD::SETLT) {
28842 // (X < 0) ? A : 0
28843 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
28844 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28845 return SDValue();
28846 } else {
28847 return SDValue();
28848 }
28849
28850 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28851 // constant.
28852 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28853 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28854 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28855 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28856 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28857 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28858 AddToWorklist(Shift.getNode());
28859
28860 if (XType.bitsGT(AType)) {
28861 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28862 AddToWorklist(Shift.getNode());
28863 }
28864
28865 if (CC == ISD::SETGT)
28866 Shift = DAG.getNOT(DL, Shift, AType);
28867
28868 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28869 }
28870 }
28871
28872 unsigned ShCt = XType.getSizeInBits() - 1;
28873 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28874 return SDValue();
28875
28876 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28877 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28878 AddToWorklist(Shift.getNode());
28879
28880 if (XType.bitsGT(AType)) {
28881 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28882 AddToWorklist(Shift.getNode());
28883 }
28884
28885 if (CC == ISD::SETGT)
28886 Shift = DAG.getNOT(DL, Shift, AType);
28887
28888 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28889}
28890
28891// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28892SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28893 SDValue N0 = N->getOperand(0);
28894 SDValue N1 = N->getOperand(1);
28895 SDValue N2 = N->getOperand(2);
28896 SDLoc DL(N);
28897
28898 unsigned BinOpc = N1.getOpcode();
28899 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28900 (N1.getResNo() != N2.getResNo()))
28901 return SDValue();
28902
28903 // The use checks are intentionally on SDNode because we may be dealing
28904 // with opcodes that produce more than one SDValue.
28905 // TODO: Do we really need to check N0 (the condition operand of the select)?
28906 // But removing that clause could cause an infinite loop...
28907 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28908 return SDValue();
28909
28910 // Binops may include opcodes that return multiple values, so all values
28911 // must be created/propagated from the newly created binops below.
28912 SDVTList OpVTs = N1->getVTList();
28913
28914 // Fold select(cond, binop(x, y), binop(z, y))
28915 // --> binop(select(cond, x, z), y)
28916 if (N1.getOperand(1) == N2.getOperand(1)) {
28917 SDValue N10 = N1.getOperand(0);
28918 SDValue N20 = N2.getOperand(0);
28919 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28920 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28921 SDValue NewBinOp =
28922 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
28923 return SDValue(NewBinOp.getNode(), N1.getResNo());
28924 }
28925
28926 // Fold select(cond, binop(x, y), binop(x, z))
28927 // --> binop(x, select(cond, y, z))
28928 if (N1.getOperand(0) == N2.getOperand(0)) {
28929 SDValue N11 = N1.getOperand(1);
28930 SDValue N21 = N2.getOperand(1);
28931 // Second op VT might be different (e.g. shift amount type)
28932 if (N11.getValueType() == N21.getValueType()) {
28933 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28934 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28935 SDValue NewBinOp =
28936 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
28937 return SDValue(NewBinOp.getNode(), N1.getResNo());
28938 }
28939 }
28940
28941 // TODO: Handle isCommutativeBinOp patterns as well?
28942 return SDValue();
28943}
28944
28945// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28946SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28947 SDValue N0 = N->getOperand(0);
28948 EVT VT = N->getValueType(0);
28949 bool IsFabs = N->getOpcode() == ISD::FABS;
28950 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28951
28952 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28953 return SDValue();
28954
28955 SDValue Int = N0.getOperand(0);
28956 EVT IntVT = Int.getValueType();
28957
28958 // The operand to cast should be integer.
28959 if (!IntVT.isInteger() || IntVT.isVector())
28960 return SDValue();
28961
28962 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28963 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28964 APInt SignMask;
28965 if (N0.getValueType().isVector()) {
28966 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28967 // 0x7f...) per element and splat it.
28968 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28969 if (IsFabs)
28970 SignMask = ~SignMask;
28971 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28972 } else {
28973 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28974 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28975 if (IsFabs)
28976 SignMask = ~SignMask;
28977 }
28978 SDLoc DL(N0);
28979 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28980 DAG.getConstant(SignMask, DL, IntVT));
28981 AddToWorklist(Int.getNode());
28982 return DAG.getBitcast(VT, Int);
28983}
28984
28985/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28986/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28987/// in it. This may be a win when the constant is not otherwise available
28988/// because it replaces two constant pool loads with one.
28989SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28990 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28991 ISD::CondCode CC) {
28992 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28993 return SDValue();
28994
28995 // If we are before legalize types, we want the other legalization to happen
28996 // first (for example, to avoid messing with soft float).
28997 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28998 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28999 EVT VT = N2.getValueType();
29000 if (!TV || !FV || !TLI.isTypeLegal(VT))
29001 return SDValue();
29002
29003 // If a constant can be materialized without loads, this does not make sense.
29004 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
29005 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
29006 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
29007 return SDValue();
29008
29009 // If both constants have multiple uses, then we won't need to do an extra
29010 // load. The values are likely around in registers for other users.
29011 if (!TV->hasOneUse() && !FV->hasOneUse())
29012 return SDValue();
29013
29014 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
29015 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
29016 Type *FPTy = Elts[0]->getType();
29017 const DataLayout &TD = DAG.getDataLayout();
29018
29019 // Create a ConstantArray of the two constants.
29020 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
29021 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
29022 TD.getPrefTypeAlign(FPTy));
29023 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
29024
29025 // Get offsets to the 0 and 1 elements of the array, so we can select between
29026 // them.
29027 SDValue Zero = DAG.getIntPtrConstant(0, DL);
29028 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
29029 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
29030 SDValue Cond =
29031 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
29032 AddToWorklist(Cond.getNode());
29033 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
29034 AddToWorklist(CstOffset.getNode());
29035 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
29036 AddToWorklist(CPIdx.getNode());
29037 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
29038 MachinePointerInfo::getConstantPool(
29039 DAG.getMachineFunction()), Alignment);
29040}
29041
29042/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
29043/// where 'cond' is the comparison specified by CC.
29044SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
29045 SDValue N2, SDValue N3, ISD::CondCode CC,
29046 bool NotExtCompare) {
29047 // (x ? y : y) -> y.
29048 if (N2 == N3) return N2;
29049
29050 EVT CmpOpVT = N0.getValueType();
29051 EVT CmpResVT = getSetCCResultType(CmpOpVT);
29052 EVT VT = N2.getValueType();
29053 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
29054 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29055 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
29056
29057 // Determine if the condition we're dealing with is constant.
29058 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
29059 AddToWorklist(SCC.getNode());
29060 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29061 // fold select_cc true, x, y -> x
29062 // fold select_cc false, x, y -> y
29063 return !(SCCC->isZero()) ? N2 : N3;
29064 }
29065 }
29066
29067 if (SDValue V =
29068 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29069 return V;
29070
29071 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29072 return V;
29073
29074 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29075 // where y has a single bit set.
29076 // A plaintext description would be: we can turn the SELECT_CC into an AND
29077 // when the condition can be materialized as an all-ones register. Any
29078 // single bit-test can be materialized as an all-ones register with
29079 // shift-left and shift-right-arith.
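 // For example, for i32 X with y == 4 (bit 2 set):
 //   select_cc seteq (and X, 4), 0, 0, A --> and (sra (shl X, 29), 31), A
 // The shl moves bit 2 into the sign bit and the sra smears it across the
 // register, producing all-ones (and thus A) exactly when the bit is set.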
29080 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29081 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29082 SDValue AndLHS = N0->getOperand(0);
29083 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29084 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29085 // Shift the tested bit over the sign bit.
29086 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29087 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29088 unsigned ShCt = AndMask.getBitWidth() - 1;
29089 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29090 SDLoc(AndLHS));
29091 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29092
29093 // Now arithmetic right shift it all the way over, so the result is
29094 // either all-ones, or zero.
29095 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29096 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29097
29098 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29099 }
29100 }
29101 }
29102
29103 // fold select C, 16, 0 -> shl C, 4
29104 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29105 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29106
29107 if ((Fold || Swap) &&
29108 TLI.getBooleanContents(CmpOpVT) ==
29109 TargetLowering::ZeroOrOneBooleanContent &&
29110 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29112
29113 if (Swap) {
29114 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29115 std::swap(N2C, N3C);
29116 }
29117
29118 // If the caller doesn't want us to simplify this into a zext of a compare,
29119 // don't do it.
29120 if (NotExtCompare && N2C->isOne())
29121 return SDValue();
29122
29123 SDValue Temp, SCC;
29124 // zext (setcc n0, n1)
29125 if (LegalTypes) {
29126 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29127 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29128 } else {
29129 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29130 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29131 }
29132
29133 AddToWorklist(SCC.getNode());
29134 AddToWorklist(Temp.getNode());
29135
29136 if (N2C->isOne())
29137 return Temp;
29138
29139 unsigned ShCt = N2C->getAPIntValue().logBase2();
29140 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29141 return SDValue();
29142
29143 // shl setcc result by log2 n2c
29144 return DAG.getNode(
29145 ISD::SHL, DL, N2.getValueType(), Temp,
29146 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29147 }
29148
29149 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29150 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29151 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29152 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29153 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29154 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29155 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29156 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29157 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29158 SDValue ValueOnZero = N2;
29159 SDValue Count = N3;
29160 // If the condition is NE instead of EQ, swap the operands.
29161 if (CC == ISD::SETNE)
29162 std::swap(ValueOnZero, Count);
29163 // Check if the value on zero is a constant equal to the bits in the type.
29164 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29165 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29166 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29167 // legal, combine to just cttz.
29168 if ((Count.getOpcode() == ISD::CTTZ ||
29169 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29170 N0 == Count.getOperand(0) &&
29171 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29172 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29173 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29174 // legal, combine to just ctlz.
29175 if ((Count.getOpcode() == ISD::CTLZ ||
29176 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29177 N0 == Count.getOperand(0) &&
29178 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29179 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29180 }
29181 }
29182 }
29183
29184 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29185 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29186 if (!NotExtCompare && N1C && N2C && N3C &&
29187 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29188 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29189 (N1C->isZero() && CC == ISD::SETLT)) &&
29190 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29191 SDValue ASHR =
29192 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29193 DAG.getShiftAmountConstant(
29194 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29195 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29196 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29197 }
29198
29199 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29200 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29201 N2C->isOne() && N3C->isAllOnes() &&
29202 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29203 CmpOpVT.getScalarSizeInBits() - 1)) {
29204 SDValue ASHR =
29205 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29206 DAG.getShiftAmountConstant(
29207 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29208 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29209 DAG.getConstant(1, DL, VT));
29210 }
29211
29212 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29213 return S;
29214 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29215 return S;
29216 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29217 return ABD;
29218
29219 return SDValue();
29220}
29221
29222static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29223 const TargetLowering &TLI) {
29224 // Match a pattern such as:
29225 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29226 // This extracts contiguous parts of X and ORs them together before comparing.
29227 // We can optimize this so that we directly check (X & SomeMask) instead,
29228 // eliminating the shifts.
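 // For example, when the result is only compared against zero:
 //   ((X | (X >> 8)) & 0xff) eq/ne 0  ==>  (X & 0xffff) eq/ne 0
 // because each shifted copy of X contributes RootMask << ShiftAmt to the
 // final PartsMask.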
29229
29230 EVT VT = Root.getValueType();
29231
29232 // TODO: Support vectors?
29233 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29234 return SDValue();
29235
29236 SDValue N0 = Root.getOperand(0);
29237 SDValue N1 = Root.getOperand(1);
29238
29239 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29240 return SDValue();
29241
29242 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29243
29244 SDValue Src;
29245 const auto IsSrc = [&](SDValue V) {
29246 if (!Src) {
29247 Src = V;
29248 return true;
29249 }
29250
29251 return Src == V;
29252 };
29253
29254 SmallVector<SDValue> Worklist = {N0};
29255 APInt PartsMask(VT.getSizeInBits(), 0);
29256 while (!Worklist.empty()) {
29257 SDValue V = Worklist.pop_back_val();
29258 if (!V.hasOneUse() && (Src && Src != V))
29259 return SDValue();
29260
29261 if (V.getOpcode() == ISD::OR) {
29262 Worklist.push_back(V.getOperand(0));
29263 Worklist.push_back(V.getOperand(1));
29264 continue;
29265 }
29266
29267 if (V.getOpcode() == ISD::SRL) {
29268 SDValue ShiftSrc = V.getOperand(0);
29269 SDValue ShiftAmt = V.getOperand(1);
29270
29271 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29272 return SDValue();
29273
29274 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29275 if (ShiftAmtVal > RootMask.getBitWidth())
29276 return SDValue();
29277
29278 PartsMask |= (RootMask << ShiftAmtVal);
29279 continue;
29280 }
29281
29282 if (IsSrc(V)) {
29283 PartsMask |= RootMask;
29284 continue;
29285 }
29286
29287 return SDValue();
29288 }
29289
29290 if (!Src)
29291 return SDValue();
29292
29293 SDLoc DL(Root);
29294 return DAG.getNode(ISD::AND, DL, VT,
29295 {Src, DAG.getConstant(PartsMask, DL, VT)});
29296}
29297
29298/// This is a stub for TargetLowering::SimplifySetCC.
29299SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29300 ISD::CondCode Cond, const SDLoc &DL,
29301 bool foldBooleans) {
29302 TargetLowering::DAGCombinerInfo
29303 DagCombineInfo(DAG, Level, false, this);
29304 if (SDValue C =
29305 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29306 return C;
29307
29308 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
29309 isNullConstant(N1)) {
29310
29311 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29312 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29313 }
29314
29315 return SDValue();
29316}
29317
29318/// Given an ISD::SDIV node expressing a divide by constant, return
29319/// a DAG expression to select that will generate the same value by multiplying
29320/// by a magic number.
29321/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29322SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29323 // when optimising for minimum size, we don't want to expand a div to a mul
29324 // and a shift.
29325 if (DAG.getMachineFunction().getFunction().hasMinSize())
29326 return SDValue();
29327
29328 SmallVector<SDNode *, 8> Built;
29329 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29330 for (SDNode *N : Built)
29331 AddToWorklist(N);
29332 return S;
29333 }
29334
29335 return SDValue();
29336}
29337
29338/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29339/// DAG expression that will generate the same value by right shifting.
29340SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29341 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29342 if (!C)
29343 return SDValue();
29344
29345 // Avoid division by zero.
29346 if (C->isZero())
29347 return SDValue();
29348
29350 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29351 for (SDNode *N : Built)
29352 AddToWorklist(N);
29353 return S;
29354 }
29355
29356 return SDValue();
29357}
29358
29359/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29360/// expression that will generate the same value by multiplying by a magic
29361/// number.
29362/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29363SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29364 // when optimising for minimum size, we don't want to expand a div to a mul
29365 // and a shift.
29366 if (DAG.getMachineFunction().getFunction().hasMinSize())
29367 return SDValue();
29368
29369 SmallVector<SDNode *, 8> Built;
29370 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29371 for (SDNode *N : Built)
29372 AddToWorklist(N);
29373 return S;
29374 }
29375
29376 return SDValue();
29377}
29378
29379/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29380/// return a DAG expression that will generate the same value.
29381SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29382 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29383 if (!C)
29384 return SDValue();
29385
29386 // Avoid division by zero.
29387 if (C->isZero())
29388 return SDValue();
29389
29390 SmallVector<SDNode *, 8> Built;
29391 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29392 for (SDNode *N : Built)
29393 AddToWorklist(N);
29394 return S;
29395 }
29396
29397 return SDValue();
29398}
29399
29400// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29401//
29402// Returns the node that represents `Log2(Op)`. This may create a new node. If
29403 // we are unable to compute `Log2(Op)`, this returns `SDValue()`.
29404//
29405// All nodes will be created at `DL` and the output will be of type `VT`.
29406//
29407// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29408// `AssumeNonZero` if this function should simply assume (not require proving
29409// `Op` is non-zero).
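// For example, log2 (shl 1, Y) becomes 0 + Y, i.e. Y, and log2 of a
// splat-of-8 vector becomes a splat of 3, without materializing a ctlz.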
29410static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29411 SDValue Op, unsigned Depth,
29412 bool AssumeNonZero) {
29413 assert(VT.isInteger() && "Only integer types are supported!");
29414
29415 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29416 while (true) {
29417 switch (V.getOpcode()) {
29418 case ISD::TRUNCATE:
29419 case ISD::ZERO_EXTEND:
29420 V = V.getOperand(0);
29421 break;
29422 default:
29423 return V;
29424 }
29425 }
29426 };
29427
29428 if (VT.isScalableVector())
29429 return SDValue();
29430
29431 Op = PeekThroughCastsAndTrunc(Op);
29432
29433 // Helper for determining whether a value is a power-2 constant scalar or a
29434 // vector of such elements.
29435 SmallVector<APInt> Pow2Constants;
29436 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29437 if (C->isZero() || C->isOpaque())
29438 return false;
29439 // TODO: We may also be able to support negative powers of 2 here.
29440 if (C->getAPIntValue().isPowerOf2()) {
29441 Pow2Constants.emplace_back(C->getAPIntValue());
29442 return true;
29443 }
29444 return false;
29445 };
29446
29447 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29448 if (!VT.isVector())
29449 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29450 // We need to create a build vector
29451 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29452 return DAG.getSplat(VT, DL,
29453 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29454 VT.getScalarType()));
29455 SmallVector<SDValue> Log2Ops;
29456 for (const APInt &Pow2 : Pow2Constants)
29457 Log2Ops.emplace_back(
29458 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29459 return DAG.getBuildVector(VT, DL, Log2Ops);
29460 }
29461
29462 if (Depth >= DAG.MaxRecursionDepth)
29463 return SDValue();
29464
29465 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29466 // Peek through zero extend. We can't peek through truncates since this
29467 // function is called on a shift amount. We must ensure that all of the bits
29468 // above the original shift amount are zeroed by this function.
29469 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29470 ToCast = ToCast.getOperand(0);
29471 EVT CurVT = ToCast.getValueType();
29472 if (NewVT == CurVT)
29473 return ToCast;
29474
29475 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29476 return DAG.getBitcast(NewVT, ToCast);
29477
29478 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29479 };
29480
29481 // log2(X << Y) -> log2(X) + Y
29482 if (Op.getOpcode() == ISD::SHL) {
29483 // 1 << Y and X nuw/nsw << Y are all non-zero.
29484 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29485 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29486 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29487 Depth + 1, AssumeNonZero))
29488 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29489 CastToVT(VT, Op.getOperand(1)));
29490 }
29491
29492 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29493 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
29494 Op.hasOneUse()) {
29495 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
29496 Depth + 1, AssumeNonZero))
29497 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
29498 Depth + 1, AssumeNonZero))
29499 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
29500 }
29501
29502 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29503 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29504 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29505 Op.hasOneUse()) {
29506 // Use AssumeNonZero as false here. Otherwise we can hit a case where
29507 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
29508 if (SDValue LogX =
29509 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29510 /*AssumeNonZero*/ false))
29511 if (SDValue LogY =
29512 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29513 /*AssumeNonZero*/ false))
29514 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29515 }
29516
29517 return SDValue();
29518}
29519
29520/// Determines the LogBase2 value for a non-null input value using the
29521/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
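/// For example, for an i32 value V == 16: ctlz(V) == 27, so
/// LogBase2(V) == 31 - 27 == 4.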
29522SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
29523 bool KnownNonZero, bool InexpensiveOnly,
29524 std::optional<EVT> OutVT) {
29525 EVT VT = OutVT ? *OutVT : V.getValueType();
29526 SDValue InexpensiveLogBase2 =
29527 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
29528 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
29529 return InexpensiveLogBase2;
29530
29531 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
29532 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
29533 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
29534 return LogBase2;
29535}
29536
29537/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29538/// For the reciprocal, we need to find the zero of the function:
29539/// F(X) = 1/X - A [which has a zero at X = 1/A]
29540/// =>
29541/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
29542/// does not require additional intermediate precision]
29543/// For the last iteration, put numerator N into it to gain more precision:
29544/// Result = N X_i + X_i (N - N A X_i)
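/// For example, with A = 3 and an initial estimate X_0 = 0.25, one step gives
/// X_1 = 0.25 * (2 - 3 * 0.25) = 0.3125, already close to 1/3.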
29545SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
29546 SDNodeFlags Flags) {
29547 if (LegalDAG)
29548 return SDValue();
29549
29550 // TODO: Handle extended types?
29551 EVT VT = Op.getValueType();
29552 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29553 VT.getScalarType() != MVT::f64)
29554 return SDValue();
29555
29556 // If estimates are explicitly disabled for this function, we're done.
29557 MachineFunction &MF = DAG.getMachineFunction();
29558 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
29559 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29560 return SDValue();
29561
29562 // Estimates may be explicitly enabled for this type with a custom number of
29563 // refinement steps.
29564 int Iterations = TLI.getDivRefinementSteps(VT, MF);
29565 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
29566 AddToWorklist(Est.getNode());
29567
29568 SDLoc DL(Op);
29569 if (Iterations) {
29570 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
29571
29572 // Newton iterations: Est = Est + Est (N - Arg * Est)
29573 // If this is the last iteration, also multiply by the numerator.
29574 for (int i = 0; i < Iterations; ++i) {
29575 SDValue MulEst = Est;
29576
29577 if (i == Iterations - 1) {
29578 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
29579 AddToWorklist(MulEst.getNode());
29580 }
29581
29582 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
29583 AddToWorklist(NewEst.getNode());
29584
29585 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
29586 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
29587 AddToWorklist(NewEst.getNode());
29588
29589 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29590 AddToWorklist(NewEst.getNode());
29591
29592 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
29593 AddToWorklist(Est.getNode());
29594 }
29595 } else {
29596 // If no iterations are available, just multiply the estimate by N.
29597 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
29598 AddToWorklist(Est.getNode());
29599 }
29600
29601 return Est;
29602 }
29603
29604 return SDValue();
29605}
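// Illustrative numeric trace of the refinement loop above (values rounded):
// approximating 1/A for A = 3.0 from an initial estimate X0 = 0.25 gives
//   X1 = 0.25   * (2 - 3 * 0.25)   = 0.3125
//   X2 = 0.3125 * (2 - 3 * 0.3125) = 0.33203125
//   X3 ~= 0.333328
// roughly doubling the number of correct bits per step, which is why only a
// few refinement iterations are requested per type.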
29606
29607/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29608/// For the reciprocal sqrt, we need to find the zero of the function:
29609/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29610/// =>
29611/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
29612/// As a result, we precompute A/2 prior to the iteration loop.
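///
/// Derivation of the update step, for reference: with F'(X) = -2/X^3,
///   X_{i+1} = X_i - F(X_i)/F'(X_i)
///           = X_i + (X_i/2) (1 - A X_i^2)
///           = X_i (1.5 - (A/2) X_i^2)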
29613SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
29614 unsigned Iterations,
29615 SDNodeFlags Flags, bool Reciprocal) {
29616 EVT VT = Arg.getValueType();
29617 SDLoc DL(Arg);
29618 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
29619
29620 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
29621 // this entire sequence requires only one FP constant.
29622 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
29623 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
29624
29625 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
29626 for (unsigned i = 0; i < Iterations; ++i) {
29627 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
29628 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
29629 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
29630 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29631 }
29632
29633 // If non-reciprocal square root is requested, multiply the result by Arg.
29634 if (!Reciprocal)
29635 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
29636
29637 return Est;
29638}
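// Illustrative numeric trace of the loop above: approximating 1/sqrt(A) for
// A = 2.0 (so HalfArg = 1.0) from an initial estimate X0 = 0.7 gives
//   X1 = 0.7   * (1.5 - 1.0 * 0.7 * 0.7)     = 0.707
//   X2 = 0.707 * (1.5 - 1.0 * 0.707 * 0.707) ~= 0.7071068
// converging to 1/sqrt(2). If a plain square root was requested, the final
// multiply by Arg turns this into an approximation of sqrt(2).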
29639
29640/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29641/// For the reciprocal sqrt, we need to find the zero of the function:
29642/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29643/// =>
29644/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
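///
/// For reference, this is algebraically the same step as the one-constant
/// form:
///   X_i (1.5 - (A/2) X_i^2) = (-0.5 * X_i) * ((A * X_i) * X_i + (-3.0))
/// but arranged so that only the constants -0.5 and -3.0 are materialized.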
29645SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
29646 unsigned Iterations,
29647 SDNodeFlags Flags, bool Reciprocal) {
29648 EVT VT = Arg.getValueType();
29649 SDLoc DL(Arg);
29650 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
29651 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
29652
29653 // This routine must enter the loop below to work correctly
29654 // when (Reciprocal == false).
29655 assert(Iterations > 0);
29656
29657 // Newton iterations for reciprocal square root:
29658 // E = (E * -0.5) * ((A * E) * E + -3.0)
29659 for (unsigned i = 0; i < Iterations; ++i) {
29660 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
29661 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
29662 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
29663
29664 // When calculating a square root at the last iteration build:
29665 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
29666 // (notice a common subexpression)
29667 SDValue LHS;
29668 if (Reciprocal || (i + 1) < Iterations) {
29669 // RSQRT: LHS = (E * -0.5)
29670 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
29671 } else {
29672 // SQRT: LHS = (A * E) * -0.5
29673 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
29674 }
29675
29676 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
29677 }
29678
29679 return Est;
29680}
29681
29682/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
29683/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
29684/// Op can be zero.
29685SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
29686 bool Reciprocal) {
29687 if (LegalDAG)
29688 return SDValue();
29689
29690 // TODO: Handle extended types?
29691 EVT VT = Op.getValueType();
29692 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29693 VT.getScalarType() != MVT::f64)
29694 return SDValue();
29695
29696 // If estimates are explicitly disabled for this function, we're done.
29697 MachineFunction &MF = DAG.getMachineFunction();
29698 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
29699 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29700 return SDValue();
29701
29702 // Estimates may be explicitly enabled for this type with a custom number of
29703 // refinement steps.
29704 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
29705
29706 bool UseOneConstNR = false;
29707 if (SDValue Est =
29708 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
29709 Reciprocal)) {
29710 AddToWorklist(Est.getNode());
29711
29712 if (Iterations > 0)
29713 Est = UseOneConstNR
29714 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
29715 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
29716 if (!Reciprocal) {
29717 SDLoc DL(Op);
29718 // Try the target specific test first.
29719 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
29720
29721 // The estimate is now completely wrong if the input was exactly 0.0 or
29722 // possibly a denormal. Force the answer to 0.0 or the value provided by the
29723 // target for those cases.
29724 Est = DAG.getSelect(DL, VT, Test,
29725 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
29726 }
29727 return Est;
29728 }
29729
29730 return SDValue();
29731}
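// Note on the zero-input fixup above (illustrative): when a plain sqrt is
// requested, the result is formed as Op * rsqrt(Op). For Op == +0.0 the
// reciprocal-sqrt estimate is infinite, so the product would be 0 * inf = NaN;
// the select replaces it with the target-provided result (typically 0.0) for
// zero or denormal inputs.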
29732
29733SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29734 return buildSqrtEstimateImpl(Op, Flags, true);
29735}
29736
29737SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29738 return buildSqrtEstimateImpl(Op, Flags, false);
29739}
29740
29741/// Return true if there is any possibility that the two addresses overlap.
29742bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
29743
29744 struct MemUseCharacteristics {
29745 bool IsVolatile;
29746 bool IsAtomic;
29747 SDValue BasePtr;
29748 int64_t Offset;
29749 LocationSize NumBytes;
29750 MachineMemOperand *MMO;
29751 };
29752
29753 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
29754 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
29755 int64_t Offset = 0;
29756 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
29757 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
29758 : (LSN->getAddressingMode() == ISD::PRE_DEC)
29759 ? -1 * C->getSExtValue()
29760 : 0;
29761 TypeSize Size = LSN->getMemoryVT().getStoreSize();
29762 return {LSN->isVolatile(), LSN->isAtomic(),
29763 LSN->getBasePtr(), Offset /*base offset*/,
29764 LocationSize::precise(Size), LSN->getMemOperand()};
29765 }
29766 if (const auto *LN = cast<LifetimeSDNode>(N)) {
29767 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
29768 return {false /*isVolatile*/,
29769 /*isAtomic*/ false,
29770 LN->getOperand(1),
29771 0,
29772 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
29773 (MachineMemOperand *)nullptr};
29774 }
29775 // Default.
29776 return {false /*isvolatile*/,
29777 /*isAtomic*/ false,
29778 SDValue(),
29779 (int64_t)0 /*offset*/,
29780 LocationSize::beforeOrAfterPointer() /*size*/,
29781 (MachineMemOperand *)nullptr};
29782 };
29783
29784 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
29785 MUC1 = getCharacteristics(Op1);
29786
29787 // If they are to the same address, then they must be aliases.
29788 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
29789 MUC0.Offset == MUC1.Offset)
29790 return true;
29791
29792 // If they are both volatile then they cannot be reordered.
29793 if (MUC0.IsVolatile && MUC1.IsVolatile)
29794 return true;
29795
29796 // Be conservative about atomics for the moment
29797 // TODO: This is way overconservative for unordered atomics (see D66309)
29798 if (MUC0.IsAtomic && MUC1.IsAtomic)
29799 return true;
29800
29801 if (MUC0.MMO && MUC1.MMO) {
29802 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29803 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29804 return false;
29805 }
29806
29807 // If NumBytes is scalable and the offset is not 0, conservatively return
29808 // may-alias.
29809 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
29810 MUC0.Offset != 0) ||
29811 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
29812 MUC1.Offset != 0))
29813 return true;
29814 // Try to prove that there is aliasing, or that there is no aliasing. Either
29815 // way, we can return now. If nothing can be proved, proceed with more tests.
29816 bool IsAlias;
29817 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
29818 DAG, IsAlias))
29819 return IsAlias;
29820
29821 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
29822 // either is not known.
29823 if (!MUC0.MMO || !MUC1.MMO)
29824 return true;
29825
29826 // If one operation reads from invariant memory, and the other may store, they
29827 // cannot alias. These should really be checking the equivalent of mayWrite,
29828 // but it only matters for memory nodes other than load/store.
29829 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29830 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29831 return false;
29832
29833 // If we know that SrcValue1 and SrcValue2 have relatively large alignment
29834 // compared to the size and offset of the access, we may be able to prove
29835 // that they do not alias. This check is conservative for now to catch cases
29836 // created by splitting vector types; it only works when the offsets are
29837 // multiples of the size of the data.
29838 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
29839 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
29840 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
29841 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
29842 LocationSize Size0 = MUC0.NumBytes;
29843 LocationSize Size1 = MUC1.NumBytes;
29844
29845 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
29846 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
29847 !Size1.isScalable() && Size0 == Size1 &&
29848 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
29849 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
29850 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
29851 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
29852 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
29853
29854 // There is no overlap between these relatively aligned accesses of
29855 // similar size. Return no alias.
29856 if ((OffAlign0 + static_cast<int64_t>(
29857 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
29858 (OffAlign1 + static_cast<int64_t>(
29859 Size1.getValue().getKnownMinValue())) <= OffAlign0)
29860 return false;
29861 }
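// Worked example of the alignment check above (illustrative values): two
// 4-byte accesses with a common base alignment of 16 and source-value offsets
// 0 and 8 give OffAlign0 = 0 and OffAlign1 = 8; since 0 + 4 <= 8, the
// accesses cannot overlap and no-alias is returned.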
29862
29863 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
29864 ? CombinerGlobalAA
29865 : DAG.getSubtarget().useAA();
29866#ifndef NDEBUG
29867 if (CombinerAAOnlyFunc.getNumOccurrences() &&
29868 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
29869 UseAA = false;
29870#endif
29871
29872 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
29873 Size0.hasValue() && Size1.hasValue() &&
29874 // Can't represent a scalable size + fixed offset in LocationSize
29875 (!Size0.isScalable() || SrcValOffset0 == 0) &&
29876 (!Size1.isScalable() || SrcValOffset1 == 0)) {
29877 // Use alias analysis information.
29878 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
29879 int64_t Overlap0 =
29880 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
29881 int64_t Overlap1 =
29882 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
29883 LocationSize Loc0 =
29884 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
29885 LocationSize Loc1 =
29886 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
29887 if (BatchAA->isNoAlias(
29888 MemoryLocation(MUC0.MMO->getValue(), Loc0,
29889 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
29890 MemoryLocation(MUC1.MMO->getValue(), Loc1,
29891 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
29892 return false;
29893 }
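// Illustrative example of the overlap computation above: with fixed sizes 8
// and 8 and source-value offsets 4 and 12, MinOffset = 4, so
// Overlap0 = 8 + 4 - 4 = 8 and Overlap1 = 8 + 12 - 4 = 16. Each location is
// thereby extended to cover the range starting at the smaller offset, and
// BatchAA decides whether the underlying IR values can alias.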
29894
29895 // Otherwise we have to assume they alias.
29896 return true;
29897}
29898
29899/// Walk up chain skipping non-aliasing memory nodes,
29900/// looking for aliasing nodes and adding them to the Aliases vector.
29901void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
29902 SmallVectorImpl<SDValue> &Aliases) {
29903 SmallVector<SDValue, 8> Chains; // List of chains to visit.
29904 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
29905
29906 // Get alias information for node.
29907 // TODO: relax aliasing for unordered atomics (see D66309)
29908 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
29909
29910 // Starting off.
29911 Chains.push_back(OriginalChain);
29912 unsigned Depth = 0;
29913
29914 // Attempt to improve chain by a single step
29915 auto ImproveChain = [&](SDValue &C) -> bool {
29916 switch (C.getOpcode()) {
29917 case ISD::EntryToken:
29918 // No need to mark EntryToken.
29919 C = SDValue();
29920 return true;
29921 case ISD::LOAD:
29922 case ISD::STORE: {
29923 // Get alias information for C.
29924 // TODO: Relax aliasing for unordered atomics (see D66309)
29925 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
29926 cast<LSBaseSDNode>(C.getNode())->isSimple();
29927 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
29928 // Look further up the chain.
29929 C = C.getOperand(0);
29930 return true;
29931 }
29932 // Alias, so stop here.
29933 return false;
29934 }
29935
29936 case ISD::CopyFromReg:
29937 // Always forward past CopyFromReg.
29938 C = C.getOperand(0);
29939 return true;
29940
29941 case ISD::LIFETIME_START:
29942 case ISD::LIFETIME_END: {
29943 // We can forward past any lifetime start/end that can be proven not to
29944 // alias the memory access.
29945 if (!mayAlias(N, C.getNode())) {
29946 // Look further up the chain.
29947 C = C.getOperand(0);
29948 return true;
29949 }
29950 return false;
29951 }
29952 default:
29953 return false;
29954 }
29955 };
29956
29957 // Look at each chain and determine if it is an alias. If so, add it to the
29958 // aliases list. If not, then continue up the chain looking for the next
29959 // candidate.
29960 while (!Chains.empty()) {
29961 SDValue Chain = Chains.pop_back_val();
29962
29963 // Don't bother if we've seen Chain before.
29964 if (!Visited.insert(Chain.getNode()).second)
29965 continue;
29966
29967 // For TokenFactor nodes, look at each operand and only continue up the
29968 // chain until we reach the depth limit.
29969 //
29970 // FIXME: The depth check could be made to return the last non-aliasing
29971 // chain we found before we hit a tokenfactor rather than the original
29972 // chain.
29973 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
29974 Aliases.clear();
29975 Aliases.push_back(OriginalChain);
29976 return;
29977 }
29978
29979 if (Chain.getOpcode() == ISD::TokenFactor) {
29980 // We have to check each of the operands of the token factor for "small"
29981 // token factors, so we queue them up. Adding the operands to the queue
29982 // (stack) in reverse order maintains the original order and increases the
29983 // likelihood that getNode will find a matching token factor (CSE).
29984 if (Chain.getNumOperands() > 16) {
29985 Aliases.push_back(Chain);
29986 continue;
29987 }
29988 for (unsigned n = Chain.getNumOperands(); n;)
29989 Chains.push_back(Chain.getOperand(--n));
29990 ++Depth;
29991 continue;
29992 }
29993 // Everything else
29994 if (ImproveChain(Chain)) {
29995 // Updated chain found; consider the new chain if one exists.
29996 if (Chain.getNode())
29997 Chains.push_back(Chain);
29998 ++Depth;
29999 continue;
30000 }
30001 // No improved chain is possible, so treat this chain as an alias.
30002 Aliases.push_back(Chain);
30003 }
30004}
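// Illustrative walk (hypothetical DAG): suppose N is a simple load whose chain
// is a TokenFactor of two stores P and Q, where mayAlias(N, P) is true but
// mayAlias(N, Q) is false. Both token factor operands are queued; P cannot be
// improved and is added to Aliases, while Q is replaced by its own input chain
// and the walk continues upward from there. FindBetterChain below then builds
// N's new chain from the collected aliases only.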
30005
30006/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
30007/// (aliasing node.)
30008SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
30009 if (OptLevel == CodeGenOptLevel::None)
30010 return OldChain;
30011
30012 // Ops for replacing token factor.
30013 SmallVector<SDValue, 8> Aliases;
30014
30015 // Accumulate all the aliases to this node.
30016 GatherAllAliases(N, OldChain, Aliases);
30017
30018 // If no operands then chain to entry token.
30019 if (Aliases.empty())
30020 return DAG.getEntryNode();
30021
30022 // If a single operand then chain to it. We don't need to revisit it.
30023 if (Aliases.size() == 1)
30024 return Aliases[0];
30025
30026 // Construct a custom tailored token factor.
30027 return DAG.getTokenFactor(SDLoc(N), Aliases);
30028}
30029
30030// This function tries to collect a bunch of potentially interesting
30031// nodes to improve the chains of, all at once. This might seem
30032// redundant, as this function gets called when visiting every store
30033// node, so why not let the work be done on each store as it's visited?
30034//
30035// I believe this is mainly important because mergeConsecutiveStores
30036// is unable to deal with merging stores of different sizes, so unless
30037// we improve the chains of all the potential candidates up-front
30038// before running mergeConsecutiveStores, it might only see some of
30039// the nodes that will eventually be candidates, and then not be able
30040// to go from a partially-merged state to the desired final
30041// fully-merged state.
30042
30043bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
30044 SmallVector<StoreSDNode *, 8> ChainedStores;
30045 StoreSDNode *STChain = St;
30046 // Intervals records which offsets from BaseIndex have been covered. In
30047 // the common case, every store writes to an address range adjacent to the
30048 // one already covered and is thus merged with the previous interval at insertion time.
30049
30050 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30051 IntervalMapHalfOpenInfo<int64_t>>;
30052 IMap::Allocator A;
30053 IMap Intervals(A);
30054
30055 // This holds the base pointer, index, and the offset in bytes from the base
30056 // pointer.
30057 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30058
30059 // We must have a base and an offset.
30060 if (!BasePtr.getBase().getNode())
30061 return false;
30062
30063 // Do not handle stores to undef base pointers.
30064 if (BasePtr.getBase().isUndef())
30065 return false;
30066
30067 // Do not handle stores to opaque types
30068 if (St->getMemoryVT().isZeroSized())
30069 return false;
30070
30071 // BaseIndexOffset assumes that offsets are fixed-size, which
30072 // is not valid for scalable vectors where the offsets are
30073 // scaled by `vscale`, so bail out early.
30074 if (St->getMemoryVT().isScalableVT())
30075 return false;
30076
30077 // Add ST's interval.
30078 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30079 std::monostate{});
30080
30081 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30082 if (Chain->getMemoryVT().isScalableVector())
30083 return false;
30084
30085 // If the chain has more than one use, then we can't reorder the mem ops.
30086 if (!SDValue(Chain, 0)->hasOneUse())
30087 break;
30088 // TODO: Relax for unordered atomics (see D66309)
30089 if (!Chain->isSimple() || Chain->isIndexed())
30090 break;
30091
30092 // Find the base pointer and offset for this memory node.
30093 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30094 // Check that the base pointer is the same as the original one.
30095 int64_t Offset;
30096 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30097 break;
30098 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30099 // Make sure we don't overlap with other intervals by checking the ones to
30100 // the left or right before inserting.
30101 auto I = Intervals.find(Offset);
30102 // If there's a next interval, we should end before it.
30103 if (I != Intervals.end() && I.start() < (Offset + Length))
30104 break;
30105 // If there's a previous interval, we should start after it.
30106 if (I != Intervals.begin() && (--I).stop() <= Offset)
30107 break;
30108 Intervals.insert(Offset, Offset + Length, std::monostate{});
30109
30110 ChainedStores.push_back(Chain);
30111 STChain = Chain;
30112 }
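// Illustrative run of the interval checks above: if St writes 4 bytes at
// relative offset 0 and the chained stores write 4 bytes at offsets -4 and -8,
// the intervals [0,4), [-4,0) and [-8,-4) never overlap, so all three stores
// are collected. A chained store at offset -2 would overlap [-4,0) and end the
// walk instead.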
30113
30114 // If we didn't find a chained store, exit.
30115 if (ChainedStores.empty())
30116 return false;
30117
30118 // Improve all chained stores (St and the ChainedStores members) starting from
30119 // where the store chain ended, and return a single TokenFactor.
30120 SDValue NewChain = STChain->getChain();
30121 SmallVector<SDValue, 8> TFOps;
30122 for (unsigned I = ChainedStores.size(); I;) {
30123 StoreSDNode *S = ChainedStores[--I];
30124 SDValue BetterChain = FindBetterChain(S, NewChain);
30125 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30126 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30127 TFOps.push_back(SDValue(S, 0));
30128 ChainedStores[I] = S;
30129 }
30130
30131 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30132 SDValue BetterChain = FindBetterChain(St, NewChain);
30133 SDValue NewST;
30134 if (St->isTruncatingStore())
30135 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30136 St->getBasePtr(), St->getMemoryVT(),
30137 St->getMemOperand());
30138 else
30139 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30140 St->getBasePtr(), St->getMemOperand());
30141
30142 TFOps.push_back(NewST);
30143
30144 // If we improved every element of TFOps, then we've lost the dependence on
30145 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30146 // the beginning to keep relative order consistent with FindBetterChains.
30147 auto hasImprovedChain = [&](SDValue ST) -> bool {
30148 return ST->getOperand(0) != NewChain;
30149 };
30150 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30151 if (AddNewChain)
30152 TFOps.insert(TFOps.begin(), NewChain);
30153
30154 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30155 CombineTo(St, TF);
30156
30157 // Add TF and its operands to the worklist.
30158 AddToWorklist(TF.getNode());
30159 for (const SDValue &Op : TF->ops())
30160 AddToWorklist(Op.getNode());
30161 AddToWorklist(STChain);
30162 return true;
30163}
30164
30165bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30166 if (OptLevel == CodeGenOptLevel::None)
30167 return false;
30168
30169 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30170
30171 // We must have a base and an offset.
30172 if (!BasePtr.getBase().getNode())
30173 return false;
30174
30175 // Do not handle stores to undef base pointers.
30176 if (BasePtr.getBase().isUndef())
30177 return false;
30178
30179 // Directly improve a chain of disjoint stores starting at St.
30180 if (parallelizeChainedStores(St))
30181 return true;
30182
30183 // Improve St's chain.
30184 SDValue BetterChain = FindBetterChain(St, St->getChain());
30185 if (St->getChain() != BetterChain) {
30186 replaceStoreChain(St, BetterChain);
30187 return true;
30188 }
30189 return false;
30190}
30191
30192/// This is the entry point for the file.
30193void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30194 CodeGenOptLevel OptLevel) {
30195 /// This is the main entry point to this class.
30196 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30197}
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
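Taken together, these MemSDNode/LoadSDNode accessors are the guards most load-rewriting combines check before touching a memory node. A minimal sketch (hypothetical helper name; assumes llvm/CodeGen/SelectionDAGNodes.h):
// Only plain, unindexed, non-extending, non-volatile and non-atomic loads
// with a single user are normally safe and profitable to rewrite.
static bool isRewritableLoad(llvm::LoadSDNode *LD) {
  return llvm::ISD::isNON_EXTLoad(LD) && LD->isUnindexed() &&
         LD->isSimple() && LD->hasOneUse();
}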
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:424
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:417
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there is any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
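The SDValue/SDNode accessors above are the raw material for the hand-rolled pattern matching used throughout the combiner. As an illustrative sketch only (this exact helper does not exist in the file), matching (srl (shl X, C), C) and returning X looks like:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using llvm::SDValue;
static SDValue matchShlSrlPair(SDValue V) {
  if (V.getOpcode() != llvm::ISD::SRL)
    return SDValue();
  SDValue Shl = V.getOperand(0);
  // Require a single use so the inner shift dies after the rewrite, and
  // insist on the identical shift-amount operand on both shifts.
  if (Shl.getOpcode() != llvm::ISD::SHL || !Shl.hasOneUse() ||
      Shl.getOperand(1) != V.getOperand(1))
    return SDValue();
  return Shl.getOperand(0);
}
An empty SDValue() is the conventional "no match" result, mirroring how the visit* functions in this file report that no rewrite was performed.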
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
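Most rewrites in this file end with a call back into one of the SelectionDAG factory methods above. The helper below is a sketch of that shape (hypothetical; the real combines live in the visit* functions and also maintain the worklist): when the sign bit of the shifted value is known zero, an arithmetic shift right can be rebuilt as the cheaper logical form.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue sraToSrlIfNonNegative(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue X, SDValue Amt) {
  // SignBitIsZero wraps computeKnownBits; when it proves X is non-negative,
  // SRA and SRL produce the same value.
  if (!DAG.SignBitIsZero(X))
    return SDValue();
  return DAG.getNode(ISD::SRL, DL, X.getValueType(), X, Amt);
}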
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
value_type pop_back_val()
Definition SetVector.h:296
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
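A small sketch of the commute idiom these helpers support (SVN is an assumed ShuffleVectorSDNode*): copy the mask, rewrite it in place for swapped operands, and then rebuild the shuffle with the operands exchanged.
llvm::SmallVector<int, 16> NewMask(SVN->getMask().begin(), SVN->getMask().end());
llvm::ShuffleVectorSDNode::commuteMask(NewMask);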
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are functions calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it is reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
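A large fraction of the TargetLowering hooks above exist so a combine can ask whether the node it is about to create will survive legalization. The usual phase-aware guard looks roughly like the following sketch, with the LegalOperations flag standing in for the combiner's current level:
#include "llvm/CodeGen/TargetLowering.h"
static bool canUseOpcode(const llvm::TargetLowering &TLI, bool LegalOperations,
                         unsigned Opcode, llvm::EVT VT) {
  // Before operation legalization a custom-lowered node is still acceptable;
  // after it, only genuinely legal operations should be created.
  return LegalOperations ? TLI.isOperationLegal(Opcode, VT)
                         : TLI.isOperationLegalOrCustom(Opcode, VT);
}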
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
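These free functions live in the llvm::APIntOps namespace. A one-line sketch of the clamp idiom they enable (C is an assumed APInt value and Lo/Hi assumed bounds of the same bit width):
// Clamp the signed value C into the inclusive range [Lo, Hi].
llvm::APInt Clamped = llvm::APIntOps::smin(llvm::APIntOps::smax(C, Lo), Hi);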
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:855
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:809
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ TargetConstantFP
Definition ISDOpcodes.h:175
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ TargetFrameIndex
Definition ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:690
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
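The ISD helpers indexed above (isConstantSplatVectorAllZeros, isConstantSplatVector, matchUnaryPredicate, matchBinaryPredicate, and the CondCode utilities) are how a combine typically classifies constant operands that may be scalars, splats, or build vectors. A minimal, hypothetical sketch of that pattern; the helper name is illustrative and the usual DAGCombiner context (an SDNode *N, headers already included by this file) is assumed:

static bool operandIsSplatPow2(SDNode *N, unsigned OpNo) {
  // Accept a scalar constant or a splat/build vector whose every defined
  // element is a power of two; undef lanes are tolerated.
  auto IsPow2 = [](ConstantSDNode *C) {
    return C->getAPIntValue().isPowerOf2();
  };
  return ISD::matchUnaryPredicate(N->getOperand(OpNo), IsPow2,
                                  /*AllowUndefs=*/true);
}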
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
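The SDPatternMatch combinators listed here (sd_match, m_SetCC, m_SpecificCondCode, and friends) let combines match node shapes structurally rather than testing opcodes and operands by hand. A hedged sketch only; the helper name is hypothetical and the value-binding form of m_Value is assumed:

static bool matchEqualityCompare(SDNode *N, SDValue &LHS, SDValue &RHS) {
  using namespace llvm::SDPatternMatch;
  // Matches (setcc LHS, RHS, seteq) and binds the two compared operands.
  return sd_match(N, m_SetCC(m_Value(LHS), m_Value(RHS),
                             m_SpecificCondCode(ISD::SETEQ)));
}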
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
constexpr double e
Definition MathExtras.h:47
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:311
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:355
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:824
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2047
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1740
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1607
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2461
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2125
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1534
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1721
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1545
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:401
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1633
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1728
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
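CombineLevel records which legalization stages have already run when the combiner is invoked. A sketch, assuming a variable Level of this type is in scope, of how the pass typically derives its legality guards from it:

bool LegalTypes = Level >= AfterLegalizeTypes;          // may not introduce illegal types
bool LegalOperations = Level >= AfterLegalizeVectorOps; // may not introduce illegal operations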
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1950
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1886
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2097
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
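The SDValue constant predicates indexed above (isNullConstant, isNullFPConstant, isNullOrNullSplat, isConstOrConstSplat, peekThroughBitcasts, ...) are the usual way an operand is classified before a fold. An illustrative sketch under the same assumptions as above; the helper name is hypothetical:

static bool isZeroOperand(SDValue V) {
  // Look through bitcasts, then accept integer zero, FP +0.0, or an
  // all-zeros splat/build vector (undef lanes allowed).
  SDValue Src = peekThroughBitcasts(V);
  return isNullConstant(Src) || isNullFPConstant(Src) ||
         isNullOrNullSplat(Src, /*AllowUndefs=*/true);
}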
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:332
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:328
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:365
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:338
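The APFloat semantics queries above are how floating-point combines reason about the precision and exponent range of a value type before narrowing or constant folding. A short sketch, assuming VT is a floating-point EVT in scope:

const fltSemantics &Sem = VT.getScalarType().getFltSemantics();
unsigned Precision = APFloat::semanticsPrecision(Sem); // significand bits
int MaxExp = APFloat::semanticsMaxExponent(Sem);
int MinExp = APFloat::semanticsMinExponent(Sem);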
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:279
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:187
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:256
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:268
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
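The EVT queries above make up most of the legality and profitability checks in the vector combines of this file. A sketch (the helper is hypothetical) of building a wider vector type from an existing one:

static EVT getDoubledVectorVT(SelectionDAG &DAG, EVT VT) {
  // Only meaningful for fixed-length vectors; callers would still need to
  // check legality of the result, e.g. via TLI.isTypeLegal(WideVT).
  assert(VT.isVector() && !VT.isScalableVector() && "fixed vectors only");
  EVT EltVT = VT.getVectorElementType();
  return EVT::getVectorVT(*DAG.getContext(), EltVT,
                          VT.getVectorNumElements() * 2);
}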
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:289
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
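KnownBits results from SelectionDAG::computeKnownBits drive many of the mask, shift, and extension folds in this file. A hedged sketch of the common shape; the helper name is hypothetical:

static SDValue tryFoldToKnownConstant(SelectionDAG &DAG, SDNode *N) {
  EVT VT = N->getValueType(0);
  KnownBits Known = DAG.computeKnownBits(SDValue(N, 0));
  // If every bit of the result is known, the node folds to a constant.
  if (VT.isInteger() && !VT.isVector() && Known.isConstant())
    return DAG.getConstant(Known.getConstant(), SDLoc(N), VT);
  // Otherwise countMinTrailingZeros()/countMinLeadingZeros() can still
  // prove parts of a mask or shift amount redundant.
  return SDValue();
}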
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
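SDNodeFlags carry the per-node fast-math and wrap flags that gate many of the reassociation and contraction folds in this file. A minimal sketch of the usual guard; the helper name is hypothetical:

static bool canReassociateFP(SDNode *N0, SDNode *N1) {
  // Reassociation-style FP folds generally require the reassoc flag on both
  // nodes being combined; contraction (FMA formation) is gated separately
  // by hasAllowContract().
  return N0->getFlags().hasAllowReassociation() &&
         N1->getFlags().hasAllowReassociation();
}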
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...