DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
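// For example, a target-independent fold such as (add x, 0) -> x, or merging
// several adjacent narrow stores into one wider store, is performed here on
// the SelectionDAG rather than in IR.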
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
163 CombineLevel Level = BeforeLegalizeTypes;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
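/// An index of -1 means the node is not currently in the worklist; -2 means
/// it has already been combined (see AddToWorklist and getNextWorklistEntry).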
180 SmallVector<SDNode *, 64> Worklist;
181
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. As we do not add duplicate nodes
184 /// to the worklist, this is different from the tail of the worklist.
185 SmallSetVector<SDNode *, 32> PruningList;
186
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as a store merging
192 /// candidate again.
193 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
194
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// stores candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the work lists because they might get more simplified now.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its users to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
356 ? APInt::getAllOnes(VT.getVectorNumElements())
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns a nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
550 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
551 SDValue visitVECTOR_SHUFFLE(SDNode *N);
552 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
553 SDValue visitINSERT_SUBVECTOR(SDNode *N);
554 SDValue visitVECTOR_COMPRESS(SDNode *N);
555 SDValue visitMLOAD(SDNode *N);
556 SDValue visitMSTORE(SDNode *N);
557 SDValue visitMGATHER(SDNode *N);
558 SDValue visitMSCATTER(SDNode *N);
559 SDValue visitMHISTOGRAM(SDNode *N);
560 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
561 SDValue visitVPGATHER(SDNode *N);
562 SDValue visitVPSCATTER(SDNode *N);
563 SDValue visitVP_STRIDED_LOAD(SDNode *N);
564 SDValue visitVP_STRIDED_STORE(SDNode *N);
565 SDValue visitFP_TO_FP16(SDNode *N);
566 SDValue visitFP16_TO_FP(SDNode *N);
567 SDValue visitFP_TO_BF16(SDNode *N);
568 SDValue visitBF16_TO_FP(SDNode *N);
569 SDValue visitVECREDUCE(SDNode *N);
570 SDValue visitVPOp(SDNode *N);
571 SDValue visitGET_FPENV_MEM(SDNode *N);
572 SDValue visitSET_FPENV_MEM(SDNode *N);
573
574 template <class MatchContextClass>
575 SDValue visitFADDForFMACombine(SDNode *N);
576 template <class MatchContextClass>
577 SDValue visitFSUBForFMACombine(SDNode *N);
578 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
579
580 SDValue XformToShuffleWithZero(SDNode *N);
581 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
582 const SDLoc &DL,
583 SDNode *N,
584 SDValue N0,
585 SDValue N1);
586 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
591 EVT VT, SDValue N0, SDValue N1,
592 SDNodeFlags Flags = SDNodeFlags());
593
594 SDValue visitShiftByConstant(SDNode *N);
595
596 SDValue foldSelectOfConstants(SDNode *N);
597 SDValue foldVSelectOfConstants(SDNode *N);
598 SDValue foldBinOpIntoSelect(SDNode *BO);
599 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
600 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
601 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
602 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
603 SDValue N2, SDValue N3, ISD::CondCode CC,
604 bool NotExtCompare = false);
605 SDValue convertSelectOfFPConstantsToLoadOffset(
606 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
607 ISD::CondCode CC);
608 SDValue foldSignChangeInBitcast(SDNode *N);
609 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
610 SDValue N2, SDValue N3, ISD::CondCode CC);
611 SDValue foldSelectOfBinops(SDNode *N);
612 SDValue foldSextSetcc(SDNode *N);
613 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
614 const SDLoc &DL);
615 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
616 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
617 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
618 SDValue False, ISD::CondCode CC, const SDLoc &DL);
619 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue unfoldMaskedMerge(SDNode *N);
622 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
623 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
624 const SDLoc &DL, bool foldBooleans);
625 SDValue rebuildSetCC(SDValue N);
626
627 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
628 SDValue &CC, bool MatchStrict = false) const;
629 bool isOneUseSetCC(SDValue N) const;
630
631 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
632 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
633
634 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
635 unsigned HiOp);
636 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
637 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
638 const TargetLowering &TLI);
639 SDValue foldPartialReduceMLAMulOp(SDNode *N);
640 SDValue foldPartialReduceAdd(SDNode *N);
641
642 SDValue CombineExtLoad(SDNode *N);
643 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
644 SDValue combineRepeatedFPDivisors(SDNode *N);
645 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
646 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
647 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
648 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
650 SDValue BuildSDIV(SDNode *N);
651 SDValue BuildSDIVPow2(SDNode *N);
652 SDValue BuildUDIV(SDNode *N);
653 SDValue BuildSREMPow2(SDNode *N);
654 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
655 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
656 bool KnownNeverZero = false,
657 bool InexpensiveOnly = false,
658 std::optional<EVT> OutVT = std::nullopt);
659 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
660 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
661 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
662 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
663 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
664 SDNodeFlags Flags, bool Reciprocal);
665 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
666 SDNodeFlags Flags, bool Reciprocal);
667 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
668 bool DemandHighBits = true);
669 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
670 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
671 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
672 bool HasPos, unsigned PosOpcode,
673 unsigned NegOpcode, const SDLoc &DL);
674 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
675 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
676 bool HasPos, unsigned PosOpcode,
677 unsigned NegOpcode, const SDLoc &DL);
678 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
679 bool FromAdd);
680 SDValue MatchLoadCombine(SDNode *N);
681 SDValue mergeTruncStores(StoreSDNode *N);
682 SDValue reduceLoadWidth(SDNode *N);
683 SDValue ReduceLoadOpStoreWidth(SDNode *N);
685 SDValue TransformFPLoadStorePair(SDNode *N);
686 SDValue convertBuildVecZextToZext(SDNode *N);
687 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
688 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
689 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
690 SDValue reduceBuildVecToShuffle(SDNode *N);
691 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
692 ArrayRef<int> VectorMask, SDValue VecIn1,
693 SDValue VecIn2, unsigned LeftIdx,
694 bool DidSplitVec);
695 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
696
697 /// Walk up chain skipping non-aliasing memory nodes,
698 /// looking for aliasing nodes and adding them to the Aliases vector.
699 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
700 SmallVectorImpl<SDValue> &Aliases);
701
702 /// Return true if there is any possibility that the two addresses overlap.
703 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
704
705 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
706 /// chain (aliasing node.)
707 SDValue FindBetterChain(SDNode *N, SDValue Chain);
708
709 /// Try to replace a store and any possibly adjacent stores on
710 /// consecutive chains with better chains. Return true only if St is
711 /// replaced.
712 ///
713 /// Notice that other chains may still be replaced even if the function
714 /// returns false.
715 bool findBetterNeighborChains(StoreSDNode *St);
716
717 // Helper for findBetterNeighborChains. Walk up the store chain and add additional
718 // chained stores that do not overlap and can be parallelized.
719 bool parallelizeChainedStores(StoreSDNode *St);
720
721 /// Holds a pointer to an LSBaseSDNode as well as information on where it
722 /// is located in a sequence of memory operations connected by a chain.
723 struct MemOpLink {
724 // Ptr to the mem node.
725 LSBaseSDNode *MemNode;
726
727 // Offset from the base ptr.
728 int64_t OffsetFromBase;
729
730 MemOpLink(LSBaseSDNode *N, int64_t Offset)
731 : MemNode(N), OffsetFromBase(Offset) {}
732 };
733
734 // Classify the origin of a stored value.
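// For example, a stored (extract_vector_elt v, i) is classified as Extract,
// the result of a load as Load, and an integer/FP constant (or a constant
// build_vector) as Constant.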
735 enum class StoreSource { Unknown, Constant, Extract, Load };
736 StoreSource getStoreSource(SDValue StoreVal) {
737 switch (StoreVal.getOpcode()) {
738 case ISD::Constant:
739 case ISD::ConstantFP:
740 return StoreSource::Constant;
741 case ISD::BUILD_VECTOR:
742 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
743 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
744 return StoreSource::Constant;
745 return StoreSource::Unknown;
746 case ISD::EXTRACT_VECTOR_ELT:
747 case ISD::EXTRACT_SUBVECTOR:
748 return StoreSource::Extract;
749 case ISD::LOAD:
750 return StoreSource::Load;
751 default:
752 return StoreSource::Unknown;
753 }
754 }
755
756 /// This is a helper function for visitMUL to check the profitability
757 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
758 /// MulNode is the original multiply, AddNode is (add x, c1),
759 /// and ConstNode is c2.
760 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
761 SDValue ConstNode);
762
763 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
764 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
765 /// the type of the loaded value to be extended.
766 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
767 EVT LoadResultTy, EVT &ExtVT);
768
769 /// Helper function to calculate whether the given Load/Store can have its
770 /// width reduced to ExtVT.
771 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
772 EVT &MemVT, unsigned ShAmt = 0);
773
774 /// Used by BackwardsPropagateMask to find suitable loads.
775 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
776 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
777 ConstantSDNode *Mask, SDNode *&NodeToMask);
778 /// Attempt to propagate a given AND node back to load leaves so that they
779 /// can be combined into narrow loads.
780 bool BackwardsPropagateMask(SDNode *N);
781
782 /// Helper function for mergeConsecutiveStores which merges the component
783 /// store chains.
784 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
785 unsigned NumStores);
786
787 /// Helper function for mergeConsecutiveStores which checks if all the store
788 /// nodes have the same underlying object. We can still reuse the first
789 /// store's pointer info if all the stores are from the same object.
790 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
791
792 /// This is a helper function for mergeConsecutiveStores. When the source
793 /// elements of the consecutive stores are all constants or all extracted
794 /// vector elements, try to merge them into one larger store introducing
795 /// bitcasts if necessary. \return True if a merged store was created.
796 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
797 EVT MemVT, unsigned NumStores,
798 bool IsConstantSrc, bool UseVector,
799 bool UseTrunc);
800
801 /// This is a helper function for mergeConsecutiveStores. Stores that
802 /// potentially may be merged with St are placed in StoreNodes. On success,
803 /// returns a chain predecessor to all store candidates.
804 SDNode *getStoreMergeCandidates(StoreSDNode *St,
805 SmallVectorImpl<MemOpLink> &StoreNodes);
806
807 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
808 /// have indirect dependency through their operands. RootNode is the
809 /// predecessor to all stores calculated by getStoreMergeCandidates and is
810 /// used to prune the dependency check. \return True if safe to merge.
811 bool checkMergeStoreCandidatesForDependencies(
812 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
813 SDNode *RootNode);
814
815 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
816 /// chain has a call in it. \return True if a call is found.
817 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
818
819 /// This is a helper function for mergeConsecutiveStores. Given a list of
820 /// store candidates, find the first N that are consecutive in memory.
821 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
822 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
823 int64_t ElementSizeBytes) const;
824
825 /// This is a helper function for mergeConsecutiveStores. It is used for
826 /// store chains that are composed entirely of constant values.
827 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
828 unsigned NumConsecutiveStores,
829 EVT MemVT, SDNode *Root, bool AllowVectors);
830
831 /// This is a helper function for mergeConsecutiveStores. It is used for
832 /// store chains that are composed entirely of extracted vector elements.
833 /// When extracting multiple vector elements, try to store them in one
834 /// vector store rather than a sequence of scalar stores.
835 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
836 unsigned NumConsecutiveStores, EVT MemVT,
837 SDNode *Root);
838
839 /// This is a helper function for mergeConsecutiveStores. It is used for
840 /// store chains that are composed entirely of loaded values.
841 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
842 unsigned NumConsecutiveStores, EVT MemVT,
843 SDNode *Root, bool AllowVectors,
844 bool IsNonTemporalStore, bool IsNonTemporalLoad);
845
846 /// Merge consecutive store operations into a wide store.
847 /// This optimization uses wide integers or vectors when possible.
848 /// \return true if stores were merged.
849 bool mergeConsecutiveStores(StoreSDNode *St);
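// For example, four adjacent i8 stores of constants may be combined into a
// single i32 store when that is legal and profitable for the target.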
850
851 /// Try to transform a truncation where C is a constant:
852 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
853 ///
854 /// \p N needs to be a truncation and its first operand an AND. Other
855 /// requirements are checked by the function (e.g. that trunc is
856 /// single-use); if they are not met, an empty SDValue is returned.
857 SDValue distributeTruncateThroughAnd(SDNode *N);
858
859 /// Helper function to determine whether the target supports operation
860 /// given by \p Opcode for type \p VT, that is, whether the operation
861 /// is legal or custom before legalizing operations, and whether it is
862 /// legal (but not custom) after legalization.
863 bool hasOperation(unsigned Opcode, EVT VT) {
864 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
865 }
866
867 bool hasUMin(EVT VT) const {
868 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
869 return (LK.first == TargetLoweringBase::TypeLegal ||
870 LK.first == TargetLoweringBase::TypePromoteInteger) &&
871 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
872 }
873
874 public:
875 /// Runs the dag combiner on all nodes in the work list
876 void Run(CombineLevel AtLevel);
877
878 SelectionDAG &getDAG() const { return DAG; }
879
880 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
881 EVT getShiftAmountTy(EVT LHSTy) {
882 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
883 }
884
885 /// This method returns true if we are running before type legalization or
886 /// if the specified VT is legal.
887 bool isTypeLegal(const EVT &VT) {
888 if (!LegalTypes) return true;
889 return TLI.isTypeLegal(VT);
890 }
891
892 /// Convenience wrapper around TargetLowering::getSetCCResultType
893 EVT getSetCCResultType(EVT VT) const {
894 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
895 }
896
897 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
898 SDValue OrigLoad, SDValue ExtLoad,
899 ISD::NodeType ExtType);
900 };
901
902/// This class is a DAGUpdateListener that removes any deleted
903/// nodes from the worklist.
904class WorklistRemover : public SelectionDAG::DAGUpdateListener {
905 DAGCombiner &DC;
906
907public:
908 explicit WorklistRemover(DAGCombiner &dc)
909 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
910
911 void NodeDeleted(SDNode *N, SDNode *E) override {
912 DC.removeFromWorklist(N);
913 }
914};
915
916class WorklistInserter : public SelectionDAG::DAGUpdateListener {
917 DAGCombiner &DC;
918
919public:
920 explicit WorklistInserter(DAGCombiner &dc)
921 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
922
923 // FIXME: Ideally we could add N to the worklist, but this causes exponential
924 // compile time costs in large DAGs, e.g. Halide.
925 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
926};
927
928} // end anonymous namespace
929
930//===----------------------------------------------------------------------===//
931// TargetLowering::DAGCombinerInfo implementation
932//===----------------------------------------------------------------------===//
933
934void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
935 ((DAGCombiner*)DC)->AddToWorklist(N);
936}
937
938SDValue TargetLowering::DAGCombinerInfo::
939CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
940 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
941}
942
943SDValue TargetLowering::DAGCombinerInfo::
944CombineTo(SDNode *N, SDValue Res, bool AddTo) {
945 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
946}
947
948SDValue TargetLowering::DAGCombinerInfo::
949CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
950 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
951}
952
953bool TargetLowering::DAGCombinerInfo::
954recursivelyDeleteUnusedNodes(SDNode *N) {
955 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
956}
957
958void TargetLowering::DAGCombinerInfo::
959CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
960 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
961}
962
963//===----------------------------------------------------------------------===//
964// Helper Functions
965//===----------------------------------------------------------------------===//
966
967void DAGCombiner::deleteAndRecombine(SDNode *N) {
968 removeFromWorklist(N);
969
970 // If the operands of this node are only used by the node, they will now be
971 // dead. Make sure to re-visit them and recursively delete dead nodes.
972 for (const SDValue &Op : N->ops())
973 // For an operand generating multiple values, one of the values may
974 // become dead allowing further simplification (e.g. split index
975 // arithmetic from an indexed load).
976 if (Op->hasOneUse() || Op->getNumValues() > 1)
977 AddToWorklist(Op.getNode());
978
979 DAG.DeleteNode(N);
980}
981
982// APInts must be the same size for most operations, this helper
983// function zero extends the shorter of the pair so that they match.
984// We provide an Offset so that we can create bitwidths that won't overflow.
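// For example, with Offset == 0, an 8-bit LHS and a 16-bit RHS are both
// zero-extended to 16 bits before the caller combines them.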
985static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
986 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
987 LHS = LHS.zext(Bits);
988 RHS = RHS.zext(Bits);
989}
990
991// Return true if this node is a setcc, or is a select_cc
992// that selects between the target values used for true and false, making it
993// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
994// the appropriate nodes based on the type of node we are checking. This
995// simplifies life a bit for the callers.
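// For example, (select_cc lhs, rhs, T, F, cc), where T and F are the target's
// constant true and false values, behaves like (setcc lhs, rhs, cc), so LHS,
// RHS and CC are taken from its operands.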
996bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
997 SDValue &CC, bool MatchStrict) const {
998 if (N.getOpcode() == ISD::SETCC) {
999 LHS = N.getOperand(0);
1000 RHS = N.getOperand(1);
1001 CC = N.getOperand(2);
1002 return true;
1003 }
1004
1005 if (MatchStrict &&
1006 (N.getOpcode() == ISD::STRICT_FSETCC ||
1007 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1008 LHS = N.getOperand(1);
1009 RHS = N.getOperand(2);
1010 CC = N.getOperand(3);
1011 return true;
1012 }
1013
1014 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1015 !TLI.isConstFalseVal(N.getOperand(3)))
1016 return false;
1017
1018 if (TLI.getBooleanContents(N.getValueType()) ==
1019 TargetLowering::UndefinedBooleanContent)
1020 return false;
1021
1022 LHS = N.getOperand(0);
1023 RHS = N.getOperand(1);
1024 CC = N.getOperand(4);
1025 return true;
1026}
1027
1028/// Return true if this is a SetCC-equivalent operation with only one use.
1029/// If this is true, it allows the users to invert the operation for free when
1030/// it is profitable to do so.
1031bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1032 SDValue N0, N1, N2;
1033 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1034 return true;
1035 return false;
1036}
1037
1038static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1039 if (!ScalarTy.isSimple())
1040 return false;
1041
1042 uint64_t MaskForTy = 0ULL;
1043 switch (ScalarTy.getSimpleVT().SimpleTy) {
1044 case MVT::i8:
1045 MaskForTy = 0xFFULL;
1046 break;
1047 case MVT::i16:
1048 MaskForTy = 0xFFFFULL;
1049 break;
1050 case MVT::i32:
1051 MaskForTy = 0xFFFFFFFFULL;
1052 break;
1053 default:
1054 return false;
1055 break;
1056 }
1057
1058 APInt Val;
1059 if (ISD::isConstantSplatVector(N, Val))
1060 return Val.getLimitedValue() == MaskForTy;
1061
1062 return false;
1063}
1064
1065// Determines if it is a constant integer or a splat/build vector of constant
1066// integers (and undefs).
1067// Do not permit build vector implicit truncation.
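// For example, (build_vector 1, undef, 3) qualifies, while a build vector
// whose constant operands are wider than the element type (implicit
// truncation) does not.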
1068static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1070 return !(Const->isOpaque() && NoOpaques);
1071 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1072 return false;
1073 unsigned BitWidth = N.getScalarValueSizeInBits();
1074 for (const SDValue &Op : N->op_values()) {
1075 if (Op.isUndef())
1076 continue;
1077 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1078 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1079 (Const->isOpaque() && NoOpaques))
1080 return false;
1081 }
1082 return true;
1083}
1084
1085// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1086// undefs.
1087static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1088 if (V.getOpcode() != ISD::BUILD_VECTOR)
1089 return false;
1090 return isConstantOrConstantVector(V, NoOpaques) ||
1091 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1092}
1093
1094// Determine if the index of this indexed load can be split off, i.e. it is not an opaque target constant.
1095static bool canSplitIdx(LoadSDNode *LD) {
1096 return MaySplitLoadIndex &&
1097 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1098 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1099}
1100
1101bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1102 const SDLoc &DL,
1103 SDNode *N,
1104 SDValue N0,
1105 SDValue N1) {
1106 // Currently this only tries to ensure we don't undo the GEP splits done by
1107 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1108 // we check if the following transformation would be problematic:
1109 // (load/store (add, (add, x, offset1), offset2)) ->
1110 // (load/store (add, x, offset1+offset2)).
1111
1112 // (load/store (add, (add, x, y), offset2)) ->
1113 // (load/store (add, (add, x, offset2), y)).
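// In both cases the concern is that folding offsets back together could yield
// an address that no longer matches a legal addressing mode, undoing the GEP
// split that CodeGenPrepare performed.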
1114
1115 if (!N0.isAnyAdd())
1116 return false;
1117
1118 // Check for vscale addressing modes.
1119 // (load/store (add/sub (add x, y), vscale))
1120 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1121 // (load/store (add/sub (add x, y), (mul vscale, C)))
1122 if ((N1.getOpcode() == ISD::VSCALE ||
1123 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1124 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1125 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1126 N1.getValueType().getFixedSizeInBits() <= 64) {
1127 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1128 ? N1.getConstantOperandVal(0)
1129 : (N1.getOperand(0).getConstantOperandVal(0) *
1130 (N1.getOpcode() == ISD::SHL
1131 ? (1LL << N1.getConstantOperandVal(1))
1132 : N1.getConstantOperandVal(1)));
1133 if (Opc == ISD::SUB)
1134 ScalableOffset = -ScalableOffset;
1135 if (all_of(N->users(), [&](SDNode *Node) {
1136 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1137 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.ScalableOffset = ScalableOffset;
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1145 AS);
1146 }
1147 return false;
1148 }))
1149 return true;
1150 }
1151
1152 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1153 return false;
1154
1155 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1156 if (!C2)
1157 return false;
1158
1159 const APInt &C2APIntVal = C2->getAPIntValue();
1160 if (C2APIntVal.getSignificantBits() > 64)
1161 return false;
1162
1163 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1164 if (N0.hasOneUse())
1165 return false;
1166
1167 const APInt &C1APIntVal = C1->getAPIntValue();
1168 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1169 if (CombinedValueIntVal.getSignificantBits() > 64)
1170 return false;
1171 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1172
1173 for (SDNode *Node : N->users()) {
1174 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1175 // Is x[offset2] already not a legal addressing mode? If so then
1176 // reassociating the constants breaks nothing (we test offset2 because
1177 // that's the one we hope to fold into the load or store).
1178 TargetLoweringBase::AddrMode AM;
1179 AM.HasBaseReg = true;
1180 AM.BaseOffs = C2APIntVal.getSExtValue();
1181 EVT VT = LoadStore->getMemoryVT();
1182 unsigned AS = LoadStore->getAddressSpace();
1183 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1184 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1185 continue;
1186
1187 // Would x[offset1+offset2] still be a legal addressing mode?
1188 AM.BaseOffs = CombinedValue;
1189 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1190 return true;
1191 }
1192 }
1193 } else {
1194 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1195 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1196 return false;
1197
1198 for (SDNode *Node : N->users()) {
1199 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1200 if (!LoadStore)
1201 return false;
1202
1203 // Is x[offset2] a legal addressing mode? If so then
1204 // reassociating the constants would break the address pattern.
1205 TargetLoweringBase::AddrMode AM;
1206 AM.HasBaseReg = true;
1207 AM.BaseOffs = C2APIntVal.getSExtValue();
1208 EVT VT = LoadStore->getMemoryVT();
1209 unsigned AS = LoadStore->getAddressSpace();
1210 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1211 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1212 return false;
1213 }
1214 return true;
1215 }
1216
1217 return false;
1218}
1219
1220/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1221/// \p N0 is the same kind of operation as \p Opc.
1222SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1223 SDValue N0, SDValue N1,
1224 SDNodeFlags Flags) {
1225 EVT VT = N0.getValueType();
1226
1227 if (N0.getOpcode() != Opc)
1228 return SDValue();
1229
1230 SDValue N00 = N0.getOperand(0);
1231 SDValue N01 = N0.getOperand(1);
1232
1234 SDNodeFlags NewFlags;
1235 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1236 Flags.hasNoUnsignedWrap())
1237 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1238
1239 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1240 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1241 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1242 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1243 N0->getFlags().hasDisjoint());
1244 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1245 }
1246 return SDValue();
1247 }
1248 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1249 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1250 // iff (op x, c1) has one use
1251 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1252 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1253 }
1254 }
1255
1256 // Check for repeated operand logic simplifications.
1257 if (Opc == ISD::AND || Opc == ISD::OR) {
1258 // (N00 & N01) & N00 --> N00 & N01
1259 // (N00 & N01) & N01 --> N00 & N01
1260 // (N00 | N01) | N00 --> N00 | N01
1261 // (N00 | N01) | N01 --> N00 | N01
1262 if (N1 == N00 || N1 == N01)
1263 return N0;
1264 }
1265 if (Opc == ISD::XOR) {
1266 // (N00 ^ N01) ^ N00 --> N01
1267 if (N1 == N00)
1268 return N01;
1269 // (N00 ^ N01) ^ N01 --> N00
1270 if (N1 == N01)
1271 return N00;
1272 }
1273
1274 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1275 if (N1 != N01) {
1276 // Reassociate if (op N00, N1) already exists
1277 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1278 // If Op (Op N00, N1), N01 already exists,
1279 // we need to stop reassociating to avoid an infinite loop.
1280 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1281 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1282 }
1283 }
1284
1285 if (N1 != N00) {
1286 // Reassociate if (op N01, N1) already exists
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1288 // If Op (Op N01, N1), N00 already exists,
1289 // we need to stop reassociating to avoid an infinite loop.
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1292 }
1293 }
1294
1295 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1296 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1297 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1298 // comparisons with the same predicate. This enables optimizations as the
1299 // following one:
1300 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1301 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1302 if (Opc == ISD::AND || Opc == ISD::OR) {
1303 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1304 N01->getOpcode() == ISD::SETCC) {
1305 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1306 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1307 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1308 if (CC1 == CC00 && CC1 != CC01) {
1309 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1310 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1311 }
1312 if (CC1 == CC01 && CC1 != CC00) {
1313 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1314 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1315 }
1316 }
1317 }
1318 }
1319
1320 return SDValue();
1321}
1322
1323/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1324/// same kind of operation as \p Opc.
1325SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1326 SDValue N1, SDNodeFlags Flags) {
1327 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1328
1329 // Floating-point reassociation is not allowed without loose FP math.
1330 if (N0.getValueType().isFloatingPoint() ||
1331 N1.getValueType().isFloatingPoint())
1332 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1333 return SDValue();
1334
1335 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1336 return Combined;
1337 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1338 return Combined;
1339 return SDValue();
1340}
1341
1342// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1343// Note that we only expect Flags to be passed from FP operations. For integer
1344// operations they need to be dropped.
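// For example, (add (vecreduce_add a), (vecreduce_add b)) can become
// (vecreduce_add (add a, b)) when the target considers that profitable.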
1345SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1346 const SDLoc &DL, EVT VT, SDValue N0,
1347 SDValue N1, SDNodeFlags Flags) {
1348 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1349 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1350 N0->hasOneUse() && N1->hasOneUse() &&
1351 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1352 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1353 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1354 return DAG.getNode(RedOpc, DL, VT,
1355 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1356 N0.getOperand(0), N1.getOperand(0)));
1357 }
1358
1359 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1360 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1361 // single node.
1362 SDValue A, B, C, D, RedA, RedB;
1363 if (sd_match(N0, m_OneUse(m_c_BinOp(
1364 Opc,
1365 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1366 m_Value(RedA)),
1367 m_Value(B)))) &&
1368 sd_match(N1, m_OneUse(m_c_BinOp(
1369 Opc,
1370 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1371 m_Value(RedB)),
1372 m_Value(D)))) &&
1373 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1374 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1375 A.getValueType() == C.getValueType() &&
1376 hasOperation(Opc, A.getValueType()) &&
1377 TLI.shouldReassociateReduction(RedOpc, VT)) {
1378 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1379 (!N0->getFlags().hasAllowReassociation() ||
1380 !N1->getFlags().hasAllowReassociation() ||
1381 !RedA->getFlags().hasAllowReassociation() ||
1382 !RedB->getFlags().hasAllowReassociation()))
1383 return SDValue();
1384 SelectionDAG::FlagInserter FlagsInserter(
1385 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1386 RedB->getFlags());
1387 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1388 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1389 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1390 return DAG.getNode(Opc, DL, VT, Red, Op2);
1391 }
1392 return SDValue();
1393}
1394
1395SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1396 bool AddTo) {
1397 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1398 ++NodesCombined;
1399 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1400 To[0].dump(&DAG);
1401 dbgs() << " and " << NumTo - 1 << " other values\n");
1402 for (unsigned i = 0, e = NumTo; i != e; ++i)
1403 assert((!To[i].getNode() ||
1404 N->getValueType(i) == To[i].getValueType()) &&
1405 "Cannot combine value to value of different type!");
1406
1407 WorklistRemover DeadNodes(*this);
1408 DAG.ReplaceAllUsesWith(N, To);
1409 if (AddTo) {
1410 // Push the new nodes and any users onto the worklist
1411 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1412 if (To[i].getNode())
1413 AddToWorklistWithUsers(To[i].getNode());
1414 }
1415 }
1416
1417 // Finally, if the node is now dead, remove it from the graph. The node
1418 // may not be dead if the replacement process recursively simplified to
1419 // something else needing this node.
1420 if (N->use_empty())
1421 deleteAndRecombine(N);
1422 return SDValue(N, 0);
1423}
1424
1425void DAGCombiner::
1426CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1427 // Replace the old value with the new one.
1428 ++NodesCombined;
1429 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1430 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1431
1432 // Replace all uses.
1433 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1434
1435 // Push the new node and any (possibly new) users onto the worklist.
1436 AddToWorklistWithUsers(TLO.New.getNode());
1437
1438 // Finally, if the node is now dead, remove it from the graph.
1439 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1440}
1441
1442/// Check the specified integer node value to see if it can be simplified or if
1443/// things it uses can be simplified by bit propagation. If so, return true.
1444bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1445 const APInt &DemandedElts,
1446 bool AssumeSingleUse) {
1447 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1448 KnownBits Known;
1449 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1450 AssumeSingleUse))
1451 return false;
1452
1453 // Revisit the node.
1454 AddToWorklist(Op.getNode());
1455
1456 CommitTargetLoweringOpt(TLO);
1457 return true;
1458}
1459
1460/// Check the specified vector node value to see if it can be simplified or
1461/// if things it uses can be simplified as it only uses some of the elements.
1462/// If so, return true.
1463bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1464 const APInt &DemandedElts,
1465 bool AssumeSingleUse) {
1466 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1467 APInt KnownUndef, KnownZero;
1468 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1469 TLO, 0, AssumeSingleUse))
1470 return false;
1471
1472 // Revisit the node.
1473 AddToWorklist(Op.getNode());
1474
1475 CommitTargetLoweringOpt(TLO);
1476 return true;
1477}
1478
1479void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1480 SDLoc DL(Load);
1481 EVT VT = Load->getValueType(0);
1482 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1483
1484 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1485 Trunc.dump(&DAG); dbgs() << '\n');
1486
1487 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1489
1490 AddToWorklist(Trunc.getNode());
1491 recursivelyDeleteUnusedNodes(Load);
1492}
1493
1494SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1495 Replace = false;
1496 SDLoc DL(Op);
1497 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1498 LoadSDNode *LD = cast<LoadSDNode>(Op);
1499 EVT MemVT = LD->getMemoryVT();
1500 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1501 : LD->getExtensionType();
1502 Replace = true;
1503 return DAG.getExtLoad(ExtType, DL, PVT,
1504 LD->getChain(), LD->getBasePtr(),
1505 MemVT, LD->getMemOperand());
1506 }
1507
1508 unsigned Opc = Op.getOpcode();
1509 switch (Opc) {
1510 default: break;
1511 case ISD::AssertSext:
1512 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1513 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1514 break;
1515 case ISD::AssertZext:
1516 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1517 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1518 break;
1519 case ISD::Constant: {
1520 unsigned ExtOpc =
1521 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1522 return DAG.getNode(ExtOpc, DL, PVT, Op);
1523 }
1524 }
1525
1526 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1527 return SDValue();
1528 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1529}
1530
1531SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1532  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1533    return SDValue();
1534 EVT OldVT = Op.getValueType();
1535 SDLoc DL(Op);
1536 bool Replace = false;
1537 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1538 if (!NewOp.getNode())
1539 return SDValue();
1540 AddToWorklist(NewOp.getNode());
1541
1542 if (Replace)
1543 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1544 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1545 DAG.getValueType(OldVT));
1546}
1547
1548SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1549 EVT OldVT = Op.getValueType();
1550 SDLoc DL(Op);
1551 bool Replace = false;
1552 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1553 if (!NewOp.getNode())
1554 return SDValue();
1555 AddToWorklist(NewOp.getNode());
1556
1557 if (Replace)
1558 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1559 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1560}
1561
1562/// Promote the specified integer binary operation if the target indicates it is
1563/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1564/// i32 since i16 instructions are longer.
1565SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1566 if (!LegalOperations)
1567 return SDValue();
1568
1569 EVT VT = Op.getValueType();
1570 if (VT.isVector() || !VT.isInteger())
1571 return SDValue();
1572
1573 // If operation type is 'undesirable', e.g. i16 on x86, consider
1574 // promoting it.
1575 unsigned Opc = Op.getOpcode();
1576 if (TLI.isTypeDesirableForOp(Opc, VT))
1577 return SDValue();
1578
1579 EVT PVT = VT;
1580 // Consult target whether it is a good idea to promote this operation and
1581 // what's the right type to promote it to.
1582 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1583 assert(PVT != VT && "Don't know what type to promote to!");
1584
1585 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1586
1587 bool Replace0 = false;
1588 SDValue N0 = Op.getOperand(0);
1589 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1590
1591 bool Replace1 = false;
1592 SDValue N1 = Op.getOperand(1);
1593 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1594 SDLoc DL(Op);
1595
1596 SDValue RV =
1597 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1598
1599 // We are always replacing N0/N1's use in N and only need additional
1600 // replacements if there are additional uses.
1601 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1602 // (SDValue) here because the node may reference multiple values
1603 // (for example, the chain value of a load node).
1604 Replace0 &= !N0->hasOneUse();
1605 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1606
1607 // Combine Op here so it is preserved past replacements.
1608 CombineTo(Op.getNode(), RV);
1609
1610 // If operands have a use ordering, make sure we deal with
1611 // predecessor first.
1612 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1613 std::swap(N0, N1);
1614 std::swap(NN0, NN1);
1615 }
1616
1617 if (Replace0) {
1618 AddToWorklist(NN0.getNode());
1619 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1620 }
1621 if (Replace1) {
1622 AddToWorklist(NN1.getNode());
1623 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1624 }
1625 return Op;
1626 }
1627 return SDValue();
1628}
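// Illustrative sketch (assuming an x86-like target where i16 is undesirable
// and i32 is the promoted type): an (i16 add x, y) is rebuilt as
//   (i16 trunc (i32 add (i32 any_extend x), (i32 any_extend y)))
// and the truncate then replaces all uses of the original i16 node.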
1629
1630/// Promote the specified integer shift operation if the target indicates it is
1631/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1632/// i32 since i16 instructions are longer.
1633SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1634 if (!LegalOperations)
1635 return SDValue();
1636
1637 EVT VT = Op.getValueType();
1638 if (VT.isVector() || !VT.isInteger())
1639 return SDValue();
1640
1641 // If operation type is 'undesirable', e.g. i16 on x86, consider
1642 // promoting it.
1643 unsigned Opc = Op.getOpcode();
1644 if (TLI.isTypeDesirableForOp(Opc, VT))
1645 return SDValue();
1646
1647 EVT PVT = VT;
1648 // Consult target whether it is a good idea to promote this operation and
1649 // what's the right type to promote it to.
1650 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1651 assert(PVT != VT && "Don't know what type to promote to!");
1652
1653 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1654
1655 bool Replace = false;
1656 SDValue N0 = Op.getOperand(0);
1657 if (Opc == ISD::SRA)
1658 N0 = SExtPromoteOperand(N0, PVT);
1659 else if (Opc == ISD::SRL)
1660 N0 = ZExtPromoteOperand(N0, PVT);
1661 else
1662 N0 = PromoteOperand(N0, PVT, Replace);
1663
1664 if (!N0.getNode())
1665 return SDValue();
1666
1667 SDLoc DL(Op);
1668 SDValue N1 = Op.getOperand(1);
1669 SDValue RV =
1670 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1671
1672 if (Replace)
1673 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1674
1675 // Deal with Op being deleted.
1676 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1677 return RV;
1678 }
1679 return SDValue();
1680}
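// Illustrative sketch (same i16 -> i32 promotion assumption): (i16 srl x, c)
// becomes roughly (i16 trunc (i32 srl (i32 zext x), c)); srl promotes its
// operand with a zero-extend and sra with a sign-extend so the shifted-in
// bits match the original narrow type.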
1681
1682SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1683 if (!LegalOperations)
1684 return SDValue();
1685
1686 EVT VT = Op.getValueType();
1687 if (VT.isVector() || !VT.isInteger())
1688 return SDValue();
1689
1690 // If operation type is 'undesirable', e.g. i16 on x86, consider
1691 // promoting it.
1692 unsigned Opc = Op.getOpcode();
1693 if (TLI.isTypeDesirableForOp(Opc, VT))
1694 return SDValue();
1695
1696 EVT PVT = VT;
1697 // Consult target whether it is a good idea to promote this operation and
1698 // what's the right type to promote it to.
1699 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1700 assert(PVT != VT && "Don't know what type to promote to!");
1701 // fold (aext (aext x)) -> (aext x)
1702 // fold (aext (zext x)) -> (zext x)
1703 // fold (aext (sext x)) -> (sext x)
1704 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1705 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1706 }
1707 return SDValue();
1708}
1709
1710bool DAGCombiner::PromoteLoad(SDValue Op) {
1711 if (!LegalOperations)
1712 return false;
1713
1714 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1715 return false;
1716
1717 EVT VT = Op.getValueType();
1718 if (VT.isVector() || !VT.isInteger())
1719 return false;
1720
1721 // If operation type is 'undesirable', e.g. i16 on x86, consider
1722 // promoting it.
1723 unsigned Opc = Op.getOpcode();
1724 if (TLI.isTypeDesirableForOp(Opc, VT))
1725 return false;
1726
1727 EVT PVT = VT;
1728 // Consult target whether it is a good idea to promote this operation and
1729 // what's the right type to promote it to.
1730 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1731 assert(PVT != VT && "Don't know what type to promote to!");
1732
1733 SDLoc DL(Op);
1734 SDNode *N = Op.getNode();
1735 LoadSDNode *LD = cast<LoadSDNode>(N);
1736 EVT MemVT = LD->getMemoryVT();
1737    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1738                                                      : LD->getExtensionType();
1739 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1740 LD->getChain(), LD->getBasePtr(),
1741 MemVT, LD->getMemOperand());
1742 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1743
1744 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1745 Result.dump(&DAG); dbgs() << '\n');
1746
1747    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1748    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1749
1750 AddToWorklist(Result.getNode());
1751 recursivelyDeleteUnusedNodes(N);
1752 return true;
1753 }
1754
1755 return false;
1756}
1757
1758/// Recursively delete a node which has no uses and any operands for
1759/// which it is the only use.
1760///
1761/// Note that this both deletes the nodes and removes them from the worklist.
1762/// It also adds any nodes that have had a user deleted to the worklist, as they
1763/// may now have only one use and be subject to other combines.
1764bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1765 if (!N->use_empty())
1766 return false;
1767
1768  SmallSetVector<SDNode *, 16> Nodes;
1769  Nodes.insert(N);
1770 do {
1771 N = Nodes.pop_back_val();
1772 if (!N)
1773 continue;
1774
1775 if (N->use_empty()) {
1776 for (const SDValue &ChildN : N->op_values())
1777 Nodes.insert(ChildN.getNode());
1778
1779 removeFromWorklist(N);
1780 DAG.DeleteNode(N);
1781 } else {
1782 AddToWorklist(N);
1783 }
1784 } while (!Nodes.empty());
1785 return true;
1786}
1787
1788//===----------------------------------------------------------------------===//
1789// Main DAG Combiner implementation
1790//===----------------------------------------------------------------------===//
1791
1792void DAGCombiner::Run(CombineLevel AtLevel) {
1793  // Set the instance variables, so that the various visit routines may use them.
1794 Level = AtLevel;
1795 LegalDAG = Level >= AfterLegalizeDAG;
1796 LegalOperations = Level >= AfterLegalizeVectorOps;
1797 LegalTypes = Level >= AfterLegalizeTypes;
1798
1799 WorklistInserter AddNodes(*this);
1800
1801 // Add all the dag nodes to the worklist.
1802 //
1803  // Note: Not all nodes are added to the PruningList here because the only
1804  // nodes which can be deleted are those which have no uses, and all other nodes
1805  // which would otherwise be added to the worklist by the first call to
1806  // getNextWorklistEntry are already present in it.
1807 for (SDNode &Node : DAG.allnodes())
1808 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1809
1810 // Create a dummy node (which is not added to allnodes), that adds a reference
1811 // to the root node, preventing it from being deleted, and tracking any
1812 // changes of the root.
1813 HandleSDNode Dummy(DAG.getRoot());
1814
1815 // While we have a valid worklist entry node, try to combine it.
1816 while (SDNode *N = getNextWorklistEntry()) {
1817 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1818 // N is deleted from the DAG, since they too may now be dead or may have a
1819 // reduced number of uses, allowing other xforms.
1820 if (recursivelyDeleteUnusedNodes(N))
1821 continue;
1822
1823 WorklistRemover DeadNodes(*this);
1824
1825 // If this combine is running after legalizing the DAG, re-legalize any
1826 // nodes pulled off the worklist.
1827 if (LegalDAG) {
1828 SmallSetVector<SDNode *, 16> UpdatedNodes;
1829 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1830
1831 for (SDNode *LN : UpdatedNodes)
1832 AddToWorklistWithUsers(LN);
1833
1834 if (!NIsValid)
1835 continue;
1836 }
1837
1838 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1839
1840 // Add any operands of the new node which have not yet been combined to the
1841 // worklist as well. getNextWorklistEntry flags nodes that have been
1842 // combined before. Because the worklist uniques things already, this won't
1843 // repeatedly process the same operand.
1844 for (const SDValue &ChildN : N->op_values())
1845 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1846 /*SkipIfCombinedBefore=*/true);
1847
1848 SDValue RV = combine(N);
1849
1850 if (!RV.getNode())
1851 continue;
1852
1853 ++NodesCombined;
1854
1855 // Invalidate cached info.
1856 ChainsWithoutMergeableStores.clear();
1857
1858 // If we get back the same node we passed in, rather than a new node or
1859 // zero, we know that the node must have defined multiple values and
1860 // CombineTo was used. Since CombineTo takes care of the worklist
1861 // mechanics for us, we have no work to do in this case.
1862 if (RV.getNode() == N)
1863 continue;
1864
1865 assert(N->getOpcode() != ISD::DELETED_NODE &&
1866 RV.getOpcode() != ISD::DELETED_NODE &&
1867 "Node was deleted but visit returned new node!");
1868
1869 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1870
1871 if (N->getNumValues() == RV->getNumValues())
1872 DAG.ReplaceAllUsesWith(N, RV.getNode());
1873 else {
1874 assert(N->getValueType(0) == RV.getValueType() &&
1875 N->getNumValues() == 1 && "Type mismatch");
1876 DAG.ReplaceAllUsesWith(N, &RV);
1877 }
1878
1879 // Push the new node and any users onto the worklist. Omit this if the
1880 // new node is the EntryToken (e.g. if a store managed to get optimized
1881 // out), because re-visiting the EntryToken and its users will not uncover
1882 // any additional opportunities, but there may be a large number of such
1883 // users, potentially causing compile time explosion.
1884 if (RV.getOpcode() != ISD::EntryToken)
1885 AddToWorklistWithUsers(RV.getNode());
1886
1887 // Finally, if the node is now dead, remove it from the graph. The node
1888 // may not be dead if the replacement process recursively simplified to
1889 // something else needing this node. This will also take care of adding any
1890 // operands which have lost a user to the worklist.
1891 recursivelyDeleteUnusedNodes(N);
1892 }
1893
1894  // If the root changed (e.g. it was a dead load), update the root.
1895 DAG.setRoot(Dummy.getValue());
1896 DAG.RemoveDeadNodes();
1897}
1898
1899SDValue DAGCombiner::visit(SDNode *N) {
1900 // clang-format off
1901 switch (N->getOpcode()) {
1902 default: break;
1903 case ISD::TokenFactor: return visitTokenFactor(N);
1904 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1905 case ISD::ADD: return visitADD(N);
1906 case ISD::PTRADD: return visitPTRADD(N);
1907 case ISD::SUB: return visitSUB(N);
1908 case ISD::SADDSAT:
1909 case ISD::UADDSAT: return visitADDSAT(N);
1910 case ISD::SSUBSAT:
1911 case ISD::USUBSAT: return visitSUBSAT(N);
1912 case ISD::ADDC: return visitADDC(N);
1913 case ISD::SADDO:
1914 case ISD::UADDO: return visitADDO(N);
1915 case ISD::SUBC: return visitSUBC(N);
1916 case ISD::SSUBO:
1917 case ISD::USUBO: return visitSUBO(N);
1918 case ISD::ADDE: return visitADDE(N);
1919 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1920 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1921 case ISD::SUBE: return visitSUBE(N);
1922 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1923 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1924 case ISD::SMULFIX:
1925 case ISD::SMULFIXSAT:
1926 case ISD::UMULFIX:
1927 case ISD::UMULFIXSAT: return visitMULFIX(N);
1928 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1929 case ISD::SDIV: return visitSDIV(N);
1930 case ISD::UDIV: return visitUDIV(N);
1931 case ISD::SREM:
1932 case ISD::UREM: return visitREM(N);
1933 case ISD::MULHU: return visitMULHU(N);
1934 case ISD::MULHS: return visitMULHS(N);
1935 case ISD::AVGFLOORS:
1936 case ISD::AVGFLOORU:
1937 case ISD::AVGCEILS:
1938 case ISD::AVGCEILU: return visitAVG(N);
1939 case ISD::ABDS:
1940 case ISD::ABDU: return visitABD(N);
1941 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1942 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1943 case ISD::SMULO:
1944 case ISD::UMULO: return visitMULO(N);
1945 case ISD::SMIN:
1946 case ISD::SMAX:
1947 case ISD::UMIN:
1948 case ISD::UMAX: return visitIMINMAX(N);
1949 case ISD::AND: return visitAND(N);
1950 case ISD::OR: return visitOR(N);
1951 case ISD::XOR: return visitXOR(N);
1952 case ISD::SHL: return visitSHL(N);
1953 case ISD::SRA: return visitSRA(N);
1954 case ISD::SRL: return visitSRL(N);
1955 case ISD::ROTR:
1956 case ISD::ROTL: return visitRotate(N);
1957 case ISD::FSHL:
1958 case ISD::FSHR: return visitFunnelShift(N);
1959 case ISD::SSHLSAT:
1960 case ISD::USHLSAT: return visitSHLSAT(N);
1961 case ISD::ABS: return visitABS(N);
1962 case ISD::BSWAP: return visitBSWAP(N);
1963 case ISD::BITREVERSE: return visitBITREVERSE(N);
1964 case ISD::CTLZ: return visitCTLZ(N);
1965 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1966 case ISD::CTTZ: return visitCTTZ(N);
1967 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1968 case ISD::CTPOP: return visitCTPOP(N);
1969 case ISD::SELECT: return visitSELECT(N);
1970 case ISD::VSELECT: return visitVSELECT(N);
1971 case ISD::SELECT_CC: return visitSELECT_CC(N);
1972 case ISD::SETCC: return visitSETCC(N);
1973 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1974 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1975 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1976 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1977 case ISD::AssertSext:
1978 case ISD::AssertZext: return visitAssertExt(N);
1979 case ISD::AssertAlign: return visitAssertAlign(N);
1980 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1981  case ISD::SIGN_EXTEND_VECTOR_INREG:
1982  case ISD::ZERO_EXTEND_VECTOR_INREG:
1983  case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1984 case ISD::TRUNCATE: return visitTRUNCATE(N);
1985 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1986 case ISD::BITCAST: return visitBITCAST(N);
1987 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1988 case ISD::FADD: return visitFADD(N);
1989 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1990 case ISD::FSUB: return visitFSUB(N);
1991 case ISD::FMUL: return visitFMUL(N);
1992 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1993 case ISD::FMAD: return visitFMAD(N);
1994 case ISD::FDIV: return visitFDIV(N);
1995 case ISD::FREM: return visitFREM(N);
1996 case ISD::FSQRT: return visitFSQRT(N);
1997 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1998 case ISD::FPOW: return visitFPOW(N);
1999 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2000 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2001 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2002 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2003 case ISD::LROUND:
2004 case ISD::LLROUND:
2005 case ISD::LRINT:
2006 case ISD::LLRINT: return visitXROUND(N);
2007 case ISD::FP_ROUND: return visitFP_ROUND(N);
2008 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2009 case ISD::FNEG: return visitFNEG(N);
2010 case ISD::FABS: return visitFABS(N);
2011 case ISD::FFLOOR: return visitFFLOOR(N);
2012 case ISD::FMINNUM:
2013 case ISD::FMAXNUM:
2014 case ISD::FMINIMUM:
2015 case ISD::FMAXIMUM:
2016 case ISD::FMINIMUMNUM:
2017 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2018 case ISD::FCEIL: return visitFCEIL(N);
2019 case ISD::FTRUNC: return visitFTRUNC(N);
2020 case ISD::FFREXP: return visitFFREXP(N);
2021 case ISD::BRCOND: return visitBRCOND(N);
2022 case ISD::BR_CC: return visitBR_CC(N);
2023 case ISD::LOAD: return visitLOAD(N);
2024 case ISD::STORE: return visitSTORE(N);
2025 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2026 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2027 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2028 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2029 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2030 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2031 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2032 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2033 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2034 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2035 case ISD::MGATHER: return visitMGATHER(N);
2036 case ISD::MLOAD: return visitMLOAD(N);
2037 case ISD::MSCATTER: return visitMSCATTER(N);
2038 case ISD::MSTORE: return visitMSTORE(N);
2039 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2040  case ISD::PARTIAL_REDUCE_SMLA:
2041  case ISD::PARTIAL_REDUCE_UMLA:
2042  case ISD::PARTIAL_REDUCE_SUMLA:
2043    return visitPARTIAL_REDUCE_MLA(N);
2044 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2045 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2046 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2047 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2048 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2049 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2050 case ISD::FREEZE: return visitFREEZE(N);
2051 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2052 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2053 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2054  case ISD::VECREDUCE_FADD:
2055  case ISD::VECREDUCE_FMUL:
2056  case ISD::VECREDUCE_ADD:
2057 case ISD::VECREDUCE_MUL:
2058 case ISD::VECREDUCE_AND:
2059 case ISD::VECREDUCE_OR:
2060 case ISD::VECREDUCE_XOR:
2061  case ISD::VECREDUCE_SMAX:
2062  case ISD::VECREDUCE_SMIN:
2063  case ISD::VECREDUCE_UMAX:
2064  case ISD::VECREDUCE_UMIN:
2065  case ISD::VECREDUCE_FMAX:
2066  case ISD::VECREDUCE_FMIN:
2067  case ISD::VECREDUCE_FMAXIMUM:
2068  case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2069#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2070#include "llvm/IR/VPIntrinsics.def"
2071 return visitVPOp(N);
2072 }
2073 // clang-format on
2074 return SDValue();
2075}
2076
2077SDValue DAGCombiner::combine(SDNode *N) {
2078 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2079 return SDValue();
2080
2081 SDValue RV;
2082 if (!DisableGenericCombines)
2083 RV = visit(N);
2084
2085 // If nothing happened, try a target-specific DAG combine.
2086 if (!RV.getNode()) {
2087 assert(N->getOpcode() != ISD::DELETED_NODE &&
2088 "Node was deleted but visit returned NULL!");
2089
2090 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2091 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2092
2093 // Expose the DAG combiner to the target combiner impls.
2094      TargetLowering::DAGCombinerInfo
2095        DagCombineInfo(DAG, Level, false, this);
2096
2097 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2098 }
2099 }
2100
2101 // If nothing happened still, try promoting the operation.
2102 if (!RV.getNode()) {
2103 switch (N->getOpcode()) {
2104 default: break;
2105 case ISD::ADD:
2106 case ISD::SUB:
2107 case ISD::MUL:
2108 case ISD::AND:
2109 case ISD::OR:
2110 case ISD::XOR:
2111 RV = PromoteIntBinOp(SDValue(N, 0));
2112 break;
2113 case ISD::SHL:
2114 case ISD::SRA:
2115 case ISD::SRL:
2116 RV = PromoteIntShiftOp(SDValue(N, 0));
2117 break;
2118 case ISD::SIGN_EXTEND:
2119 case ISD::ZERO_EXTEND:
2120 case ISD::ANY_EXTEND:
2121 RV = PromoteExtend(SDValue(N, 0));
2122 break;
2123 case ISD::LOAD:
2124 if (PromoteLoad(SDValue(N, 0)))
2125 RV = SDValue(N, 0);
2126 break;
2127 }
2128 }
2129
2130 // If N is a commutative binary node, try to eliminate it if the commuted
2131 // version is already present in the DAG.
2132 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2133 SDValue N0 = N->getOperand(0);
2134 SDValue N1 = N->getOperand(1);
2135
2136 // Constant operands are canonicalized to RHS.
2137 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2138 SDValue Ops[] = {N1, N0};
2139 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2140 N->getFlags());
2141 if (CSENode)
2142 return SDValue(CSENode, 0);
2143 }
2144 }
2145
2146 return RV;
2147}
2148
2149/// Given a node, return its input chain if it has one, otherwise return a null
2150/// sd operand.
2151static SDValue getInputChainForNode(SDNode *N) {
2152  if (unsigned NumOps = N->getNumOperands()) {
2153 if (N->getOperand(0).getValueType() == MVT::Other)
2154 return N->getOperand(0);
2155 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2156 return N->getOperand(NumOps-1);
2157 for (unsigned i = 1; i < NumOps-1; ++i)
2158 if (N->getOperand(i).getValueType() == MVT::Other)
2159 return N->getOperand(i);
2160 }
2161 return SDValue();
2162}
2163
2164SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2165 SDValue Operand = N->getOperand(0);
2166 EVT VT = Operand.getValueType();
2167 SDLoc dl(N);
2168
2169 // Canonicalize undef to quiet NaN.
2170 if (Operand.isUndef()) {
2171 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2172 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2173 }
2174 return SDValue();
2175}
2176
2177SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2178 // If N has two operands, where one has an input chain equal to the other,
2179 // the 'other' chain is redundant.
2180 if (N->getNumOperands() == 2) {
2181 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2182 return N->getOperand(0);
2183 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2184 return N->getOperand(1);
2185 }
2186
2187 // Don't simplify token factors if optnone.
2188 if (OptLevel == CodeGenOptLevel::None)
2189 return SDValue();
2190
2191 // Don't simplify the token factor if the node itself has too many operands.
2192 if (N->getNumOperands() > TokenFactorInlineLimit)
2193 return SDValue();
2194
2195 // If the sole user is a token factor, we should make sure we have a
2196 // chance to merge them together. This prevents TF chains from inhibiting
2197 // optimizations.
2198 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2199 AddToWorklist(*(N->user_begin()));
2200
2201 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2202 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2203  SmallPtrSet<SDNode*, 16> SeenOps;
2204  bool Changed = false; // If we should replace this token factor.
2205
2206 // Start out with this token factor.
2207 TFs.push_back(N);
2208
2209  // Iterate through token factors. TFs grows when new token factors are
2210 // encountered.
2211 for (unsigned i = 0; i < TFs.size(); ++i) {
2212 // Limit number of nodes to inline, to avoid quadratic compile times.
2213 // We have to add the outstanding Token Factors to Ops, otherwise we might
2214 // drop Ops from the resulting Token Factors.
2215 if (Ops.size() > TokenFactorInlineLimit) {
2216 for (unsigned j = i; j < TFs.size(); j++)
2217 Ops.emplace_back(TFs[j], 0);
2218 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2219 // combiner worklist later.
2220 TFs.resize(i);
2221 break;
2222 }
2223
2224 SDNode *TF = TFs[i];
2225 // Check each of the operands.
2226 for (const SDValue &Op : TF->op_values()) {
2227 switch (Op.getOpcode()) {
2228 case ISD::EntryToken:
2229 // Entry tokens don't need to be added to the list. They are
2230 // redundant.
2231 Changed = true;
2232 break;
2233
2234 case ISD::TokenFactor:
2235 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2236 // Queue up for processing.
2237 TFs.push_back(Op.getNode());
2238 Changed = true;
2239 break;
2240 }
2241 [[fallthrough]];
2242
2243 default:
2244 // Only add if it isn't already in the list.
2245 if (SeenOps.insert(Op.getNode()).second)
2246 Ops.push_back(Op);
2247 else
2248 Changed = true;
2249 break;
2250 }
2251 }
2252 }
2253
2254 // Re-visit inlined Token Factors, to clean them up in case they have been
2255 // removed. Skip the first Token Factor, as this is the current node.
2256 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2257 AddToWorklist(TFs[i]);
2258
2259 // Remove Nodes that are chained to another node in the list. Do so
2260  // by walking up chains breadth-first, stopping when we've seen
2261 // another operand. In general we must climb to the EntryNode, but we can exit
2262 // early if we find all remaining work is associated with just one operand as
2263 // no further pruning is possible.
2264
2265 // List of nodes to search through and original Ops from which they originate.
2266  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2267  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2268 SmallPtrSet<SDNode *, 16> SeenChains;
2269 bool DidPruneOps = false;
2270
2271 unsigned NumLeftToConsider = 0;
2272 for (const SDValue &Op : Ops) {
2273 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2274 OpWorkCount.push_back(1);
2275 }
2276
2277 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2278    // If this is an Op, we can remove the op from the list. Re-mark any
2279    // search associated with it as coming from the current OpNumber.
2280 if (SeenOps.contains(Op)) {
2281 Changed = true;
2282 DidPruneOps = true;
2283 unsigned OrigOpNumber = 0;
2284 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2285 OrigOpNumber++;
2286 assert((OrigOpNumber != Ops.size()) &&
2287 "expected to find TokenFactor Operand");
2288 // Re-mark worklist from OrigOpNumber to OpNumber
2289 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2290 if (Worklist[i].second == OrigOpNumber) {
2291 Worklist[i].second = OpNumber;
2292 }
2293 }
2294 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2295 OpWorkCount[OrigOpNumber] = 0;
2296 NumLeftToConsider--;
2297 }
2298 // Add if it's a new chain
2299 if (SeenChains.insert(Op).second) {
2300 OpWorkCount[OpNumber]++;
2301 Worklist.push_back(std::make_pair(Op, OpNumber));
2302 }
2303 };
2304
2305 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2306    // We need to consider at least 2 Ops to prune.
2307 if (NumLeftToConsider <= 1)
2308 break;
2309 auto CurNode = Worklist[i].first;
2310 auto CurOpNumber = Worklist[i].second;
2311 assert((OpWorkCount[CurOpNumber] > 0) &&
2312 "Node should not appear in worklist");
2313 switch (CurNode->getOpcode()) {
2314 case ISD::EntryToken:
2315      // Hitting EntryToken is the only way for the search to terminate
2316      // without hitting another operand's search.
2317      // Prevent us from marking this operand
2318      // considered.
2319 NumLeftToConsider++;
2320 break;
2321 case ISD::TokenFactor:
2322 for (const SDValue &Op : CurNode->op_values())
2323 AddToWorklist(i, Op.getNode(), CurOpNumber);
2324 break;
2325    case ISD::LIFETIME_START:
2326    case ISD::LIFETIME_END:
2327 case ISD::CopyFromReg:
2328 case ISD::CopyToReg:
2329 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2330 break;
2331 default:
2332 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2333 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2334 break;
2335 }
2336 OpWorkCount[CurOpNumber]--;
2337 if (OpWorkCount[CurOpNumber] == 0)
2338 NumLeftToConsider--;
2339 }
2340
2341 // If we've changed things around then replace token factor.
2342 if (Changed) {
2343    SDValue Result;
2344    if (Ops.empty()) {
2345 // The entry token is the only possible outcome.
2346 Result = DAG.getEntryNode();
2347 } else {
2348 if (DidPruneOps) {
2349 SmallVector<SDValue, 8> PrunedOps;
2350 //
2351 for (const SDValue &Op : Ops) {
2352 if (SeenChains.count(Op.getNode()) == 0)
2353 PrunedOps.push_back(Op);
2354 }
2355 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2356 } else {
2357 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2358 }
2359 }
2360 return Result;
2361 }
2362 return SDValue();
2363}
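// Illustrative sketch: a nested chain such as
//   (TokenFactor (TokenFactor a, b), c)
// where the inner node has a single use is flattened to (TokenFactor a, b, c),
// and any operand already reachable through another operand's chain is pruned.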
2364
2365/// MERGE_VALUES can always be eliminated.
2366SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2367 WorklistRemover DeadNodes(*this);
2368 // Replacing results may cause a different MERGE_VALUES to suddenly
2369 // be CSE'd with N, and carry its uses with it. Iterate until no
2370 // uses remain, to ensure that the node can be safely deleted.
2371 // First add the users of this node to the work list so that they
2372 // can be tried again once they have new operands.
2373 AddUsersToWorklist(N);
2374 do {
2375 // Do as a single replacement to avoid rewalking use lists.
2376 SmallVector<SDValue, 8> Ops(N->ops());
2377 DAG.ReplaceAllUsesWith(N, Ops.data());
2378 } while (!N->use_empty());
2379 deleteAndRecombine(N);
2380 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2381}
2382
2383/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2384/// ConstantSDNode pointer else nullptr.
2385static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2386  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2387 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2388}
2389
2390// isTruncateOf - If N is a truncate of some other value, return true, record
2391// the value being truncated in Op and which of Op's bits are zero/one in Known.
2392// This function computes KnownBits to avoid a duplicated call to
2393// computeKnownBits in the caller.
2394static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2395                         KnownBits &Known) {
2396 if (N->getOpcode() == ISD::TRUNCATE) {
2397 Op = N->getOperand(0);
2398 Known = DAG.computeKnownBits(Op);
2399 if (N->getFlags().hasNoUnsignedWrap())
2400 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2401 return true;
2402 }
2403
2404 if (N.getValueType().getScalarType() != MVT::i1 ||
2405 !sd_match(
2406          N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2407    return false;
2408
2409 Known = DAG.computeKnownBits(Op);
2410 return (Known.Zero | 1).isAllOnes();
2411}
2412
2413/// Return true if 'Use' is a load or a store that uses N as its base pointer
2414/// and that N may be folded in the load / store addressing mode.
2415static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2416                                    const TargetLowering &TLI) {
2417 EVT VT;
2418 unsigned AS;
2419
2420 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2421 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2422 return false;
2423 VT = LD->getMemoryVT();
2424 AS = LD->getAddressSpace();
2425 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2426 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2427 return false;
2428 VT = ST->getMemoryVT();
2429 AS = ST->getAddressSpace();
2430 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2431 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2432 return false;
2433 VT = LD->getMemoryVT();
2434 AS = LD->getAddressSpace();
2435 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2436 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2437 return false;
2438 VT = ST->getMemoryVT();
2439 AS = ST->getAddressSpace();
2440 } else {
2441 return false;
2442 }
2443
2444  TargetLowering::AddrMode AM;
2445  if (N->isAnyAdd()) {
2446 AM.HasBaseReg = true;
2447 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2448 if (Offset)
2449 // [reg +/- imm]
2450 AM.BaseOffs = Offset->getSExtValue();
2451 else
2452 // [reg +/- reg]
2453 AM.Scale = 1;
2454 } else if (N->getOpcode() == ISD::SUB) {
2455 AM.HasBaseReg = true;
2456 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2457 if (Offset)
2458 // [reg +/- imm]
2459 AM.BaseOffs = -Offset->getSExtValue();
2460 else
2461 // [reg +/- reg]
2462 AM.Scale = 1;
2463 } else {
2464 return false;
2465 }
2466
2467 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2468 VT.getTypeForEVT(*DAG.getContext()), AS);
2469}
2470
2471/// This inverts a canonicalization in IR that replaces a variable select arm
2472/// with an identity constant. Codegen improves if we re-use the variable
2473/// operand rather than load a constant. This can also be converted into a
2474/// masked vector operation if the target supports it.
2475static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2476                                              bool ShouldCommuteOperands) {
2477 // Match a select as operand 1. The identity constant that we are looking for
2478 // is only valid as operand 1 of a non-commutative binop.
2479 SDValue N0 = N->getOperand(0);
2480 SDValue N1 = N->getOperand(1);
2481 if (ShouldCommuteOperands)
2482 std::swap(N0, N1);
2483
2484 unsigned SelOpcode = N1.getOpcode();
2485 if ((SelOpcode != ISD::VSELECT && SelOpcode != ISD::SELECT) ||
2486 !N1.hasOneUse())
2487 return SDValue();
2488
2489 // We can't hoist all instructions because of immediate UB (not speculatable).
2490 // For example div/rem by zero.
2492 return SDValue();
2493
2494 unsigned Opcode = N->getOpcode();
2495 EVT VT = N->getValueType(0);
2496 SDValue Cond = N1.getOperand(0);
2497 SDValue TVal = N1.getOperand(1);
2498 SDValue FVal = N1.getOperand(2);
2499 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2500
2501 // This transform increases uses of N0, so freeze it to be safe.
2502 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2503 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2504 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2505 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2506 FVal)) {
2507 SDValue F0 = DAG.getFreeze(N0);
2508 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510 }
2511 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2513 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2514 TVal)) {
2515 SDValue F0 = DAG.getFreeze(N0);
2516 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2517 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2518 }
2519
2520 return SDValue();
2521}
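// Illustrative sketch, using 0 as the additive identity:
//   add X, (vselect Cond, 0, Y) --> vselect Cond, X, (add X, Y)
// so the variable operand is reused instead of materializing the constant arm.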
2522
2523SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2525 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2526 "Unexpected binary operator");
2527
2528 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2529 return Sel;
2530
2531 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2532 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2533 return Sel;
2534
2535 // Don't do this unless the old select is going away. We want to eliminate the
2536 // binary operator, not replace a binop with a select.
2537 // TODO: Handle ISD::SELECT_CC.
2538 unsigned SelOpNo = 0;
2539 SDValue Sel = BO->getOperand(0);
2540 auto BinOpcode = BO->getOpcode();
2541 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2542 SelOpNo = 1;
2543 Sel = BO->getOperand(1);
2544
2545 // Peek through trunc to shift amount type.
2546 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2547 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2548 // This is valid when the truncated bits of x are already zero.
2549 SDValue Op;
2550 KnownBits Known;
2551 if (isTruncateOf(DAG, Sel, Op, Known) &&
2553 Sel = Op;
2554 }
2555 }
2556
2557 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2558 return SDValue();
2559
2560 SDValue CT = Sel.getOperand(1);
2561 if (!isConstantOrConstantVector(CT, true) &&
2562      !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2563    return SDValue();
2564
2565 SDValue CF = Sel.getOperand(2);
2566 if (!isConstantOrConstantVector(CF, true) &&
2567      !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2568    return SDValue();
2569
2570 // Bail out if any constants are opaque because we can't constant fold those.
2571 // The exception is "and" and "or" with either 0 or -1 in which case we can
2572 // propagate non constant operands into select. I.e.:
2573 // and (select Cond, 0, -1), X --> select Cond, 0, X
2574 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2575 bool CanFoldNonConst =
2576 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2577      ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2578       (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2579
2580 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2581 if (!CanFoldNonConst &&
2582 !isConstantOrConstantVector(CBO, true) &&
2583      !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2584    return SDValue();
2585
2586 SDLoc DL(Sel);
2587 SDValue NewCT, NewCF;
2588 EVT VT = BO->getValueType(0);
2589
2590 if (CanFoldNonConst) {
2591 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594 NewCT = CT;
2595 else
2596 NewCT = CBO;
2597
2598 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600 NewCF = CF;
2601 else
2602 NewCF = CBO;
2603 } else {
2604 // We have a select-of-constants followed by a binary operator with a
2605 // constant. Eliminate the binop by pulling the constant math into the
2606 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2607 // CBO, CF + CBO
2608 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610 if (!NewCT)
2611 return SDValue();
2612
2613 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615 if (!NewCF)
2616 return SDValue();
2617 }
2618
2619 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2620}
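// Illustrative sketch of the constant-folding path (constants chosen for
// exposition): add (select Cond, 3, 7), 10 --> select Cond, 13, 17, which
// removes the add entirely once the original select has no other uses.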
2621
2622static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2623                                         SelectionDAG &DAG) {
2624 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 "Expecting add or sub");
2626
2627 // Match a constant operand and a zext operand for the math instruction:
2628 // add Z, C
2629 // sub C, Z
2630 bool IsAdd = N->getOpcode() == ISD::ADD;
2631 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2632 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2633 auto *CN = dyn_cast<ConstantSDNode>(C);
2634 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2635 return SDValue();
2636
2637 // Match the zext operand as a setcc of a boolean.
2638 if (Z.getOperand(0).getValueType() != MVT::i1)
2639 return SDValue();
2640
2641 // Match the compare as: setcc (X & 1), 0, eq.
2642 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2643                                         m_SpecificCondCode(ISD::SETEQ))))
2644    return SDValue();
2645
2646 // We are adding/subtracting a constant and an inverted low bit. Turn that
2647 // into a subtract/add of the low bit with incremented/decremented constant:
2648 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2649 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2650 EVT VT = C.getValueType();
2651 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2652 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2653 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2654 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2655}
2656
2657// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2658SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2659 SDValue N0 = N->getOperand(0);
2660 EVT VT = N0.getValueType();
2661 SDValue A, B;
2662
2663 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2664      sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2665                        m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2666 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2667 }
2668 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2669      sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2670                        m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2671 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2672 }
2673 return SDValue();
2674}
2675
2676/// Try to fold a pointer arithmetic node.
2677/// This needs to be done separately from normal addition, because pointer
2678/// addition is not commutative.
2679SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2680 SDValue N0 = N->getOperand(0);
2681 SDValue N1 = N->getOperand(1);
2682 EVT PtrVT = N0.getValueType();
2683 EVT IntVT = N1.getValueType();
2684 SDLoc DL(N);
2685
2686 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2687 // combines here depend on this assumption.
2688 assert(PtrVT == IntVT &&
2689 "PTRADD with different operand types is not supported");
2690
2691 // fold (ptradd x, 0) -> x
2692 if (isNullConstant(N1))
2693 return N0;
2694
2695 // fold (ptradd 0, x) -> x
2696 if (PtrVT == IntVT && isNullConstant(N0))
2697 return N1;
2698
2699 if (N0.getOpcode() != ISD::PTRADD ||
2700 reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2701 return SDValue();
2702
2703 SDValue X = N0.getOperand(0);
2704 SDValue Y = N0.getOperand(1);
2705 SDValue Z = N1;
2706 bool N0OneUse = N0.hasOneUse();
2707 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2708 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2709
2710 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2711 // * y is a constant and (ptradd x, y) has one use; or
2712 // * y and z are both constants.
2713 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2714 // If both additions in the original were NUW, the new ones are as well.
2715    SDNodeFlags Flags =
2716        (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2717 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2718 AddToWorklist(Add.getNode());
2719 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2720 }
2721
2722 // TODO: There is another possible fold here that was proven useful.
2723 // It would be this:
2724 //
2725 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2726 // * (ptradd x, y) has one use; and
2727 // * y is a constant; and
2728 // * z is not a constant.
2729 //
2730 // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2731 // opportunity to select more complex instructions such as SUBPT and
2732 // MSUBPT. However, a hypothetical corner case has been found that we could
2733 // not avoid. Consider this (pseudo-POSIX C):
2734 //
2735 // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2736 // char *p = mmap(LARGE_CONSTANT);
2737 // char *q = foo(p, -LARGE_CONSTANT);
2738 //
2739 // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2740 // further + z takes it back to the start of the mapping, so valid,
2741 // regardless of the address mmap gave back. However, if mmap gives you an
2742 // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2743 // borrow from the high bits (with the subsequent + z carrying back into
2744 // the high bits to give you a well-defined pointer) and thus trip
2745 // FEAT_CPA's pointer corruption checks.
2746 //
2747 // We leave this fold as an opportunity for future work, addressing the
2748 // corner case for FEAT_CPA, as well as reconciling the solution with the
2749 // more general application of pointer arithmetic in other future targets.
2750 // For now each architecture that wants this fold must implement it in the
2751 // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2752
2753 return SDValue();
2754}
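// Illustrative sketch of the reassociation above (offsets chosen for
// exposition): when y is a constant and the inner ptradd has one use,
//   (ptradd (ptradd x, 16), z) --> (ptradd x, (add 16, z))
// keeping x as the single base of the pointer chain.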
2755
2756/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2757/// a shift and add with a different constant.
2758static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2759                                   SelectionDAG &DAG) {
2760 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2761 "Expecting add or sub");
2762
2763 // We need a constant operand for the add/sub, and the other operand is a
2764 // logical shift right: add (srl), C or sub C, (srl).
2765 bool IsAdd = N->getOpcode() == ISD::ADD;
2766 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2767 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2768 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2769 ShiftOp.getOpcode() != ISD::SRL)
2770 return SDValue();
2771
2772 // The shift must be of a 'not' value.
2773 SDValue Not = ShiftOp.getOperand(0);
2774 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2775 return SDValue();
2776
2777 // The shift must be moving the sign bit to the least-significant-bit.
2778 EVT VT = ShiftOp.getValueType();
2779 SDValue ShAmt = ShiftOp.getOperand(1);
2780 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2781 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2782 return SDValue();
2783
2784 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2785 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2786 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2787 if (SDValue NewC = DAG.FoldConstantArithmetic(
2788 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2789 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2790 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2791 Not.getOperand(0), ShAmt);
2792 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2793 }
2794
2795 return SDValue();
2796}
2797
2798static bool
2799areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2800  return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2801 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2802}
2803
2804/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2805/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2806/// are no common bits set in the operands).
2807SDValue DAGCombiner::visitADDLike(SDNode *N) {
2808 SDValue N0 = N->getOperand(0);
2809 SDValue N1 = N->getOperand(1);
2810 EVT VT = N0.getValueType();
2811 SDLoc DL(N);
2812
2813 // fold (add x, undef) -> undef
2814 if (N0.isUndef())
2815 return N0;
2816 if (N1.isUndef())
2817 return N1;
2818
2819 // fold (add c1, c2) -> c1+c2
2820 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2821 return C;
2822
2823 // canonicalize constant to RHS
2824  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2825      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2826    return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2827
2828 if (areBitwiseNotOfEachother(N0, N1))
2829 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2830
2831 // fold vector ops
2832 if (VT.isVector()) {
2833 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2834 return FoldedVOp;
2835
2836 // fold (add x, 0) -> x, vector edition
2837    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2838      return N0;
2839 }
2840
2841 // fold (add x, 0) -> x
2842 if (isNullConstant(N1))
2843 return N0;
2844
2845 if (N0.getOpcode() == ISD::SUB) {
2846 SDValue N00 = N0.getOperand(0);
2847 SDValue N01 = N0.getOperand(1);
2848
2849 // fold ((A-c1)+c2) -> (A+(c2-c1))
2850 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2851 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2852
2853 // fold ((c1-A)+c2) -> (c1+c2)-A
2854 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2855 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2856 }
2857
2858 // add (sext i1 X), 1 -> zext (not i1 X)
2859 // We don't transform this pattern:
2860 // add (zext i1 X), -1 -> sext (not i1 X)
2861 // because most (?) targets generate better code for the zext form.
2862 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2863 isOneOrOneSplat(N1)) {
2864 SDValue X = N0.getOperand(0);
2865 if ((!LegalOperations ||
2866 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2867          TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2868        X.getScalarValueSizeInBits() == 1) {
2869 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2870 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2871 }
2872 }
2873
2874 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2875 // iff (or x, c0) is equivalent to (add x, c0).
2876 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2877 // iff (xor x, c0) is equivalent to (add x, c0).
2878 if (DAG.isADDLike(N0)) {
2879 SDValue N01 = N0.getOperand(1);
2880 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2881 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2882 }
2883
2884 if (SDValue NewSel = foldBinOpIntoSelect(N))
2885 return NewSel;
2886
2887 // reassociate add
2888 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2889 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2890 return RADD;
2891
2892 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2893 // equivalent to (add x, c).
2894 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2895 // equivalent to (add x, c).
2896 // Do this optimization only when adding c does not introduce instructions
2897 // for adding carries.
2898 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2899 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2900 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2901 // If N0's type does not split or is a sign mask, it does not introduce
2902 // add carry.
2903 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2904 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2905                        TyActn == TargetLoweringBase::TypePromoteInteger ||
2906                        isMinSignedConstant(N0.getOperand(1));
2907      if (NoAddCarry)
2908 return DAG.getNode(
2909 ISD::ADD, DL, VT,
2910 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2911 N0.getOperand(1));
2912 }
2913 return SDValue();
2914 };
2915 if (SDValue Add = ReassociateAddOr(N0, N1))
2916 return Add;
2917 if (SDValue Add = ReassociateAddOr(N1, N0))
2918 return Add;
2919
2920 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2921 if (SDValue SD =
2922 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2923 return SD;
2924 }
2925
2926 SDValue A, B, C, D;
2927
2928 // fold ((0-A) + B) -> B-A
2929 if (sd_match(N0, m_Neg(m_Value(A))))
2930 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2931
2932 // fold (A + (0-B)) -> A-B
2933 if (sd_match(N1, m_Neg(m_Value(B))))
2934 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2935
2936 // fold (A+(B-A)) -> B
2937 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2938 return B;
2939
2940 // fold ((B-A)+A) -> B
2941 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2942 return B;
2943
2944 // fold ((A-B)+(C-A)) -> (C-B)
2945 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2946      sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2947    return DAG.getNode(ISD::SUB, DL, VT, C, B);
2948
2949 // fold ((A-B)+(B-C)) -> (A-C)
2950 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2951      sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2952    return DAG.getNode(ISD::SUB, DL, VT, A, C);
2953
2954 // fold (A+(B-(A+C))) to (B-C)
2955 // fold (A+(B-(C+A))) to (B-C)
2956 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2957 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2958
2959 // fold (A+((B-A)+or-C)) to (B+or-C)
2960 if (sd_match(N1,
2961               m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2962                       m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2963 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2964
2965 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2966 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2967 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2968      (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2969    return DAG.getNode(ISD::SUB, DL, VT,
2970 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2971 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2972
2973 // fold (add (umax X, C), -C) --> (usubsat X, C)
2974 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2975 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2976 return (!Max && !Op) ||
2977 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2978 };
2979 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2980 /*AllowUndefs*/ true))
2981 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2982 N0.getOperand(1));
2983 }
2984
2985  if (SimplifyDemandedBits(SDValue(N, 0)))
2986    return SDValue(N, 0);
2987
2988 if (isOneOrOneSplat(N1)) {
2989 // fold (add (xor a, -1), 1) -> (sub 0, a)
2990 if (isBitwiseNot(N0))
2991 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2992 N0.getOperand(0));
2993
2994 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2995 if (N0.getOpcode() == ISD::ADD) {
2996 SDValue A, Xor;
2997
2998 if (isBitwiseNot(N0.getOperand(0))) {
2999 A = N0.getOperand(1);
3000 Xor = N0.getOperand(0);
3001 } else if (isBitwiseNot(N0.getOperand(1))) {
3002 A = N0.getOperand(0);
3003 Xor = N0.getOperand(1);
3004 }
3005
3006 if (Xor)
3007 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3008 }
3009
3010 // Look for:
3011 // add (add x, y), 1
3012 // And if the target does not like this form then turn into:
3013 // sub y, (xor x, -1)
3014 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3015 N0.hasOneUse() &&
3016 // Limit this to after legalization if the add has wrap flags
3017 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3018 !N->getFlags().hasNoSignedWrap()))) {
3019 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3020 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3021 }
3022 }
3023
3024 // (x - y) + -1 -> add (xor y, -1), x
3025 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3026 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3027 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3028 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3029 }
3030
3031 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3032 // This can help if the inner add has multiple uses.
3033 APInt CM, CA;
3034 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3035 if (VT.getScalarSizeInBits() <= 64) {
3036      if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3037                                      m_ConstInt(CM)))) &&
3038          TLI.isLegalAddImmediate(
3039              (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3040        SDNodeFlags Flags;
3041        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3042 // are _also_ nsw the outputs can be too.
3043 if (N->getFlags().hasNoUnsignedWrap() &&
3044            N0->getFlags().hasNoUnsignedWrap() &&
3045            N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3046          Flags.setNoUnsignedWrap(true);
3047          if (N->getFlags().hasNoSignedWrap() &&
3048              N0->getFlags().hasNoSignedWrap() &&
3049              N0.getOperand(0)->getFlags().hasNoSignedWrap())
3050            Flags.setNoSignedWrap(true);
3051 }
3052 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3053 DAG.getConstant(CM, DL, VT), Flags);
3054 return DAG.getNode(
3055 ISD::ADD, DL, VT, Mul,
3056 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3057 }
3058 // Also look in case there is an intermediate add.
3059 if (sd_match(N0, m_OneUse(m_Add(
3060                         m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3061                                        m_ConstInt(CM))),
3062                         m_Value(B)))) &&
3063        TLI.isLegalAddImmediate(
3064            (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3065      SDNodeFlags Flags;
3066      // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3067 // are _also_ nsw the outputs can be too.
3068 SDValue OMul =
3069 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3070 if (N->getFlags().hasNoUnsignedWrap() &&
3071 N0->getFlags().hasNoUnsignedWrap() &&
3072 OMul->getFlags().hasNoUnsignedWrap() &&
3073 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3074        Flags.setNoUnsignedWrap(true);
3075        if (N->getFlags().hasNoSignedWrap() &&
3076 N0->getFlags().hasNoSignedWrap() &&
3077 OMul->getFlags().hasNoSignedWrap() &&
3078 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3079          Flags.setNoSignedWrap(true);
3080      }
3081 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3082 DAG.getConstant(CM, DL, VT), Flags);
3083 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3084 return DAG.getNode(
3085 ISD::ADD, DL, VT, Add,
3086 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3087 }
3088 }
3089 }
3090
3091 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3092 return Combined;
3093
3094 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3095 return Combined;
3096
3097 return SDValue();
3098}
3099
3100// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3101SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3102 SDValue N0 = N->getOperand(0);
3103 EVT VT = N0.getValueType();
3104 SDValue A, B;
3105
3106 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3107 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3108 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3109 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3110 }
3111 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3112 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3113 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3114 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3115 }
3116
3117 return SDValue();
3118}
3119
3120SDValue DAGCombiner::visitADD(SDNode *N) {
3121 SDValue N0 = N->getOperand(0);
3122 SDValue N1 = N->getOperand(1);
3123 EVT VT = N0.getValueType();
3124 SDLoc DL(N);
3125
3126 if (SDValue Combined = visitADDLike(N))
3127 return Combined;
3128
3129 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3130 return V;
3131
3132 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3133 return V;
3134
3135 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3136 return V;
3137
3138 // Try to match AVGFLOOR fixedwidth pattern
3139 if (SDValue V = foldAddToAvg(N, DL))
3140 return V;
3141
3142 // fold (a+b) -> (a|b) iff a and b share no bits.
3143 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3144 DAG.haveNoCommonBitsSet(N0, N1))
3145 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3146
3147 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3148 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3149 const APInt &C0 = N0->getConstantOperandAPInt(0);
3150 const APInt &C1 = N1->getConstantOperandAPInt(0);
3151 return DAG.getVScale(DL, VT, C0 + C1);
3152 }
3153
3154 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3155 if (N0.getOpcode() == ISD::ADD &&
3156 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3157 N1.getOpcode() == ISD::VSCALE) {
3158 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3159 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3160 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3161 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3162 }
3163
3164 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3165 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3166 N1.getOpcode() == ISD::STEP_VECTOR) {
3167 const APInt &C0 = N0->getConstantOperandAPInt(0);
3168 const APInt &C1 = N1->getConstantOperandAPInt(0);
3169 APInt NewStep = C0 + C1;
3170 return DAG.getStepVector(DL, VT, NewStep);
3171 }
3172
3173 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3174 if (N0.getOpcode() == ISD::ADD &&
3175 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3176 N1.getOpcode() == ISD::STEP_VECTOR) {
3177 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3178 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3179 APInt NewStep = SV0 + SV1;
3180 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3181 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3182 }
3183
3184 return SDValue();
3185}
3186
3187SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3188 unsigned Opcode = N->getOpcode();
3189 SDValue N0 = N->getOperand(0);
3190 SDValue N1 = N->getOperand(1);
3191 EVT VT = N0.getValueType();
3192 bool IsSigned = Opcode == ISD::SADDSAT;
3193 SDLoc DL(N);
3194
3195 // fold (add_sat x, undef) -> -1
3196 if (N0.isUndef() || N1.isUndef())
3197 return DAG.getAllOnesConstant(DL, VT);
3198
3199 // fold (add_sat c1, c2) -> c3
3200 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3201 return C;
3202
3203 // canonicalize constant to RHS
3204 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3205 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3206 return DAG.getNode(Opcode, DL, VT, N1, N0);
3207
3208 // fold vector ops
3209 if (VT.isVector()) {
3210 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3211 return FoldedVOp;
3212
3213 // fold (add_sat x, 0) -> x, vector edition
3214 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3215 return N0;
3216 }
3217
3218 // fold (add_sat x, 0) -> x
3219 if (isNullConstant(N1))
3220 return N0;
3221
3222 // If it cannot overflow, transform into an add.
3223 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3224 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3225
3226 return SDValue();
3227}
3228
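// Helper: peel carry-preserving wrappers (TRUNCATE/ZERO_EXTEND/AND-with-1) off V and
// return the underlying carry-out value (result #1 of a UADDO/USUBO/UADDO_CARRY/
// USUBO_CARRY node) if V plausibly represents a 0/1 carry bit; otherwise return an
// empty SDValue.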
3229 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3230 bool ForceCarryReconstruction = false) {
3231 bool Masked = false;
3232
3233 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3234 while (true) {
3235 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3236 V = V.getOperand(0);
3237 continue;
3238 }
3239
3240 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3241 if (ForceCarryReconstruction)
3242 return V;
3243
3244 Masked = true;
3245 V = V.getOperand(0);
3246 continue;
3247 }
3248
3249 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3250 return V;
3251
3252 break;
3253 }
3254
3255 // If this is not a carry, return.
3256 if (V.getResNo() != 1)
3257 return SDValue();
3258
3259 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3260 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3261 return SDValue();
3262
3263 EVT VT = V->getValueType(0);
3264 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3265 return SDValue();
3266
3267 // If the result is masked, then no matter what kind of bool it is we can
3268 // return. If it isn't, then we need to make sure the bool type is either 0 or
3269 // 1 and not other values.
3270 if (Masked ||
3271 TLI.getBooleanContents(V.getValueType()) ==
3272 TargetLowering::ZeroOrOneBooleanContent)
3273 return V;
3274
3275 return SDValue();
3276}
3277
3278/// Given the operands of an add/sub operation, see if the 2nd operand is a
3279/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3280/// the opcode and bypass the mask operation.
3281static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3282 SelectionDAG &DAG, const SDLoc &DL) {
3283 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3284 N1 = N1.getOperand(0);
3285
3286 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3287 return SDValue();
3288
3289 EVT VT = N0.getValueType();
3290 SDValue N10 = N1.getOperand(0);
3291 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3292 N10 = N10.getOperand(0);
3293
3294 if (N10.getValueType() != VT)
3295 return SDValue();
3296
3297 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3298 return SDValue();
3299
3300 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3301 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3302 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3303}
3304
3305/// Helper for doing combines based on N0 and N1 being added to each other.
3306SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3307 SDNode *LocReference) {
3308 EVT VT = N0.getValueType();
3309 SDLoc DL(LocReference);
3310
3311 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3312 SDValue Y, N;
3313 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3314 return DAG.getNode(ISD::SUB, DL, VT, N0,
3315 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3316
3317 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3318 return V;
3319
3320 // Look for:
3321 // add (add x, 1), y
3322 // And if the target does not like this form then turn into:
3323 // sub y, (xor x, -1)
3324 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3325 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3326 // Limit this to after legalization if the add has wrap flags
3327 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3328 !N0->getFlags().hasNoSignedWrap()))) {
3329 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3330 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3331 }
3332
3333 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3334 // Hoist one-use subtraction by non-opaque constant:
3335 // (x - C) + y -> (x + y) - C
3336 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3337 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3338 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3339 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3340 }
3341 // Hoist one-use subtraction from non-opaque constant:
3342 // (C - x) + y -> (y - x) + C
3343 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3344 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3345 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3346 }
3347 }
3348
3349 // add (mul x, C), x -> mul x, C+1
3350 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3351 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3352 N0.hasOneUse()) {
3353 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3354 DAG.getConstant(1, DL, VT));
3355 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3356 }
3357
3358 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3359 // rather than 'add 0/-1' (the zext should get folded).
3360 // add (sext i1 Y), X --> sub X, (zext i1 Y)
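// (sext i1 Y) is 0/-1 while (zext i1 Y) is 0/1, so adding the former is the same as
// subtracting the latter.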
3361 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3362 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3363 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3364 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3365 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3366 }
3367
3368 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3369 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3370 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3371 if (TN->getVT() == MVT::i1) {
3372 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3373 DAG.getConstant(1, DL, VT));
3374 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3375 }
3376 }
3377
3378 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3379 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3380 N1.getResNo() == 0)
3381 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3382 N0, N1.getOperand(0), N1.getOperand(2));
3383
3384 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3385 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3386 if (SDValue Carry = getAsCarry(TLI, N1))
3387 return DAG.getNode(ISD::UADDO_CARRY, DL,
3388 DAG.getVTList(VT, Carry.getValueType()), N0,
3389 DAG.getConstant(0, DL, VT), Carry);
3390
3391 return SDValue();
3392}
3393
3394SDValue DAGCombiner::visitADDC(SDNode *N) {
3395 SDValue N0 = N->getOperand(0);
3396 SDValue N1 = N->getOperand(1);
3397 EVT VT = N0.getValueType();
3398 SDLoc DL(N);
3399
3400 // If the flag result is dead, turn this into an ADD.
3401 if (!N->hasAnyUseOfValue(1))
3402 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3403 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3404
3405 // canonicalize constant to RHS.
3406 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408 if (N0C && !N1C)
3409 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3410
3411 // fold (addc x, 0) -> x + no carry out
3412 if (isNullConstant(N1))
3413 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3414 DL, MVT::Glue));
3415
3416 // If it cannot overflow, transform into an add.
3417 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3418 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3419 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3420
3421 return SDValue();
3422}
3423
3424/**
3425 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3426 * then the flip also occurs if computing the inverse is the same cost.
3427 * This function returns an empty SDValue in case it cannot flip the boolean
3428 * without increasing the cost of the computation. If you want to flip a boolean
3429 * no matter what, use DAG.getLogicalNOT.
3430 */
3431 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3432 const TargetLowering &TLI,
3433 bool Force) {
3434 if (Force && isa<ConstantSDNode>(V))
3435 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3436
3437 if (V.getOpcode() != ISD::XOR)
3438 return SDValue();
3439
3440 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3441 return V.getOperand(0);
3442 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3443 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3444 return SDValue();
3445}
3446
3447SDValue DAGCombiner::visitADDO(SDNode *N) {
3448 SDValue N0 = N->getOperand(0);
3449 SDValue N1 = N->getOperand(1);
3450 EVT VT = N0.getValueType();
3451 bool IsSigned = (ISD::SADDO == N->getOpcode());
3452
3453 EVT CarryVT = N->getValueType(1);
3454 SDLoc DL(N);
3455
3456 // If the flag result is dead, turn this into an ADD.
3457 if (!N->hasAnyUseOfValue(1))
3458 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3459 DAG.getUNDEF(CarryVT));
3460
3461 // canonicalize constant to RHS.
3462 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3463 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3464 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3465
3466 // fold (addo x, 0) -> x + no carry out
3467 if (isNullOrNullSplat(N1))
3468 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3469
3470 // If it cannot overflow, transform into an add.
3471 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3472 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3473 DAG.getConstant(0, DL, CarryVT));
3474
3475 if (IsSigned) {
3476 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3477 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3478 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3479 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3480 } else {
3481 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3482 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3483 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3484 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3485 return CombineTo(
3486 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3487 }
3488
3489 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3490 return Combined;
3491
3492 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3493 return Combined;
3494 }
3495
3496 return SDValue();
3497}
3498
3499SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3500 EVT VT = N0.getValueType();
3501 if (VT.isVector())
3502 return SDValue();
3503
3504 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3505 // If Y + 1 cannot overflow.
3506 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3507 SDValue Y = N1.getOperand(0);
3508 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3509 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3510 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3511 N1.getOperand(2));
3512 }
3513
3514 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3515 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3516 if (SDValue Carry = getAsCarry(TLI, N1))
3517 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3518 DAG.getConstant(0, SDLoc(N), VT), Carry);
3519
3520 return SDValue();
3521}
3522
3523SDValue DAGCombiner::visitADDE(SDNode *N) {
3524 SDValue N0 = N->getOperand(0);
3525 SDValue N1 = N->getOperand(1);
3526 SDValue CarryIn = N->getOperand(2);
3527
3528 // canonicalize constant to RHS
3529 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3530 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3531 if (N0C && !N1C)
3532 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3533 N1, N0, CarryIn);
3534
3535 // fold (adde x, y, false) -> (addc x, y)
3536 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3537 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3538
3539 return SDValue();
3540}
3541
3542SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3543 SDValue N0 = N->getOperand(0);
3544 SDValue N1 = N->getOperand(1);
3545 SDValue CarryIn = N->getOperand(2);
3546 SDLoc DL(N);
3547
3548 // canonicalize constant to RHS
3549 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3550 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3551 if (N0C && !N1C)
3552 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3553
3554 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3555 if (isNullConstant(CarryIn)) {
3556 if (!LegalOperations ||
3557 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3558 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3559 }
3560
3561 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3562 if (isNullConstant(N0) && isNullConstant(N1)) {
3563 EVT VT = N0.getValueType();
3564 EVT CarryVT = CarryIn.getValueType();
3565 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3566 AddToWorklist(CarryExt.getNode());
3567 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3568 DAG.getConstant(1, DL, VT)),
3569 DAG.getConstant(0, DL, CarryVT));
3570 }
3571
3572 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3573 return Combined;
3574
3575 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3576 return Combined;
3577
3578 // We want to avoid useless duplication.
3579 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3580 // not a binary operation, it is not possible to leverage this existing
3581 // mechanism for it. However, if more operations require the same
3582 // deduplication logic, then it may be worth generalizing it.
3583 SDValue Ops[] = {N1, N0, CarryIn};
3584 SDNode *CSENode =
3585 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3586 if (CSENode)
3587 return SDValue(CSENode, 0);
3588
3589 return SDValue();
3590}
3591
3592/**
3593 * If we are facing some sort of diamond carry propagation pattern try to
3594 * break it up to generate something like:
3595 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3596 *
3597 * The end result is usually an increase in the number of operations required, but because the
3598 * carry is now linearized, other transforms can kick in and optimize the DAG.
3599 *
3600 * Patterns typically look something like
3601 * (uaddo A, B)
3602 * / \
3603 * Carry Sum
3604 * | \
3605 * | (uaddo_carry *, 0, Z)
3606 * | /
3607 * \ Carry
3608 * | /
3609 * (uaddo_carry X, *, *)
3610 *
3611 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3612 * produce a combine with a single path for carry propagation.
3613 */
3614 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3615 SelectionDAG &DAG, SDValue X,
3616 SDValue Carry0, SDValue Carry1,
3617 SDNode *N) {
3618 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3619 return SDValue();
3620 if (Carry1.getOpcode() != ISD::UADDO)
3621 return SDValue();
3622
3623 SDValue Z;
3624
3625 /**
3626 * First look for a suitable Z. It will present itself in the form of
3627 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3628 */
3629 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3630 isNullConstant(Carry0.getOperand(1))) {
3631 Z = Carry0.getOperand(2);
3632 } else if (Carry0.getOpcode() == ISD::UADDO &&
3633 isOneConstant(Carry0.getOperand(1))) {
3634 EVT VT = Carry0->getValueType(1);
3635 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3636 } else {
3637 // We couldn't find a suitable Z.
3638 return SDValue();
3639 }
3640
3641
3642 auto cancelDiamond = [&](SDValue A,SDValue B) {
3643 SDLoc DL(N);
3644 SDValue NewY =
3645 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3646 Combiner.AddToWorklist(NewY.getNode());
3647 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3648 DAG.getConstant(0, DL, X.getValueType()),
3649 NewY.getValue(1));
3650 };
3651
3652 /**
3653 * (uaddo A, B)
3654 * |
3655 * Sum
3656 * |
3657 * (uaddo_carry *, 0, Z)
3658 */
3659 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3660 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3661 }
3662
3663 /**
3664 * (uaddo_carry A, 0, Z)
3665 * |
3666 * Sum
3667 * |
3668 * (uaddo *, B)
3669 */
3670 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3671 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3672 }
3673
3674 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3675 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3676 }
3677
3678 return SDValue();
3679}
3680
3681// If we are facing some sort of diamond carry/borrow in/out pattern try to
3682// match patterns like:
3683//
3684// (uaddo A, B) CarryIn
3685// | \ |
3686// | \ |
3687// PartialSum PartialCarryOutX /
3688// | | /
3689// | ____|____________/
3690// | / |
3691// (uaddo *, *) \________
3692// | \ \
3693// | \ |
3694// | PartialCarryOutY |
3695// | \ |
3696// | \ /
3697// AddCarrySum | ______/
3698// | /
3699// CarryOut = (or *, *)
3700//
3701// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3702//
3703// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3704//
3705// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3706// with a single path for carry/borrow out propagation.
3707 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3708 SDValue N0, SDValue N1, SDNode *N) {
3709 SDValue Carry0 = getAsCarry(TLI, N0);
3710 if (!Carry0)
3711 return SDValue();
3712 SDValue Carry1 = getAsCarry(TLI, N1);
3713 if (!Carry1)
3714 return SDValue();
3715
3716 unsigned Opcode = Carry0.getOpcode();
3717 if (Opcode != Carry1.getOpcode())
3718 return SDValue();
3719 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3720 return SDValue();
3721 // Guarantee identical type of CarryOut
3722 EVT CarryOutType = N->getValueType(0);
3723 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3724 CarryOutType != Carry1.getValue(1).getValueType())
3725 return SDValue();
3726
3727 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3728 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3729 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3730 std::swap(Carry0, Carry1);
3731
3732 // Check if nodes are connected in expected way.
3733 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3734 Carry1.getOperand(1) != Carry0.getValue(0))
3735 return SDValue();
3736
3737 // The carry in value must be on the righthand side for subtraction.
3738 unsigned CarryInOperandNum =
3739 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3740 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3741 return SDValue();
3742 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3743
3744 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3745 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3746 return SDValue();
3747
3748 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3749 CarryIn = getAsCarry(TLI, CarryIn, true);
3750 if (!CarryIn)
3751 return SDValue();
3752
3753 SDLoc DL(N);
3754 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3755 Carry1->getValueType(0));
3756 SDValue Merged =
3757 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3758 Carry0.getOperand(1), CarryIn);
3759
3760 // Please note that because we have proven that the result of the UADDO/USUBO
3761 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3762 // therefore prove that if the first UADDO/USUBO overflows, the second
3763 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3764 // maximum value.
3765 //
3766 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3767 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3768 //
3769 // This is important because it means that OR and XOR can be used to merge
3770 // carry flags; and that AND can return a constant zero.
3771 //
3772 // TODO: match other operations that can merge flags (ADD, etc)
3773 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3774 if (N->getOpcode() == ISD::AND)
3775 return DAG.getConstant(0, DL, CarryOutType);
3776 return Merged.getValue(1);
3777}
3778
3779SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3780 SDValue CarryIn, SDNode *N) {
3781 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3782 // carry.
3783 if (isBitwiseNot(N0))
3784 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3785 SDLoc DL(N);
3786 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3787 N0.getOperand(0), NotC);
3788 return CombineTo(
3789 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3790 }
3791
3792 // Iff the flag result is dead:
3793 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3794 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3795 // or the dependency between the instructions.
3796 if ((N0.getOpcode() == ISD::ADD ||
3797 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3798 N0.getValue(1) != CarryIn)) &&
3799 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3800 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3801 N0.getOperand(0), N0.getOperand(1), CarryIn);
3802
3803 /**
3804 * When one of the uaddo_carry argument is itself a carry, we may be facing
3805 * a diamond carry propagation. In which case we try to transform the DAG
3806 * to ensure linear carry propagation if that is possible.
3807 */
3808 if (auto Y = getAsCarry(TLI, N1)) {
3809 // Because both are carries, Y and Z can be swapped.
3810 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3811 return R;
3812 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3813 return R;
3814 }
3815
3816 return SDValue();
3817}
3818
3819SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3820 SDValue CarryIn, SDNode *N) {
3821 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3822 if (isBitwiseNot(N0)) {
3823 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3824 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3825 N0.getOperand(0), NotC);
3826 }
3827
3828 return SDValue();
3829}
3830
3831SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3832 SDValue N0 = N->getOperand(0);
3833 SDValue N1 = N->getOperand(1);
3834 SDValue CarryIn = N->getOperand(2);
3835 SDLoc DL(N);
3836
3837 // canonicalize constant to RHS
3838 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3839 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3840 if (N0C && !N1C)
3841 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3842
3843 // fold (saddo_carry x, y, false) -> (saddo x, y)
3844 if (isNullConstant(CarryIn)) {
3845 if (!LegalOperations ||
3846 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3847 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3848 }
3849
3850 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3851 return Combined;
3852
3853 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3854 return Combined;
3855
3856 return SDValue();
3857}
3858
3859// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3860// clamp/truncation if necessary.
3861static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3862 SDValue RHS, SelectionDAG &DAG,
3863 const SDLoc &DL) {
3864 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3865 "Illegal truncation");
3866
3867 if (DstVT == SrcVT)
3868 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3869
3870 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3871 // clamping RHS.
3872 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3873 DstVT.getScalarSizeInBits());
3874 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3875 return SDValue();
3876
3877 SDValue SatLimit =
3878 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3879 DstVT.getScalarSizeInBits()),
3880 DL, SrcVT);
3881 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3882 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3883 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3884 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3885}
3886
3887// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3888// usubsat(a,b), optionally as a truncated type.
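// For example, umax(a,b) - b: when a > b this is a - b, and when a <= b the umax
// selects b and the result is 0, which is exactly the saturating-subtract behaviour.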
3889SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3890 if (N->getOpcode() != ISD::SUB ||
3891 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3892 return SDValue();
3893
3894 EVT SubVT = N->getValueType(0);
3895 SDValue Op0 = N->getOperand(0);
3896 SDValue Op1 = N->getOperand(1);
3897
3898 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3899 // they may be converted to usubsat(a,b).
3900 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3901 SDValue MaxLHS = Op0.getOperand(0);
3902 SDValue MaxRHS = Op0.getOperand(1);
3903 if (MaxLHS == Op1)
3904 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3905 if (MaxRHS == Op1)
3906 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3907 }
3908
3909 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3910 SDValue MinLHS = Op1.getOperand(0);
3911 SDValue MinRHS = Op1.getOperand(1);
3912 if (MinLHS == Op0)
3913 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3914 if (MinRHS == Op0)
3915 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3916 }
3917
3918 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3919 if (Op1.getOpcode() == ISD::TRUNCATE &&
3920 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3921 Op1.getOperand(0).hasOneUse()) {
3922 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3923 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3924 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3925 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3926 DAG, DL);
3927 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3928 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3929 DAG, DL);
3930 }
3931
3932 return SDValue();
3933}
3934
3935// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3936// counting leading ones. Broadly, it replaces the substraction with a left
3937// shift.
3938//
3939// * DAG Legalisation Pattern:
3940//
3941// (sub (ctlz (zeroextend (not Src)))
3942// BitWidthDiff)
3943//
3944// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3945// -->
3946//
3947// (ctlz_zero_undef (not (shl (anyextend Src)
3948// BitWidthDiff)))
3949//
3950// * Type Legalisation Pattern:
3951//
3952// (sub (ctlz (and (xor Src XorMask)
3953// AndMask))
3954// BitWidthDiff)
3955//
3956// if AndMask has only trailing ones
3957// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3958// and XorMask has more trailing ones than AndMask
3959// -->
3960//
3961// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
3962template <class MatchContextClass>
3963 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3964 const SDLoc DL(N);
3965 SDValue N0 = N->getOperand(0);
3966 EVT VT = N0.getValueType();
3967 unsigned BitWidth = VT.getScalarSizeInBits();
3968
3969 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3970
3971 APInt AndMask;
3972 APInt XorMask;
3973 APInt BitWidthDiff;
3974
3975 SDValue CtlzOp;
3976 SDValue Src;
3977
3978 if (!sd_context_match(
3979 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3980 return SDValue();
3981
3982 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3983 // DAG Legalisation Pattern:
3984 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3985 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3986 return SDValue();
3987
3988 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3989 } else if (sd_context_match(CtlzOp, Matcher,
3990 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3991 m_ConstInt(AndMask)))) {
3992 // Type Legalisation Pattern:
3993 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3994 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3995 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3996 return SDValue();
3997 } else
3998 return SDValue();
3999
4000 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4001 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4002 SDValue Not =
4003 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4004
4005 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4006}
4007
4008// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
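// This also covers the shift form handled below, e.g. x - ((x u/ 8) << 3) can reuse
// the remainder result of an existing udivrem(x, 8) node.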
4009 static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4010 const SDLoc &DL) {
4011 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4012 SDValue Sub0 = N->getOperand(0);
4013 SDValue Sub1 = N->getOperand(1);
4014
4015 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4016 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4017 DivRem.getOpcode() == ISD::UDIVREM) &&
4018 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4019 DivRem.getOperand(1) == MaybeY) {
4020 return SDValue(DivRem.getNode(), 1);
4021 }
4022 return SDValue();
4023 };
4024
4025 if (Sub1.getOpcode() == ISD::MUL) {
4026 // (sub x, (mul divrem(x,y)[0], y))
4027 SDValue Mul0 = Sub1.getOperand(0);
4028 SDValue Mul1 = Sub1.getOperand(1);
4029
4030 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4031 return Res;
4032
4033 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4034 return Res;
4035
4036 } else if (Sub1.getOpcode() == ISD::SHL) {
4037 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4038 SDValue Shl0 = Sub1.getOperand(0);
4039 SDValue Shl1 = Sub1.getOperand(1);
4040 // Check if Shl0 is divrem(x, Y)[0]
4041 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4042 Shl0.getOpcode() == ISD::UDIVREM) &&
4043 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4044
4045 SDValue Divisor = Shl0.getOperand(1);
4046
4047 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4048 ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4049 if (!DivC || !ShC)
4050 return SDValue();
4051
4052 if (DivC->getAPIntValue().isPowerOf2() &&
4053 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4054 return SDValue(Shl0.getNode(), 1);
4055 }
4056 }
4057 return SDValue();
4058}
4059
4060// Since it may not be valid to emit a fold to zero for vector initializers
4061// check if we can before folding.
4062static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4063 SelectionDAG &DAG, bool LegalOperations) {
4064 if (!VT.isVector())
4065 return DAG.getConstant(0, DL, VT);
4066 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4067 return DAG.getConstant(0, DL, VT);
4068 return SDValue();
4069}
4070
4071SDValue DAGCombiner::visitSUB(SDNode *N) {
4072 SDValue N0 = N->getOperand(0);
4073 SDValue N1 = N->getOperand(1);
4074 EVT VT = N0.getValueType();
4075 unsigned BitWidth = VT.getScalarSizeInBits();
4076 SDLoc DL(N);
4077
4078 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
4079 return V;
4080
4081 // fold (sub x, x) -> 0
4082 if (N0 == N1)
4083 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4084
4085 // fold (sub c1, c2) -> c3
4086 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4087 return C;
4088
4089 // fold vector ops
4090 if (VT.isVector()) {
4091 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4092 return FoldedVOp;
4093
4094 // fold (sub x, 0) -> x, vector edition
4095 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4096 return N0;
4097 }
4098
4099 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4100 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4101 if (N1.hasOneUse() && hasUMin(VT)) {
4102 SDValue Y;
4103 auto MS0 = m_Specific(N0);
4104 auto MVY = m_Value(Y);
4105 auto MZ = m_Zero();
4106 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4107 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4108
4109 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4110 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4111 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4112 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4113
4114 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4115 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4116 }
4117
4118 if (SDValue NewSel = foldBinOpIntoSelect(N))
4119 return NewSel;
4120
4121 // fold (sub x, c) -> (add x, -c)
4122 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4123 return DAG.getNode(ISD::ADD, DL, VT, N0,
4124 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4125
4126 if (isNullOrNullSplat(N0)) {
4127 // Right-shifting everything out but the sign bit followed by negation is
4128 // the same as flipping arithmetic/logical shift type without the negation:
4129 // -(X >>u 31) -> (X >>s 31)
4130 // -(X >>s 31) -> (X >>u 31)
4131 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4132 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4133 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4134 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4135 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4136 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4137 }
4138 }
4139
4140 // 0 - X --> 0 if the sub is NUW.
4141 if (N->getFlags().hasNoUnsignedWrap())
4142 return N0;
4143
4144 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
4145 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4146 // N1 must be 0 because negating the minimum signed value is undefined.
4147 if (N->getFlags().hasNoSignedWrap())
4148 return N0;
4149
4150 // 0 - X --> X if X is 0 or the minimum signed value.
4151 return N1;
4152 }
4153
4154 // Convert 0 - abs(x).
4155 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4156 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
4157 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4158 return Result;
4159
4160 // Similar to the previous rule, but this time targeting an expanded abs.
4161 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4162 // as well as
4163 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4164 // Note that these two are applicable to both signed and unsigned min/max.
4165 SDValue X;
4166 SDValue S0;
4167 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4168 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4169 m_UMax(m_Value(X), NegPat),
4170 m_SMin(m_Value(X), NegPat),
4171 m_UMin(m_Value(X), NegPat))))) {
4172 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4173 if (hasOperation(NewOpc, VT))
4174 return DAG.getNode(NewOpc, DL, VT, X, S0);
4175 }
4176
4177 // Fold neg(splat(neg(x)) -> splat(x)
4178 if (VT.isVector()) {
4179 SDValue N1S = DAG.getSplatValue(N1, true);
4180 if (N1S && N1S.getOpcode() == ISD::SUB &&
4181 isNullConstant(N1S.getOperand(0)))
4182 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4183 }
4184
4185 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
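// (and x, 1) is 0 or 1, so 0 - (and x, 1) is 0 or -1, i.e. bit 0 of x sign-extended
// to the full width.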
4186 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4187 isOneOrOneSplat(N1->getOperand(1))) {
4188 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4189 if (VT.isVector())
4190 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4191 VT.getVectorElementCount());
4192 if (!LegalOperations || TLI.getOperationAction(ISD::SIGN_EXTEND_INREG,
4193 ExtVT) == TargetLowering::Legal) {
4194 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4195 DAG.getValueType(ExtVT));
4196 }
4197 }
4198 }
4199
4200 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4201 if (isAllOnesOrAllOnesSplat(N0))
4202 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4203
4204 // fold (A - (0-B)) -> A+B
4205 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4206 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4207
4208 // fold A-(A-B) -> B
4209 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4210 return N1.getOperand(1);
4211
4212 // fold (A+B)-A -> B
4213 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4214 return N0.getOperand(1);
4215
4216 // fold (A+B)-B -> A
4217 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4218 return N0.getOperand(0);
4219
4220 // fold (A+C1)-C2 -> A+(C1-C2)
4221 if (N0.getOpcode() == ISD::ADD) {
4222 SDValue N01 = N0.getOperand(1);
4223 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4224 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4225 }
4226
4227 // fold C2-(A+C1) -> (C2-C1)-A
4228 if (N1.getOpcode() == ISD::ADD) {
4229 SDValue N11 = N1.getOperand(1);
4230 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4231 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4232 }
4233
4234 // fold (A-C1)-C2 -> A-(C1+C2)
4235 if (N0.getOpcode() == ISD::SUB) {
4236 SDValue N01 = N0.getOperand(1);
4237 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4238 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4239 }
4240
4241 // fold (c1-A)-c2 -> (c1-c2)-A
4242 if (N0.getOpcode() == ISD::SUB) {
4243 SDValue N00 = N0.getOperand(0);
4244 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4245 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4246 }
4247
4248 SDValue A, B, C;
4249
4250 // fold ((A+(B+C))-B) -> A+C
4251 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4252 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4253
4254 // fold ((A+(B-C))-B) -> A-C
4255 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4256 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4257
4258 // fold ((A-(B-C))-C) -> A-B
4259 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4260 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4261
4262 // fold (A-(B-C)) -> A+(C-B)
4263 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4264 return DAG.getNode(ISD::ADD, DL, VT, N0,
4265 DAG.getNode(ISD::SUB, DL, VT, C, B));
4266
4267 // A - (A & B) -> A & (~B)
4268 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4269 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4270 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4271
4272 // fold (A - (-B * C)) -> (A + (B * C))
4273 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4274 return DAG.getNode(ISD::ADD, DL, VT, N0,
4275 DAG.getNode(ISD::MUL, DL, VT, B, C));
4276
4277 // If either operand of a sub is undef, the result is undef
4278 if (N0.isUndef())
4279 return N0;
4280 if (N1.isUndef())
4281 return N1;
4282
4283 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4284 return V;
4285
4286 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4287 return V;
4288
4289 // Try to match AVGCEIL fixedwidth pattern
4290 if (SDValue V = foldSubToAvg(N, DL))
4291 return V;
4292
4293 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4294 return V;
4295
4296 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4297 return V;
4298
4299 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4300 return V;
4301
4302 // (A - B) - 1 -> add (xor B, -1), A
4303 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4304 m_One(/*AllowUndefs=*/true))))
4305 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4306
4307 // Look for:
4308 // sub y, (xor x, -1)
4309 // And if the target does not like this form then turn into:
4310 // add (add x, y), 1
4311 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4312 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4313 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4314 }
4315
4316 // Hoist one-use addition by non-opaque constant:
4317 // (x + C) - y -> (x - y) + C
4318 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4319 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4320 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4321 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4322 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4323 }
4324 // y - (x + C) -> (y - x) - C
4325 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4326 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4327 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4328 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4329 }
4330 // (x - C) - y -> (x - y) - C
4331 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4332 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4333 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4334 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4335 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4336 }
4337 // (C - x) - y -> C - (x + y)
4338 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4339 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4340 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4341 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4342 }
4343
4344 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4345 // rather than 'sub 0/1' (the sext should get folded).
4346 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4347 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4348 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4349 TLI.getBooleanContents(VT) ==
4350 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4351 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4352 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4353 }
4354
4355 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4356 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4357 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4358 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4359 return DAG.getNode(ISD::ABS, DL, VT, A);
4360
4361 // If the relocation model supports it, consider symbol offsets.
4362 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4363 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4364 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4365 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4366 if (GA->getGlobal() == GB->getGlobal())
4367 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4368 DL, VT);
4369 }
4370
4371 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4372 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4373 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4374 if (TN->getVT() == MVT::i1) {
4375 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4376 DAG.getConstant(1, DL, VT));
4377 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4378 }
4379 }
4380
4381 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4382 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4383 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4384 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4385 }
4386
4387 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4388 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4389 APInt NewStep = -N1.getConstantOperandAPInt(0);
4390 return DAG.getNode(ISD::ADD, DL, VT, N0,
4391 DAG.getStepVector(DL, VT, NewStep));
4392 }
4393
4394 // Prefer an add for more folding potential and possibly better codegen:
4395 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4396 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4397 SDValue ShAmt = N1.getOperand(1);
4398 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4399 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4400 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4401 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4402 }
4403 }
4404
4405 // As with the previous fold, prefer add for more folding potential.
4406 // Subtracting SMIN/0 is the same as adding SMIN/0:
4407 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4408 if (N1.getOpcode() == ISD::SHL) {
4409 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4410 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4411 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4412 }
4413
4414 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4415 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4416 N0.getResNo() == 0 && N0.hasOneUse())
4417 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4418 N0.getOperand(0), N1, N0.getOperand(2));
4419
4420 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4421 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4422 if (SDValue Carry = getAsCarry(TLI, N0)) {
4423 SDValue X = N1;
4424 SDValue Zero = DAG.getConstant(0, DL, VT);
4425 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4426 return DAG.getNode(ISD::UADDO_CARRY, DL,
4427 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4428 Carry);
4429 }
4430 }
4431
4432 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4433 // sub C0, X --> xor X, C0
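// For example, (sub 7, X) == (xor X, 7) when only the low 3 bits of X can be nonzero,
// since no borrow can then propagate out of those bits.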
4434 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4435 if (!C0->isOpaque()) {
4436 const APInt &C0Val = C0->getAPIntValue();
4437 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4438 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4439 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4440 }
4441 }
4442
4443 // smax(a,b) - smin(a,b) --> abds(a,b)
4444 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4445 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4446 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4447 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4448
4449 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4450 if (hasOperation(ISD::ABDS, VT) &&
4451 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4452 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4453 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4454
4455 // umax(a,b) - umin(a,b) --> abdu(a,b)
4456 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4457 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4458 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4459 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4460
4461 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4462 if (hasOperation(ISD::ABDU, VT) &&
4463 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4464 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4465 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4466
4467 return SDValue();
4468}
4469
4470SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4471 unsigned Opcode = N->getOpcode();
4472 SDValue N0 = N->getOperand(0);
4473 SDValue N1 = N->getOperand(1);
4474 EVT VT = N0.getValueType();
4475 bool IsSigned = Opcode == ISD::SSUBSAT;
4476 SDLoc DL(N);
4477
4478 // fold (sub_sat x, undef) -> 0
4479 if (N0.isUndef() || N1.isUndef())
4480 return DAG.getConstant(0, DL, VT);
4481
4482 // fold (sub_sat x, x) -> 0
4483 if (N0 == N1)
4484 return DAG.getConstant(0, DL, VT);
4485
4486 // fold (sub_sat c1, c2) -> c3
4487 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4488 return C;
4489
4490 // fold vector ops
4491 if (VT.isVector()) {
4492 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4493 return FoldedVOp;
4494
4495 // fold (sub_sat x, 0) -> x, vector edition
4496 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4497 return N0;
4498 }
4499
4500 // fold (sub_sat x, 0) -> x
4501 if (isNullConstant(N1))
4502 return N0;
4503
4504 // If it cannot overflow, transform into a sub.
4505 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4506 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4507
4508 return SDValue();
4509}
4510
4511SDValue DAGCombiner::visitSUBC(SDNode *N) {
4512 SDValue N0 = N->getOperand(0);
4513 SDValue N1 = N->getOperand(1);
4514 EVT VT = N0.getValueType();
4515 SDLoc DL(N);
4516
4517 // If the flag result is dead, turn this into an SUB.
4518 if (!N->hasAnyUseOfValue(1))
4519 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4520 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4521
4522 // fold (subc x, x) -> 0 + no borrow
4523 if (N0 == N1)
4524 return CombineTo(N, DAG.getConstant(0, DL, VT),
4525 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4526
4527 // fold (subc x, 0) -> x + no borrow
4528 if (isNullConstant(N1))
4529 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4530
4531 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4532 if (isAllOnesConstant(N0))
4533 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4534 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4535
4536 return SDValue();
4537}
4538
4539SDValue DAGCombiner::visitSUBO(SDNode *N) {
4540 SDValue N0 = N->getOperand(0);
4541 SDValue N1 = N->getOperand(1);
4542 EVT VT = N0.getValueType();
4543 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4544
4545 EVT CarryVT = N->getValueType(1);
4546 SDLoc DL(N);
4547
4548 // If the flag result is dead, turn this into an SUB.
4549 if (!N->hasAnyUseOfValue(1))
4550 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4551 DAG.getUNDEF(CarryVT));
4552
4553 // fold (subo x, x) -> 0 + no borrow
4554 if (N0 == N1)
4555 return CombineTo(N, DAG.getConstant(0, DL, VT),
4556 DAG.getConstant(0, DL, CarryVT));
4557
4558 // fold (subo x, c) -> (addo x, -c)
4559 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4560 if (IsSigned && !N1C->isMinSignedValue())
4561 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4562 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4563
4564 // fold (subo x, 0) -> x + no borrow
4565 if (isNullOrNullSplat(N1))
4566 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4567
4568 // If it cannot overflow, transform into a sub.
4569 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4570 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4571 DAG.getConstant(0, DL, CarryVT));
4572
4573 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4574 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4575 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4576 DAG.getConstant(0, DL, CarryVT));
4577
4578 return SDValue();
4579}
4580
4581SDValue DAGCombiner::visitSUBE(SDNode *N) {
4582 SDValue N0 = N->getOperand(0);
4583 SDValue N1 = N->getOperand(1);
4584 SDValue CarryIn = N->getOperand(2);
4585
4586 // fold (sube x, y, false) -> (subc x, y)
4587 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4588 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4589
4590 return SDValue();
4591}
4592
4593SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4594 SDValue N0 = N->getOperand(0);
4595 SDValue N1 = N->getOperand(1);
4596 SDValue CarryIn = N->getOperand(2);
4597
4598 // fold (usubo_carry x, y, false) -> (usubo x, y)
4599 if (isNullConstant(CarryIn)) {
4600 if (!LegalOperations ||
4601 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4602 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4603 }
4604
4605 return SDValue();
4606}
4607
4608SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4609 SDValue N0 = N->getOperand(0);
4610 SDValue N1 = N->getOperand(1);
4611 SDValue CarryIn = N->getOperand(2);
4612
4613 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4614 if (isNullConstant(CarryIn)) {
4615 if (!LegalOperations ||
4616 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4617 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4618 }
4619
4620 return SDValue();
4621}
4622
4623// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4624// UMULFIXSAT here.
4625SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4626 SDValue N0 = N->getOperand(0);
4627 SDValue N1 = N->getOperand(1);
4628 SDValue Scale = N->getOperand(2);
4629 EVT VT = N0.getValueType();
4630
4631 // fold (mulfix x, undef, scale) -> 0
4632 if (N0.isUndef() || N1.isUndef())
4633 return DAG.getConstant(0, SDLoc(N), VT);
4634
4635 // Canonicalize constant to RHS (vector doesn't have to splat)
4636 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4637 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4638 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4639
4640 // fold (mulfix x, 0, scale) -> 0
4641 if (isNullConstant(N1))
4642 return DAG.getConstant(0, SDLoc(N), VT);
4643
4644 return SDValue();
4645}
4646
4647template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4648 SDValue N0 = N->getOperand(0);
4649 SDValue N1 = N->getOperand(1);
4650 EVT VT = N0.getValueType();
4651 unsigned BitWidth = VT.getScalarSizeInBits();
4652 SDLoc DL(N);
4653 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4654 MatchContextClass Matcher(DAG, TLI, N);
4655
4656 // fold (mul x, undef) -> 0
4657 if (N0.isUndef() || N1.isUndef())
4658 return DAG.getConstant(0, DL, VT);
4659
4660 // fold (mul c1, c2) -> c1*c2
4661 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4662 return C;
4663
4664 // canonicalize constant to RHS (vector doesn't have to splat)
4665 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4666 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4667 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4668
4669 bool N1IsConst = false;
4670 bool N1IsOpaqueConst = false;
4671 APInt ConstValue1;
4672
4673 // fold vector ops
4674 if (VT.isVector()) {
4675 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4676 if (!UseVP)
4677 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4678 return FoldedVOp;
4679
4680 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4681 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4682 "Splat APInt should be element width");
4683 } else {
4684 N1IsConst = isa<ConstantSDNode>(N1);
4685 if (N1IsConst) {
4686 ConstValue1 = N1->getAsAPIntVal();
4687 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4688 }
4689 }
4690
4691 // fold (mul x, 0) -> 0
4692 if (N1IsConst && ConstValue1.isZero())
4693 return N1;
4694
4695 // fold (mul x, 1) -> x
4696 if (N1IsConst && ConstValue1.isOne())
4697 return N0;
4698
4699 if (!UseVP)
4700 if (SDValue NewSel = foldBinOpIntoSelect(N))
4701 return NewSel;
4702
4703 // fold (mul x, -1) -> 0-x
4704 if (N1IsConst && ConstValue1.isAllOnes())
4705 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4706
4707 // fold (mul x, (1 << c)) -> x << c
4708 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4709 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4710 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4711 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4712 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4713 SDNodeFlags Flags;
4714 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4715 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4716 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4717 }
4718 }
4719
4720 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4721 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4722 unsigned Log2Val = (-ConstValue1).logBase2();
4723
4724 // FIXME: If the input is something that is easily negated (e.g. a
4725 // single-use add), we should put the negate there.
4726 return Matcher.getNode(
4727 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4728 Matcher.getNode(ISD::SHL, DL, VT, N0,
4729 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4730 }
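// Worked example of the fold above: for x * -8, -ConstValue1 == 8, so
// Log2Val == 3 and the node becomes (sub 0, (shl x, 3)).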
4731
4732 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4733 // hi result is in use in case we hit this mid-legalization.
4734 if (!UseVP) {
4735 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4736 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4737 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4738 // TODO: Can we match commutable operands with getNodeIfExists?
4739 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4740 if (LoHi->hasAnyUseOfValue(1))
4741 return SDValue(LoHi, 0);
4742 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4743 if (LoHi->hasAnyUseOfValue(1))
4744 return SDValue(LoHi, 0);
4745 }
4746 }
4747 }
4748
4749 // Try to transform:
4750 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4751 // mul x, (2^N + 1) --> add (shl x, N), x
4752 // mul x, (2^N - 1) --> sub (shl x, N), x
4753 // Examples: x * 33 --> (x << 5) + x
4754 // x * 15 --> (x << 4) - x
4755 // x * -33 --> -((x << 5) + x)
4756 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4757 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4758 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4759 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4760 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4761 // x * 0xf800 --> (x << 16) - (x << 11)
4762 // x * -0x8800 --> -((x << 15) + (x << 11))
4763 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4764 if (!UseVP && N1IsConst &&
4765 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4766 // TODO: We could handle more general decomposition of any constant by
4767 // having the target set a limit on number of ops and making a
4768 // callback to determine that sequence (similar to sqrt expansion).
4769 unsigned MathOp = ISD::DELETED_NODE;
4770 APInt MulC = ConstValue1.abs();
4771 // The constant `2` should be treated as (2^0 + 1).
4772 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4773 MulC.lshrInPlace(TZeros);
4774 if ((MulC - 1).isPowerOf2())
4775 MathOp = ISD::ADD;
4776 else if ((MulC + 1).isPowerOf2())
4777 MathOp = ISD::SUB;
4778
4779 if (MathOp != ISD::DELETED_NODE) {
4780 unsigned ShAmt =
4781 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4782 ShAmt += TZeros;
4783 assert(ShAmt < BitWidth &&
4784 "multiply-by-constant generated out of bounds shift");
4785 SDValue Shl =
4786 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4787 SDValue R =
4788 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4789 DAG.getNode(ISD::SHL, DL, VT, N0,
4790 DAG.getConstant(TZeros, DL, VT)))
4791 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4792 if (ConstValue1.isNegative())
4793 R = DAG.getNegative(R, DL, VT);
4794 return R;
4795 }
4796 }
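// Worked example of the decomposition above: for x * 0x8800, MulC starts as
// 0x8800, TZeros == 11 and MulC becomes 0x11 (17). Since 17 - 1 == 16 is a
// power of two, MathOp is ISD::ADD and ShAmt == 4 + 11 == 15, giving
// (add (shl x, 15), (shl x, 11)).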
4797
4798 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4799 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4800 SDValue N01 = N0.getOperand(1);
4801 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4802 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4803 }
4804
4805 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4806 // use.
4807 {
4808 SDValue Sh, Y;
4809
4810 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4811 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4812     isConstantOrConstantVector(N0.getOperand(1))) {
4813 Sh = N0; Y = N1;
4814 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4815     isConstantOrConstantVector(N1.getOperand(1))) {
4816 Sh = N1; Y = N0;
4817 }
4818
4819 if (Sh.getNode()) {
4820 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4821 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4822 }
4823 }
4824
4825 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4826 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4830 return Matcher.getNode(
4831 ISD::ADD, DL, VT,
4832 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4833 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4834
4835 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4836 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4837 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4838 const APInt &C0 = N0.getConstantOperandAPInt(0);
4839 const APInt &C1 = NC1->getAPIntValue();
4840 return DAG.getVScale(DL, VT, C0 * C1);
4841 }
4842
4843 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4844 APInt MulVal;
4845 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4846 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4847 const APInt &C0 = N0.getConstantOperandAPInt(0);
4848 APInt NewStep = C0 * MulVal;
4849 return DAG.getStepVector(DL, VT, NewStep);
4850 }
4851
4852 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4853 SDValue X;
4854 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4856 N, Matcher,
4858 m_Deferred(X)))) {
4859 return Matcher.getNode(ISD::ABS, DL, VT, X);
4860 }
4861
4862 // Fold (mul x, 0/undef) -> 0 and
4863 //      (mul x, 1) -> x, per vector element,
4864 // into and(x, mask).
4865 // We can replace vectors with '0' and '1' factors with a clearing mask.
4866 if (VT.isFixedLengthVector()) {
4867 unsigned NumElts = VT.getVectorNumElements();
4868 SmallBitVector ClearMask;
4869 ClearMask.reserve(NumElts);
4870 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4871 if (!V || V->isZero()) {
4872 ClearMask.push_back(true);
4873 return true;
4874 }
4875 ClearMask.push_back(false);
4876 return V->isOne();
4877 };
4878 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4879 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4880 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4881 EVT LegalSVT = N1.getOperand(0).getValueType();
4882 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4883 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4885 for (unsigned I = 0; I != NumElts; ++I)
4886 if (ClearMask[I])
4887 Mask[I] = Zero;
4888 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4889 }
4890 }
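// For example, (mul v4i32:x, <1, 0, 1, 0>) becomes (and x, <-1, 0, -1, 0>):
// lanes multiplied by 1 are kept and lanes multiplied by 0 (or undef) are
// cleared.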
4891
4892 // reassociate mul
4893 // TODO: Change reassociateOps to support vp ops.
4894 if (!UseVP)
4895 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4896 return RMUL;
4897
4898 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4899 // TODO: Change reassociateReduction to support vp ops.
4900 if (!UseVP)
4901 if (SDValue SD =
4902 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4903 return SD;
4904
4905 // Simplify the operands using demanded-bits information.
4906 if (SimplifyDemandedBits(SDValue(N, 0)))
4907 return SDValue(N, 0);
4908
4909 return SDValue();
4910}
4911
4912/// Return true if divmod libcall is available.
4913static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4914 const TargetLowering &TLI) {
4915 RTLIB::Libcall LC;
4916 EVT NodeType = Node->getValueType(0);
4917 if (!NodeType.isSimple())
4918 return false;
4919 switch (NodeType.getSimpleVT().SimpleTy) {
4920 default: return false; // No libcall for vector types.
4921 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4922 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4923 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4924 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4925 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4926 }
4927
4928 return TLI.getLibcallName(LC) != nullptr;
4929}
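// Note: whether such a combined libcall exists is target-dependent. For
// instance, ARM EABI targets typically provide __aeabi_idivmod and
// __aeabi_uidivmod, which return the quotient and remainder together.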
4930
4931/// Issue divrem if both quotient and remainder are needed.
4932SDValue DAGCombiner::useDivRem(SDNode *Node) {
4933 if (Node->use_empty())
4934 return SDValue(); // This is a dead node, leave it alone.
4935
4936 unsigned Opcode = Node->getOpcode();
4937 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4938 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4939
4940 // DivMod libcalls can still handle types that are not legal in the DAG.
4941 EVT VT = Node->getValueType(0);
4942 if (VT.isVector() || !VT.isInteger())
4943 return SDValue();
4944
4945 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4946 return SDValue();
4947
4948 // If DIVREM is going to get expanded into a libcall,
4949 // but there is no libcall available, then don't combine.
4950 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4951     !isDivRemLibcallAvailable(Node, isSigned, TLI))
4952 return SDValue();
4953
4954 // If div is legal, it's better to do the normal expansion
4955 unsigned OtherOpcode = 0;
4956 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4957 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4958 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4959 return SDValue();
4960 } else {
4961 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4962 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4963 return SDValue();
4964 }
4965
4966 SDValue Op0 = Node->getOperand(0);
4967 SDValue Op1 = Node->getOperand(1);
4968 SDValue combined;
4969 for (SDNode *User : Op0->users()) {
4970 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4971 User->use_empty())
4972 continue;
4973 // Convert the other matching node(s), too;
4974 // otherwise, the DIVREM may get target-legalized into something
4975 // target-specific that we won't be able to recognize.
4976 unsigned UserOpc = User->getOpcode();
4977 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4978 User->getOperand(0) == Op0 &&
4979 User->getOperand(1) == Op1) {
4980 if (!combined) {
4981 if (UserOpc == OtherOpcode) {
4982 SDVTList VTs = DAG.getVTList(VT, VT);
4983 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4984 } else if (UserOpc == DivRemOpc) {
4985 combined = SDValue(User, 0);
4986 } else {
4987 assert(UserOpc == Opcode);
4988 continue;
4989 }
4990 }
4991 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4992 CombineTo(User, combined);
4993 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4994 CombineTo(User, combined.getValue(1));
4995 }
4996 }
4997 return combined;
4998}
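// Illustration: given both
//   %q = sdiv i32 %a, %b
//   %r = srem i32 %a, %b
// the combine above rewrites the users of both nodes to the two results of a
// single ISD::SDIVREM node, so the division is computed only once.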
4999
5000static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5001 SDValue N0 = N->getOperand(0);
5002 SDValue N1 = N->getOperand(1);
5003 EVT VT = N->getValueType(0);
5004 SDLoc DL(N);
5005
5006 unsigned Opc = N->getOpcode();
5007 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5008 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5009
5010 // X / undef -> undef
5011 // X % undef -> undef
5012 // X / 0 -> undef
5013 // X % 0 -> undef
5014 // NOTE: This includes vectors where any divisor element is zero/undef.
5015 if (DAG.isUndef(Opc, {N0, N1}))
5016 return DAG.getUNDEF(VT);
5017
5018 // undef / X -> 0
5019 // undef % X -> 0
5020 if (N0.isUndef())
5021 return DAG.getConstant(0, DL, VT);
5022
5023 // 0 / X -> 0
5024 // 0 % X -> 0
5025 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5026 if (N0C && N0C->isZero())
5027 return N0;
5028
5029 // X / X -> 1
5030 // X % X -> 0
5031 if (N0 == N1)
5032 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5033
5034 // X / 1 -> X
5035 // X % 1 -> 0
5036 // If this is a boolean op (single-bit element type), we can't have
5037 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5038 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5039 // it's a 1.
5040 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
5041 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5042
5043 return SDValue();
5044}
5045
5046SDValue DAGCombiner::visitSDIV(SDNode *N) {
5047 SDValue N0 = N->getOperand(0);
5048 SDValue N1 = N->getOperand(1);
5049 EVT VT = N->getValueType(0);
5050 EVT CCVT = getSetCCResultType(VT);
5051 SDLoc DL(N);
5052
5053 // fold (sdiv c1, c2) -> c1/c2
5054 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5055 return C;
5056
5057 // fold vector ops
5058 if (VT.isVector())
5059 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5060 return FoldedVOp;
5061
5062 // fold (sdiv X, -1) -> 0-X
5063 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5064 if (N1C && N1C->isAllOnes())
5065 return DAG.getNegative(N0, DL, VT);
5066
5067 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5068 if (N1C && N1C->isMinSignedValue())
5069 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5070 DAG.getConstant(1, DL, VT),
5071 DAG.getConstant(0, DL, VT));
5072
5073 if (SDValue V = simplifyDivRem(N, DAG))
5074 return V;
5075
5076 if (SDValue NewSel = foldBinOpIntoSelect(N))
5077 return NewSel;
5078
5079 // If we know the sign bits of both operands are zero, strength reduce to a
5080 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5081 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5082 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5083
5084 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5085 // If the corresponding remainder node exists, update its users with
5086 // (Dividend - (Quotient * Divisor)).
5087 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5088 { N0, N1 })) {
5089 // If the sdiv has the exact flag we shouldn't propagate it to the
5090 // remainder node.
5091 if (!N->getFlags().hasExact()) {
5092 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5093 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5094 AddToWorklist(Mul.getNode());
5095 AddToWorklist(Sub.getNode());
5096 CombineTo(RemNode, Sub);
5097 }
5098 }
5099 return V;
5100 }
5101
5102 // sdiv, srem -> sdivrem
5103 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5104 // true. Otherwise, we break the simplification logic in visitREM().
5105 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5106 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5107 if (SDValue DivRem = useDivRem(N))
5108 return DivRem;
5109
5110 return SDValue();
5111}
5112
5113static bool isDivisorPowerOfTwo(SDValue Divisor) {
5114 // Helper for determining whether a value is a power-2 constant scalar or a
5115 // vector of such elements.
5116 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5117 if (C->isZero() || C->isOpaque())
5118 return false;
5119 if (C->getAPIntValue().isPowerOf2())
5120 return true;
5121 if (C->getAPIntValue().isNegatedPowerOf2())
5122 return true;
5123 return false;
5124 };
5125
5126 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5127}
5128
5129SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5130 SDLoc DL(N);
5131 EVT VT = N->getValueType(0);
5132 EVT CCVT = getSetCCResultType(VT);
5133 unsigned BitWidth = VT.getScalarSizeInBits();
5134
5135 // fold (sdiv X, pow2) -> simple ops after legalize
5136 // FIXME: We check for the exact bit here because the generic lowering gives
5137 // better results in that case. The target-specific lowering should learn how
5138 // to handle exact sdivs efficiently.
5139 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5140 // Target-specific implementation of sdiv x, pow2.
5141 if (SDValue Res = BuildSDIVPow2(N))
5142 return Res;
5143
5144 // Create constants that are functions of the shift amount value.
5145 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5146 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5147 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5148 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5149 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5150 if (!isConstantOrConstantVector(Inexact))
5151 return SDValue();
5152
5153 // Splat the sign bit into the register
5154 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5155 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5156 AddToWorklist(Sign.getNode());
5157
5158 // Add (N0 < 0) ? abs(N1) - 1 : 0 so the following sra rounds towards zero.
5159 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5160 AddToWorklist(Srl.getNode());
5161 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5162 AddToWorklist(Add.getNode());
5163 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5164 AddToWorklist(Sra.getNode());
5165
5166 // Special case: (sdiv X, 1) -> X
5167 // Special Case: (sdiv X, -1) -> 0-X
5168 SDValue One = DAG.getConstant(1, DL, VT);
5169 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5170 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5171 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5172 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5173 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5174
5175 // If dividing by a positive value, we're done. Otherwise, the result must
5176 // be negated.
5177 SDValue Zero = DAG.getConstant(0, DL, VT);
5178 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5179
5180 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5181 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5182 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5183 return Res;
5184 }
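// Worked example of the expansion above for (sdiv i32 X, 8): C1 == cttz(8) == 3
// and Inexact == 32 - 3 == 29, so Srl adds 7 to X only when X is negative, and
// the quotient is (sra (add X, (srl (sra X, 31), 29)), 3).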
5185
5186 // If integer divide is expensive and we satisfy the requirements, emit an
5187 // alternate sequence. Targets may check function attributes for size/speed
5188 // trade-offs.
5189 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5190 if (isConstantOrConstantVector(N1) &&
5191     !TLI.isIntDivCheap(N->getValueType(0), Attr))
5192 if (SDValue Op = BuildSDIV(N))
5193 return Op;
5194
5195 return SDValue();
5196}
5197
5198SDValue DAGCombiner::visitUDIV(SDNode *N) {
5199 SDValue N0 = N->getOperand(0);
5200 SDValue N1 = N->getOperand(1);
5201 EVT VT = N->getValueType(0);
5202 EVT CCVT = getSetCCResultType(VT);
5203 SDLoc DL(N);
5204
5205 // fold (udiv c1, c2) -> c1/c2
5206 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5207 return C;
5208
5209 // fold vector ops
5210 if (VT.isVector())
5211 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5212 return FoldedVOp;
5213
5214 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5215 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5216 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5217 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5218 DAG.getConstant(1, DL, VT),
5219 DAG.getConstant(0, DL, VT));
5220 }
5221
5222 if (SDValue V = simplifyDivRem(N, DAG))
5223 return V;
5224
5225 if (SDValue NewSel = foldBinOpIntoSelect(N))
5226 return NewSel;
5227
5228 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5229 // If the corresponding remainder node exists, update its users with
5230 // (Dividend - (Quotient * Divisor)).
5231 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5232 { N0, N1 })) {
5233 // If the udiv has the exact flag we shouldn't propagate it to the
5234 // remainder node.
5235 if (!N->getFlags().hasExact()) {
5236 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5237 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5238 AddToWorklist(Mul.getNode());
5239 AddToWorklist(Sub.getNode());
5240 CombineTo(RemNode, Sub);
5241 }
5242 }
5243 return V;
5244 }
5245
5246 // udiv, urem -> udivrem
5247 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5248 // true. Otherwise, we break the simplification logic in visitREM().
5249 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5250 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5251 if (SDValue DivRem = useDivRem(N))
5252 return DivRem;
5253
5254 // Simplify the operands using demanded-bits information.
5255 // We don't have demanded bits support for UDIV so this just enables constant
5256 // folding based on known bits.
5257 if (SimplifyDemandedBits(SDValue(N, 0)))
5258 return SDValue(N, 0);
5259
5260 return SDValue();
5261}
5262
5263SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5264 SDLoc DL(N);
5265 EVT VT = N->getValueType(0);
5266
5267 // fold (udiv x, (1 << c)) -> x >>u c
5268 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5269 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5270 AddToWorklist(LogBase2.getNode());
5271
5272 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5273 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5274 AddToWorklist(Trunc.getNode());
5275 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5276 }
5277 }
5278
5279 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5280 if (N1.getOpcode() == ISD::SHL) {
5281 SDValue N10 = N1.getOperand(0);
5282 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5283 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5284 AddToWorklist(LogBase2.getNode());
5285
5286 EVT ADDVT = N1.getOperand(1).getValueType();
5287 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5288 AddToWorklist(Trunc.getNode());
5289 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5290 AddToWorklist(Add.getNode());
5291 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5292 }
5293 }
5294 }
5295
5296 // fold (udiv x, c) -> alternate
5297 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5298 if (isConstantOrConstantVector(N1) &&
5299     !TLI.isIntDivCheap(N->getValueType(0), Attr))
5300 if (SDValue Op = BuildUDIV(N))
5301 return Op;
5302
5303 return SDValue();
5304}
5305
5306SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5307 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5308 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5309 // Target-specific implementation of srem x, pow2.
5310 if (SDValue Res = BuildSREMPow2(N))
5311 return Res;
5312 }
5313 return SDValue();
5314}
5315
5316// handles ISD::SREM and ISD::UREM
5317SDValue DAGCombiner::visitREM(SDNode *N) {
5318 unsigned Opcode = N->getOpcode();
5319 SDValue N0 = N->getOperand(0);
5320 SDValue N1 = N->getOperand(1);
5321 EVT VT = N->getValueType(0);
5322 EVT CCVT = getSetCCResultType(VT);
5323
5324 bool isSigned = (Opcode == ISD::SREM);
5325 SDLoc DL(N);
5326
5327 // fold (rem c1, c2) -> c1%c2
5328 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5329 return C;
5330
5331 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5332 // Freeze the numerator to avoid a miscompile with an undefined value.
5333 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5334 CCVT.isVector() == VT.isVector()) {
5335 SDValue F0 = DAG.getFreeze(N0);
5336 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5337 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5338 }
5339
5340 if (SDValue V = simplifyDivRem(N, DAG))
5341 return V;
5342
5343 if (SDValue NewSel = foldBinOpIntoSelect(N))
5344 return NewSel;
5345
5346 if (isSigned) {
5347 // If we know the sign bits of both operands are zero, strength reduce to a
5348 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5349 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5350 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5351 } else {
5352 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5353 // fold (urem x, pow2) -> (and x, pow2-1)
5354 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5355 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5356 AddToWorklist(Add.getNode());
5357 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5358 }
5359 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5360 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5361 // TODO: We should sink the following into isKnownToBePowerOfTwo
5362 // using a OrZero parameter analogous to our handling in ValueTracking.
5363 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5365 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5366 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5367 AddToWorklist(Add.getNode());
5368 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5369 }
5370 }
5371
5372 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5373
5374 // If X/C can be simplified by the division-by-constant logic, lower
5375 // X%C to the equivalent of X-X/C*C.
5376 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5377 // speculative DIV must not cause a DIVREM conversion. We guard against this
5378 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5379 // combine will not return a DIVREM. Regardless, checking cheapness here
5380 // makes sense since the simplification results in fatter code.
5381 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5382 if (isSigned) {
5383 // check if we can build faster implementation for srem
5384 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5385 return OptimizedRem;
5386 }
5387
5388 SDValue OptimizedDiv =
5389 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5390 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5391 // If the equivalent Div node also exists, update its users.
5392 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5393 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5394 { N0, N1 }))
5395 CombineTo(DivNode, OptimizedDiv);
5396 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5397 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5398 AddToWorklist(OptimizedDiv.getNode());
5399 AddToWorklist(Mul.getNode());
5400 return Sub;
5401 }
5402 }
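// For example, on a target where division is expensive, (urem x, 7) is
// rewritten here as (sub x, (mul (udiv x, 7), 7)), and the udiv is expanded by
// the DIVLike combines above (typically into a multiply-by-magic-constant
// sequence).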
5403
5404 // sdiv, srem -> sdivrem / udiv, urem -> udivrem
5405 if (SDValue DivRem = useDivRem(N))
5406 return DivRem.getValue(1);
5407
5408 return SDValue();
5409}
5410
5411SDValue DAGCombiner::visitMULHS(SDNode *N) {
5412 SDValue N0 = N->getOperand(0);
5413 SDValue N1 = N->getOperand(1);
5414 EVT VT = N->getValueType(0);
5415 SDLoc DL(N);
5416
5417 // fold (mulhs c1, c2)
5418 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5419 return C;
5420
5421 // canonicalize constant to RHS.
5424 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5425
5426 if (VT.isVector()) {
5427 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5428 return FoldedVOp;
5429
5430 // fold (mulhs x, 0) -> 0
5431 // Do not return N1 itself, because it may contain undef elements.
5433 return DAG.getConstant(0, DL, VT);
5434 }
5435
5436 // fold (mulhs x, 0) -> 0
5437 if (isNullConstant(N1))
5438 return N1;
5439
5440 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5441 if (isOneConstant(N1))
5442 return DAG.getNode(
5443 ISD::SRA, DL, VT, N0,
5445
5446 // fold (mulhs x, undef) -> 0
5447 if (N0.isUndef() || N1.isUndef())
5448 return DAG.getConstant(0, DL, VT);
5449
5450 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5451 // plus a shift.
5452 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5453 !VT.isVector()) {
5454 MVT Simple = VT.getSimpleVT();
5455 unsigned SimpleSize = Simple.getSizeInBits();
5456 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5457 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5458 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5459 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5460 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5461 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5462 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5463 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5464 }
5465 }
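// For example, (mulhs i16 a, b) on a target with a legal i32 multiply becomes
// (trunc (srl (mul (sext a), (sext b)), 16)).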
5466
5467 return SDValue();
5468}
5469
5470SDValue DAGCombiner::visitMULHU(SDNode *N) {
5471 SDValue N0 = N->getOperand(0);
5472 SDValue N1 = N->getOperand(1);
5473 EVT VT = N->getValueType(0);
5474 SDLoc DL(N);
5475
5476 // fold (mulhu c1, c2)
5477 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5478 return C;
5479
5480 // canonicalize constant to RHS.
5483 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5484
5485 if (VT.isVector()) {
5486 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5487 return FoldedVOp;
5488
5489 // fold (mulhu x, 0) -> 0
5490 // Do not return N1 itself, because it may contain undef elements.
5492 return DAG.getConstant(0, DL, VT);
5493 }
5494
5495 // fold (mulhu x, 0) -> 0
5496 if (isNullConstant(N1))
5497 return N1;
5498
5499 // fold (mulhu x, 1) -> 0
5500 if (isOneConstant(N1))
5501 return DAG.getConstant(0, DL, VT);
5502
5503 // fold (mulhu x, undef) -> 0
5504 if (N0.isUndef() || N1.isUndef())
5505 return DAG.getConstant(0, DL, VT);
5506
5507 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5508 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5509 hasOperation(ISD::SRL, VT)) {
5510 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5511 unsigned NumEltBits = VT.getScalarSizeInBits();
5512 SDValue SRLAmt = DAG.getNode(
5513 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5514 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5515 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5516 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5517 }
5518 }
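// e.g. (mulhu i32 x, 16) is the high 32 bits of the 64-bit product x * 16,
// i.e. x >> (32 - 4), so the fold above emits (srl x, 28).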
5519
5520 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5521 // plus a shift.
5522 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5523 !VT.isVector()) {
5524 MVT Simple = VT.getSimpleVT();
5525 unsigned SimpleSize = Simple.getSizeInBits();
5526 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5527 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5528 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5529 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5530 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5531 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5532 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5533 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5534 }
5535 }
5536
5537 // Simplify the operands using demanded-bits information.
5538 // We don't have demanded bits support for MULHU so this just enables constant
5539 // folding based on known bits.
5540 if (SimplifyDemandedBits(SDValue(N, 0)))
5541 return SDValue(N, 0);
5542
5543 return SDValue();
5544}
5545
5546SDValue DAGCombiner::visitAVG(SDNode *N) {
5547 unsigned Opcode = N->getOpcode();
5548 SDValue N0 = N->getOperand(0);
5549 SDValue N1 = N->getOperand(1);
5550 EVT VT = N->getValueType(0);
5551 SDLoc DL(N);
5552 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5553
5554 // fold (avg c1, c2)
5555 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5556 return C;
5557
5558 // canonicalize constant to RHS.
5561 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5562
5563 if (VT.isVector())
5564 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5565 return FoldedVOp;
5566
5567 // fold (avg x, undef) -> x
5568 if (N0.isUndef())
5569 return N1;
5570 if (N1.isUndef())
5571 return N0;
5572
5573 // fold (avg x, x) --> x
5574 if (N0 == N1 && Level >= AfterLegalizeTypes)
5575 return N0;
5576
5577 // fold (avgfloor x, 0) -> x >> 1
5578 SDValue X, Y;
5580 return DAG.getNode(ISD::SRA, DL, VT, X,
5581 DAG.getShiftAmountConstant(1, VT, DL));
5583 return DAG.getNode(ISD::SRL, DL, VT, X,
5584 DAG.getShiftAmountConstant(1, VT, DL));
5585
5586 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5587 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5588 if (!IsSigned &&
5589 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5590 X.getValueType() == Y.getValueType() &&
5591 hasOperation(Opcode, X.getValueType())) {
5592 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5593 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5594 }
5595 if (IsSigned &&
5596 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5597 X.getValueType() == Y.getValueType() &&
5598 hasOperation(Opcode, X.getValueType())) {
5599 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5600 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5601 }
5602
5603 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5604 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5605 // Check if avgflooru isn't legal/custom but avgceilu is.
5606 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5607 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5608 if (DAG.isKnownNeverZero(N1))
5609 return DAG.getNode(
5610 ISD::AVGCEILU, DL, VT, N0,
5611 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5612 if (DAG.isKnownNeverZero(N0))
5613 return DAG.getNode(
5614 ISD::AVGCEILU, DL, VT, N1,
5615 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5616 }
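// Rationale for the fold above: avgflooru(x, y) == (x + y) >> 1 computed
// without overflow, and for y != 0 this equals ((x + (y - 1)) + 1) >> 1, which
// is exactly avgceilu(x, y - 1); the y - 1 cannot wrap because y is known to
// be non-zero.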
5617
5618 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5619 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5620 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5621 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5622 SDValue Add;
5623 if (sd_match(N,
5624 m_c_BinOp(Opcode,
5626 m_One())) ||
5627 sd_match(N, m_c_BinOp(Opcode,
5629 m_Value(Y)))) {
5630
5631 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5632 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5633
5634 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5635 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5636 }
5637 }
5638
5639 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5640 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5641 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5642 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5643 }
5644
5645 return SDValue();
5646}
5647
5648SDValue DAGCombiner::visitABD(SDNode *N) {
5649 unsigned Opcode = N->getOpcode();
5650 SDValue N0 = N->getOperand(0);
5651 SDValue N1 = N->getOperand(1);
5652 EVT VT = N->getValueType(0);
5653 SDLoc DL(N);
5654
5655 // fold (abd c1, c2)
5656 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5657 return C;
5658
5659 // canonicalize constant to RHS.
5662 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5663
5664 if (VT.isVector())
5665 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5666 return FoldedVOp;
5667
5668 // fold (abd x, undef) -> 0
5669 if (N0.isUndef() || N1.isUndef())
5670 return DAG.getConstant(0, DL, VT);
5671
5672 // fold (abd x, x) -> 0
5673 if (N0 == N1)
5674 return DAG.getConstant(0, DL, VT);
5675
5676 SDValue X;
5677
5678 // fold (abds x, 0) -> abs x
5680 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5681 return DAG.getNode(ISD::ABS, DL, VT, X);
5682
5683 // fold (abdu x, 0) -> x
5685 return X;
5686
5687 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5688 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5689 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5690 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5691
5692 return SDValue();
5693}
5694
5695/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5696/// give the opcodes for the two computations that are being performed. Returns
5697/// the simplified value if a simplification was made, or an empty SDValue.
5698SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5699 unsigned HiOp) {
5700 // If the high half is not needed, just compute the low half.
5701 bool HiExists = N->hasAnyUseOfValue(1);
5702 if (!HiExists && (!LegalOperations ||
5703 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5704 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5705 return CombineTo(N, Res, Res);
5706 }
5707
5708 // If the low half is not needed, just compute the high half.
5709 bool LoExists = N->hasAnyUseOfValue(0);
5710 if (!LoExists && (!LegalOperations ||
5711 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5712 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5713 return CombineTo(N, Res, Res);
5714 }
5715
5716 // If both halves are used, return as it is.
5717 if (LoExists && HiExists)
5718 return SDValue();
5719
5720 // If the two computed results can be simplified separately, separate them.
5721 if (LoExists) {
5722 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5723 AddToWorklist(Lo.getNode());
5724 SDValue LoOpt = combine(Lo.getNode());
5725 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5726 (!LegalOperations ||
5727 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5728 return CombineTo(N, LoOpt, LoOpt);
5729 }
5730
5731 if (HiExists) {
5732 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5733 AddToWorklist(Hi.getNode());
5734 SDValue HiOpt = combine(Hi.getNode());
5735 if (HiOpt.getNode() && HiOpt != Hi &&
5736 (!LegalOperations ||
5737 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5738 return CombineTo(N, HiOpt, HiOpt);
5739 }
5740
5741 return SDValue();
5742}
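// For example, when visitSMUL_LOHI calls this with LoOp == ISD::MUL and
// HiOp == ISD::MULHS and only result 0 of the node is used, the smul_lohi is
// replaced by a plain (mul a, b); if only result 1 is used, it becomes
// (mulhs a, b).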
5743
5744SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5745 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5746 return Res;
5747
5748 SDValue N0 = N->getOperand(0);
5749 SDValue N1 = N->getOperand(1);
5750 EVT VT = N->getValueType(0);
5751 SDLoc DL(N);
5752
5753 // Constant fold.
5754 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5755 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5756
5757 // canonicalize constant to RHS (vector doesn't have to splat)
5760 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5761
5762 // If the type twice as wide is legal, transform the smul_lohi into a wider
5763 // multiply plus a shift.
5764 if (VT.isSimple() && !VT.isVector()) {
5765 MVT Simple = VT.getSimpleVT();
5766 unsigned SimpleSize = Simple.getSizeInBits();
5767 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5768 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5769 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5770 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5771 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5772 // Compute the high part as N1.
5773 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5774 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5775 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5776 // Compute the low part as N0.
5777 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5778 return CombineTo(N, Lo, Hi);
5779 }
5780 }
5781
5782 return SDValue();
5783}
5784
5785SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5786 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5787 return Res;
5788
5789 SDValue N0 = N->getOperand(0);
5790 SDValue N1 = N->getOperand(1);
5791 EVT VT = N->getValueType(0);
5792 SDLoc DL(N);
5793
5794 // Constant fold.
5795 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5796 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5797
5798 // canonicalize constant to RHS (vector doesn't have to splat)
5801 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5802
5803 // (umul_lohi N0, 0) -> (0, 0)
5804 if (isNullConstant(N1)) {
5805 SDValue Zero = DAG.getConstant(0, DL, VT);
5806 return CombineTo(N, Zero, Zero);
5807 }
5808
5809 // (umul_lohi N0, 1) -> (N0, 0)
5810 if (isOneConstant(N1)) {
5811 SDValue Zero = DAG.getConstant(0, DL, VT);
5812 return CombineTo(N, N0, Zero);
5813 }
5814
5815 // If the type twice as wide is legal, transform the umul_lohi into a wider
5816 // multiply plus a shift.
5817 if (VT.isSimple() && !VT.isVector()) {
5818 MVT Simple = VT.getSimpleVT();
5819 unsigned SimpleSize = Simple.getSizeInBits();
5820 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5821 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5822 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5823 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5824 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5825 // Compute the high part as N1.
5826 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5827 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5828 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5829 // Compute the low part as N0.
5830 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5831 return CombineTo(N, Lo, Hi);
5832 }
5833 }
5834
5835 return SDValue();
5836}
5837
5838SDValue DAGCombiner::visitMULO(SDNode *N) {
5839 SDValue N0 = N->getOperand(0);
5840 SDValue N1 = N->getOperand(1);
5841 EVT VT = N0.getValueType();
5842 bool IsSigned = (ISD::SMULO == N->getOpcode());
5843
5844 EVT CarryVT = N->getValueType(1);
5845 SDLoc DL(N);
5846
5847 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5848 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5849
5850 // fold operation with constant operands.
5851 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5852 // multiple results.
5853 if (N0C && N1C) {
5854 bool Overflow;
5855 APInt Result =
5856 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5857 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5858 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5859 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5860 }
5861
5862 // canonicalize constant to RHS.
5865 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5866
5867 // fold (mulo x, 0) -> 0 + no carry out
5868 if (isNullOrNullSplat(N1))
5869 return CombineTo(N, DAG.getConstant(0, DL, VT),
5870 DAG.getConstant(0, DL, CarryVT));
5871
5872 // (mulo x, 2) -> (addo x, x)
5873 // FIXME: This needs a freeze.
5874 if (N1C && N1C->getAPIntValue() == 2 &&
5875 (!IsSigned || VT.getScalarSizeInBits() > 2))
5876 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5877 N->getVTList(), N0, N0);
5878
5879 // A 1 bit SMULO overflows if both inputs are 1.
5880 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5881 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5882 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5883 DAG.getConstant(0, DL, VT), ISD::SETNE);
5884 return CombineTo(N, And, Cmp);
5885 }
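// Reasoning for the 1-bit case above: the only i1 values are 0 and -1, and
// -1 * -1 == +1 is not representable in a signed 1-bit type, so the low bit of
// the product is (and x, y) and overflow occurs exactly when both inputs are
// -1.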
5886
5887 // If it cannot overflow, transform into a mul.
5888 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5889 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5890 DAG.getConstant(0, DL, CarryVT));
5891 return SDValue();
5892}
5893
5894// Determine whether the Min/Max pair of SDNodes (potentially swapped around)
5895// makes a saturate pattern, clamping either to the signed range
5896// [-2^(BW-1), 2^(BW-1)-1] or to the unsigned range [0, 2^BW-1].
5897// Returns the node being clamped, setting BW to the bitwidth of the clamp and
5898// Unsigned to whether the clamp is unsigned. Works with both SMIN/SMAX nodes
5899// and the setcc/select combo. The operands match SimplifySelectCC: N0<N1 ? N2 : N3.
5900static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5901 SDValue N3, ISD::CondCode CC, unsigned &BW,
5902 bool &Unsigned, SelectionDAG &DAG) {
5903 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5904 ISD::CondCode CC) {
5905 // The compare and select operand should be the same or the select operands
5906 // should be truncated versions of the comparison.
5907 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5908 return 0;
5909 // The constants need to be the same or a truncated version of each other.
5912 if (!N1C || !N3C)
5913 return 0;
5914 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5915 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5916 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5917 return 0;
5918 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5919 };
5920
5921 // Check the initial value is a SMIN/SMAX equivalent.
5922 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5923 if (!Opcode0)
5924 return SDValue();
5925
5926 // We may only need one of the two range checks if the fptosi can never
5927 // produce the upper value.
5928 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5929 if (isNullOrNullSplat(N3)) {
5930 EVT IntVT = N0.getValueType().getScalarType();
5931 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5932 if (FPVT.isSimple()) {
5933 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5934 const fltSemantics &Semantics = InputTy->getFltSemantics();
5935 uint32_t MinBitWidth =
5936 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5937 if (IntVT.getSizeInBits() >= MinBitWidth) {
5938 Unsigned = true;
5939 BW = PowerOf2Ceil(MinBitWidth);
5940 return N0;
5941 }
5942 }
5943 }
5944 }
5945
5946 SDValue N00, N01, N02, N03;
5947 ISD::CondCode N0CC;
5948 switch (N0.getOpcode()) {
5949 case ISD::SMIN:
5950 case ISD::SMAX:
5951 N00 = N02 = N0.getOperand(0);
5952 N01 = N03 = N0.getOperand(1);
5953 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5954 break;
5955 case ISD::SELECT_CC:
5956 N00 = N0.getOperand(0);
5957 N01 = N0.getOperand(1);
5958 N02 = N0.getOperand(2);
5959 N03 = N0.getOperand(3);
5960 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5961 break;
5962 case ISD::SELECT:
5963 case ISD::VSELECT:
5964 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5965 return SDValue();
5966 N00 = N0.getOperand(0).getOperand(0);
5967 N01 = N0.getOperand(0).getOperand(1);
5968 N02 = N0.getOperand(1);
5969 N03 = N0.getOperand(2);
5970 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5971 break;
5972 default:
5973 return SDValue();
5974 }
5975
5976 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5977 if (!Opcode1 || Opcode0 == Opcode1)
5978 return SDValue();
5979
5980 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5981 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5982 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5983 return SDValue();
5984
5985 const APInt &MinC = MinCOp->getAPIntValue();
5986 const APInt &MaxC = MaxCOp->getAPIntValue();
5987 APInt MinCPlus1 = MinC + 1;
5988 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5989 BW = MinCPlus1.exactLogBase2() + 1;
5990 Unsigned = false;
5991 return N02;
5992 }
5993
5994 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5995 BW = MinCPlus1.exactLogBase2();
5996 Unsigned = true;
5997 return N02;
5998 }
5999
6000 return SDValue();
6001}
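// Example: smin(smax(x, -128), 127), or the equivalent setcc/select form,
// returns x with BW == 8 and Unsigned == false, because -MaxC == MinC + 1 ==
// 128 is a power of two.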
6002
6003static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6004 SDValue N3, ISD::CondCode CC,
6005 SelectionDAG &DAG) {
6006 unsigned BW;
6007 bool Unsigned;
6008 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6009 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6010 return SDValue();
6011 EVT FPVT = Fp.getOperand(0).getValueType();
6012 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6013 if (FPVT.isVector())
6014 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6015 FPVT.getVectorElementCount());
6016 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6017 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6018 return SDValue();
6019 SDLoc DL(Fp);
6020 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6021 DAG.getValueType(NewVT.getScalarType()));
6022 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6023}
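// Example: a clamp of (fp_to_sint f32 x) to [-128, 127] becomes
// (fp_to_sint_sat x, i8) sign-extended back to the original result type,
// provided the target's shouldConvertFpToSat hook accepts the conversion.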
6024
6025static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6026 SDValue N3, ISD::CondCode CC,
6027 SelectionDAG &DAG) {
6028 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6030 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
6030 // be truncated versions of the setcc (N0/N1).
6031 if ((N0 != N2 &&
6032 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6033 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6034 return SDValue();
6035 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6036 ConstantSDNode *N3C = isConstOrConstSplat(N3);
6037 if (!N1C || !N3C)
6038 return SDValue();
6039 const APInt &C1 = N1C->getAPIntValue();
6040 const APInt &C3 = N3C->getAPIntValue();
6041 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6042 C1 != C3.zext(C1.getBitWidth()))
6043 return SDValue();
6044
6045 unsigned BW = (C1 + 1).exactLogBase2();
6046 EVT FPVT = N0.getOperand(0).getValueType();
6047 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6048 if (FPVT.isVector())
6049 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6050 FPVT.getVectorElementCount());
6052 FPVT, NewVT))
6053 return SDValue();
6054
6055 SDValue Sat =
6056 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6057 DAG.getValueType(NewVT.getScalarType()));
6058 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6059}
6060
6061SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6062 SDValue N0 = N->getOperand(0);
6063 SDValue N1 = N->getOperand(1);
6064 EVT VT = N0.getValueType();
6065 unsigned Opcode = N->getOpcode();
6066 SDLoc DL(N);
6067
6068 // fold operation with constant operands.
6069 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6070 return C;
6071
6072 // If the operands are the same, this is a no-op.
6073 if (N0 == N1)
6074 return N0;
6075
6076 // Fold operation with vscale operands.
6077 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6078 uint64_t C0 = N0->getConstantOperandVal(0);
6079 uint64_t C1 = N1->getConstantOperandVal(0);
6080 if (Opcode == ISD::UMAX)
6081 return C0 > C1 ? N0 : N1;
6082 else if (Opcode == ISD::UMIN)
6083 return C0 > C1 ? N1 : N0;
6084 }
6085
6086 // canonicalize constant to RHS
6089 return DAG.getNode(Opcode, DL, VT, N1, N0);
6090
6091 // fold vector ops
6092 if (VT.isVector())
6093 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6094 return FoldedVOp;
6095
6096 // reassociate minmax
6097 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6098 return RMINMAX;
6099
6100 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6101 // Only do this if either:
6102 // 1. The current op isn't legal and the flipped is.
6103 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6104 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6105 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6106 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6107 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6108 unsigned AltOpcode;
6109 switch (Opcode) {
6110 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6111 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6112 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6113 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6114 default: llvm_unreachable("Unknown MINMAX opcode");
6115 }
6116 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6117 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6118 }
6119
6120 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6121 if (SDValue S = PerformMinMaxFpToSatCombine(
6122         N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6123 return S;
6124 if (Opcode == ISD::UMIN)
6125 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6126 return S;
6127
6128 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6129 auto ReductionOpcode = [](unsigned Opcode) {
6130 switch (Opcode) {
6131 case ISD::SMIN:
6132 return ISD::VECREDUCE_SMIN;
6133 case ISD::SMAX:
6134 return ISD::VECREDUCE_SMAX;
6135 case ISD::UMIN:
6136 return ISD::VECREDUCE_UMIN;
6137 case ISD::UMAX:
6138 return ISD::VECREDUCE_UMAX;
6139 default:
6140 llvm_unreachable("Unexpected opcode");
6141 }
6142 };
6143 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6144 SDLoc(N), VT, N0, N1))
6145 return SD;
6146
6147 // Simplify the operands using demanded-bits information.
6148 if (SimplifyDemandedBits(SDValue(N, 0)))
6149 return SDValue(N, 0);
6150
6151 return SDValue();
6152}
6153
6154/// If this is a bitwise logic instruction and both operands have the same
6155/// opcode, try to sink the other opcode after the logic instruction.
6156SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6157 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6158 EVT VT = N0.getValueType();
6159 unsigned LogicOpcode = N->getOpcode();
6160 unsigned HandOpcode = N0.getOpcode();
6161 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6162 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6163
6164 // Bail early if none of these transforms apply.
6165 if (N0.getNumOperands() == 0)
6166 return SDValue();
6167
6168 // FIXME: We should check number of uses of the operands to not increase
6169 // the instruction count for all transforms.
6170
6171 // Handle size-changing casts (or sign_extend_inreg).
6172 SDValue X = N0.getOperand(0);
6173 SDValue Y = N1.getOperand(0);
6174 EVT XVT = X.getValueType();
6175 SDLoc DL(N);
6176 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6177 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6178 N0.getOperand(1) == N1.getOperand(1))) {
6179 // If both operands have other uses, this transform would create extra
6180 // instructions without eliminating anything.
6181 if (!N0.hasOneUse() && !N1.hasOneUse())
6182 return SDValue();
6183 // We need matching integer source types.
6184 if (XVT != Y.getValueType())
6185 return SDValue();
6186 // Don't create an illegal op during or after legalization. Don't ever
6187 // create an unsupported vector op.
6188 if ((VT.isVector() || LegalOperations) &&
6189 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6190 return SDValue();
6191 // Avoid infinite looping with PromoteIntBinOp.
6192 // TODO: Should we apply desirable/legal constraints to all opcodes?
6193 if ((HandOpcode == ISD::ANY_EXTEND ||
6194 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6195 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6196 return SDValue();
6197 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6198 SDNodeFlags LogicFlags;
6199 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6200 ISD::isExtOpcode(HandOpcode));
6201 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6202 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6203 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6204 return DAG.getNode(HandOpcode, DL, VT, Logic);
6205 }
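// For instance, (and (zext i8 x to i32), (zext i8 y to i32)) becomes
// (zext (and x, y) to i32), performing the logic op in the narrower type.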
6206
6207 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6208 if (HandOpcode == ISD::TRUNCATE) {
6209 // If both operands have other uses, this transform would create extra
6210 // instructions without eliminating anything.
6211 if (!N0.hasOneUse() && !N1.hasOneUse())
6212 return SDValue();
6213 // We need matching source types.
6214 if (XVT != Y.getValueType())
6215 return SDValue();
6216 // Don't create an illegal op during or after legalization.
6217 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6218 return SDValue();
6219 // Be extra careful sinking truncate. If it's free, there's no benefit in
6220 // widening a binop. Also, don't create a logic op on an illegal type.
6221 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6222 return SDValue();
6223 if (!TLI.isTypeLegal(XVT))
6224 return SDValue();
6225 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6226 return DAG.getNode(HandOpcode, DL, VT, Logic);
6227 }
6228
6229 // For binops SHL/SRL/SRA/AND:
6230 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6231 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6232 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6233 N0.getOperand(1) == N1.getOperand(1)) {
6234 // If either operand has other uses, this transform is not an improvement.
6235 if (!N0.hasOneUse() || !N1.hasOneUse())
6236 return SDValue();
6237 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6238 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6239 }
6240
6241 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6242 if (HandOpcode == ISD::BSWAP) {
6243 // If either operand has other uses, this transform is not an improvement.
6244 if (!N0.hasOneUse() || !N1.hasOneUse())
6245 return SDValue();
6246 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6247 return DAG.getNode(HandOpcode, DL, VT, Logic);
6248 }
6249
6250 // For funnel shifts FSHL/FSHR:
6251 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6252 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6253 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6254 N0.getOperand(2) == N1.getOperand(2)) {
6255 if (!N0.hasOneUse() || !N1.hasOneUse())
6256 return SDValue();
6257 SDValue X1 = N0.getOperand(1);
6258 SDValue Y1 = N1.getOperand(1);
6259 SDValue S = N0.getOperand(2);
6260 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6261 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6262 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6263 }
6264
6265 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6266 // Only perform this optimization up until type legalization, before
6267 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6268 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6269 // we don't want to undo this promotion.
6270 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6271 // on scalars.
6272 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6273 Level <= AfterLegalizeTypes) {
6274 // Input types must be integer and the same.
6275 if (XVT.isInteger() && XVT == Y.getValueType() &&
6276 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6277 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6278 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6279 return DAG.getNode(HandOpcode, DL, VT, Logic);
6280 }
6281 }
6282
6283 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6284 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6285 // If both shuffles use the same mask, and both shuffle within a single
6286 // vector, then it is worthwhile to move the swizzle after the operation.
6287 // The type-legalizer generates this pattern when loading illegal
6288 // vector types from memory. In many cases this allows additional shuffle
6289 // optimizations.
6290 // There are other cases where moving the shuffle after the xor/and/or
6291 // is profitable even if shuffles don't perform a swizzle.
6292 // If both shuffles use the same mask, and both shuffles have the same first
6293 // or second operand, then it might still be profitable to move the shuffle
6294 // after the xor/and/or operation.
6295 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6296 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6297 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6298 assert(X.getValueType() == Y.getValueType() &&
6299 "Inputs to shuffles are not the same type");
6300
6301 // Check that both shuffles use the same mask. The masks are known to be of
6302 // the same length because the result vector type is the same.
6303 // Check also that shuffles have only one use to avoid introducing extra
6304 // instructions.
6305 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6306 !SVN0->getMask().equals(SVN1->getMask()))
6307 return SDValue();
6308
6309 // Don't try to fold this node if it requires introducing a
6310 // build vector of all zeros that might be illegal at this stage.
6311 SDValue ShOp = N0.getOperand(1);
6312 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6313 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6314
6315 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6316 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6317 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6318 N0.getOperand(0), N1.getOperand(0));
6319 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6320 }
6321
6322 // Don't try to fold this node if it requires introducing a
6323 // build vector of all zeros that might be illegal at this stage.
6324 ShOp = N0.getOperand(0);
6325 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6326 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6327
6328 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6329 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6330 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6331 N1.getOperand(1));
6332 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6333 }
6334 }
6335
6336 return SDValue();
6337}
6338
6339/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6340SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6341 const SDLoc &DL) {
6342 SDValue LL, LR, RL, RR, N0CC, N1CC;
6343 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6344 !isSetCCEquivalent(N1, RL, RR, N1CC))
6345 return SDValue();
6346
6347 assert(N0.getValueType() == N1.getValueType() &&
6348 "Unexpected operand types for bitwise logic op");
6349 assert(LL.getValueType() == LR.getValueType() &&
6350 RL.getValueType() == RR.getValueType() &&
6351 "Unexpected operand types for setcc");
6352
6353 // If we're here post-legalization or the logic op type is not i1, the logic
6354 // op type must match a setcc result type. Also, all folds require new
6355 // operations on the left and right operands, so those types must match.
6356 EVT VT = N0.getValueType();
6357 EVT OpVT = LL.getValueType();
6358 if (LegalOperations || VT.getScalarType() != MVT::i1)
6359 if (VT != getSetCCResultType(OpVT))
6360 return SDValue();
6361 if (OpVT != RL.getValueType())
6362 return SDValue();
6363
6364 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6365 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6366 bool IsInteger = OpVT.isInteger();
6367 if (LR == RR && CC0 == CC1 && IsInteger) {
6368 bool IsZero = isNullOrNullSplat(LR);
6369 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6370
6371 // All bits clear?
6372 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6373 // All sign bits clear?
6374 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6375 // Any bits set?
6376 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6377 // Any sign bits set?
6378 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6379
6380 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6381 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6382 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6383 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6384 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6385 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6386 AddToWorklist(Or.getNode());
6387 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6388 }
6389
6390 // All bits set?
6391 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6392 // All sign bits set?
6393 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6394 // Any bits clear?
6395 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6396 // Any sign bits clear?
6397 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6398
6399 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6400 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6401 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6402 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6403 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6404 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6405 AddToWorklist(And.getNode());
6406 return DAG.getSetCC(DL, VT, And, LR, CC1);
6407 }
6408 }
6409
6410 // TODO: What is the 'or' equivalent of this fold?
6411 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
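// For example, with i8 X: (add X, 1) wraps 0 to 1 and -1 to 0, so the unsigned
// comparison (add X, 1) >= 2 holds exactly when X is neither 0 nor -1.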
6412 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6413 IsInteger && CC0 == ISD::SETNE &&
6414 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6415 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6416 SDValue One = DAG.getConstant(1, DL, OpVT);
6417 SDValue Two = DAG.getConstant(2, DL, OpVT);
6418 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6419 AddToWorklist(Add.getNode());
6420 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6421 }
6422
6423 // Try more general transforms if the predicates match and the only user of
6424 // the compares is the 'and' or 'or'.
6425 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6426 N0.hasOneUse() && N1.hasOneUse()) {
6427 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6428 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6429 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6430 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6431 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6432 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6433 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6434 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6435 }
6436
6437 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6438 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6439 // Match a shared variable operand and 2 non-opaque constant operands.
6440 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6441 // The difference of the constants must be a single bit.
6442 const APInt &CMax =
6443 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6444 const APInt &CMin =
6445 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6446 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6447 };
6448 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6449 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6450 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
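// For example, (X != 5) & (X != 7): CMax - CMin == 2 is a power of 2, so this
// becomes ((X - 5) & ~2) != 0, which is false only for X == 5 and X == 7.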
6451 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6452 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6453 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6454 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6455 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6456 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6457 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6458 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6459 }
6460 }
6461 }
6462
6463 // Canonicalize equivalent operands to LL == RL.
6464 if (LL == RR && LR == RL) {
6465 CC1 = ISD::getSetCCSwappedOperands(CC1);
6466 std::swap(RL, RR);
6467 }
6468
6469 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6470 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6471 if (LL == RL && LR == RR) {
6472 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6473 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6474 if (NewCC != ISD::SETCC_INVALID &&
6475 (!LegalOperations ||
6476 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6477 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6478 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6479 }
6480
6481 return SDValue();
6482}
6483
6484static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6485 SelectionDAG &DAG) {
6486 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6487}
6488
6489static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6490 SelectionDAG &DAG) {
6491 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6492}
6493
6494// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6495static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6496 ISD::CondCode CC, unsigned OrAndOpcode,
6497 SelectionDAG &DAG,
6498 bool isFMAXNUMFMINNUM_IEEE,
6499 bool isFMAXNUMFMINNUM) {
6500 // The optimization cannot be applied for all the predicates because
6501 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6502 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6503 // applied at all if one of the operands is a signaling NaN.
6504
6505 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6506 // are non-NaN values.
6507 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6508 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6509 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6510 isFMAXNUMFMINNUM_IEEE
6511 ? ISD::FMINNUM_IEEE
6512 : ISD::DELETED_NODE;
6513 }
6514
6515 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6516 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6517 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6518 isFMAXNUMFMINNUM_IEEE
6519 ? ISD::FMAXNUM_IEEE
6520 : ISD::DELETED_NODE;
6521 }
6522
6523 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6524 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6525 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6526 // that there are not any sNaNs, then the optimization is not valid
6527 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6528 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6529 // we can prove that we do not have any sNaNs, then we can do the
6530 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6531 // cases.
6532 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6533 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6534 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6535 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6536 isFMAXNUMFMINNUM_IEEE
6537 ? ISD::FMINNUM_IEEE
6538 : ISD::DELETED_NODE;
6539 }
6540
6541 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6542 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6543 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6544 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6545 isFMAXNUMFMINNUM_IEEE
6546 ? ISD::FMAXNUM_IEEE
6547 : ISD::DELETED_NODE;
6548 }
6549
6550 return ISD::DELETED_NODE;
6551}
6552
6553 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6554 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6555 assert(
6556 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6557 "Invalid Op to combine SETCC with");
6558
6559 // TODO: Search past casts/truncates.
6560 SDValue LHS = LogicOp->getOperand(0);
6561 SDValue RHS = LogicOp->getOperand(1);
6562 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6563 !LHS->hasOneUse() || !RHS->hasOneUse())
6564 return SDValue();
6565
6566 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6567 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6568 LogicOp, LHS.getNode(), RHS.getNode());
6569
6570 SDValue LHS0 = LHS->getOperand(0);
6571 SDValue RHS0 = RHS->getOperand(0);
6572 SDValue LHS1 = LHS->getOperand(1);
6573 SDValue RHS1 = RHS->getOperand(1);
6574 // TODO: We don't actually need a splat here, for vectors we just need the
6575 // invariants to hold for each element.
6576 auto *LHS1C = isConstOrConstSplat(LHS1);
6577 auto *RHS1C = isConstOrConstSplat(RHS1);
6578 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6579 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6580 EVT VT = LogicOp->getValueType(0);
6581 EVT OpVT = LHS0.getValueType();
6582 SDLoc DL(LogicOp);
6583
6584 // Check if the operands of an and/or operation are comparisons and if they
6585 // compare against the same value. Replace the and/or-cmp-cmp sequence with a
6586 // min/max-cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6587 // sequence will be replaced with a min-cmp sequence:
6588 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6589 // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
6590 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6591 // The optimization does not work for `==` or `!=` .
6592 // The two comparisons should have either the same predicate or the
6593 // predicate of one of the comparisons is the opposite of the other one.
6594 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6595 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6596 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6597 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6598 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6599 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6600 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6601 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6602 (OpVT.isFloatingPoint() &&
6603 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6604 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6605 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6606 CCL != ISD::SETTRUE &&
6607 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6608
6609 SDValue CommonValue, Operand1, Operand2;
6610 ISD::CondCode CC = ISD::SETCC_INVALID;
6611 if (CCL == CCR) {
6612 if (LHS0 == RHS0) {
6613 CommonValue = LHS0;
6614 Operand1 = LHS1;
6615 Operand2 = RHS1;
6616 CC = ISD::getSetCCSwappedOperands(CCL);
6617 } else if (LHS1 == RHS1) {
6618 CommonValue = LHS1;
6619 Operand1 = LHS0;
6620 Operand2 = RHS0;
6621 CC = CCL;
6622 }
6623 } else {
6624 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6625 if (LHS0 == RHS1) {
6626 CommonValue = LHS0;
6627 Operand1 = LHS1;
6628 Operand2 = RHS0;
6629 CC = CCR;
6630 } else if (RHS0 == LHS1) {
6631 CommonValue = LHS1;
6632 Operand1 = LHS0;
6633 Operand2 = RHS1;
6634 CC = CCL;
6635 }
6636 }
6637
6638 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6639 // handle it using OR/AND.
6640 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6641 CC = ISD::SETCC_INVALID;
6642 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6643 CC = ISD::SETCC_INVALID;
6644
6645 if (CC != ISD::SETCC_INVALID) {
6646 unsigned NewOpcode = ISD::DELETED_NODE;
6647 bool IsSigned = isSignedIntSetCC(CC);
6648 if (OpVT.isInteger()) {
6649 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6650 CC == ISD::SETLT || CC == ISD::SETULT);
6651 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6652 if (IsLess == IsOr)
6653 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6654 else
6655 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6656 } else if (OpVT.isFloatingPoint())
6657 NewOpcode =
6658 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6659 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6660
6661 if (NewOpcode != ISD::DELETED_NODE) {
6662 SDValue MinMaxValue =
6663 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6664 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6665 }
6666 }
6667 }
6668
6669 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6670 LHS0.getValueType() == RHS0.getValueType() &&
6671 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6672 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6673 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6674
6675 if (TargetPreference == AndOrSETCCFoldKind::None)
6676 return SDValue();
6677
6678 if (CCL == CCR &&
6679 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6680 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6681 const APInt &APLhs = LHS1C->getAPIntValue();
6682 const APInt &APRhs = RHS1C->getAPIntValue();
6683
6684 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6685 // case this is just a compare).
6686 if (APLhs == (-APRhs) &&
6687 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6688 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6689 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6690 // (icmp eq A, C) | (icmp eq A, -C)
6691 // -> (icmp eq Abs(A), C)
6692 // (icmp ne A, C) & (icmp ne A, -C)
6693 // -> (icmp ne Abs(A), C)
6694 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6695 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6696 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6697 } else if (TargetPreference &
6698 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6699
6700 // AndOrSETCCFoldKind::AddAnd:
6701 // A == C0 | A == C1
6702 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6703 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6704 // A != C0 & A != C1
6705 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6706 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6707
6708 // AndOrSETCCFoldKind::NotAnd:
6709 // A == C0 | A == C1
6710 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6711 // -> ~A & smin(C0, C1) == 0
6712 // A != C0 & A != C1
6713 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6714 // -> ~A & smin(C0, C1) != 0
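// For example, A == 4 | A == 8: the difference is 4 (a power of 2), so the
// AddAnd form gives ((A - 4) & ~4) == 0, which holds only for A == 4 and A == 8.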
6715
6716 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6717 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6718 APInt Dif = MaxC - MinC;
6719 if (!Dif.isZero() && Dif.isPowerOf2()) {
6720 if (MaxC.isAllOnes() &&
6721 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6722 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6723 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6724 DAG.getConstant(MinC, DL, OpVT));
6725 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6726 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6727 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6728
6729 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6730 DAG.getConstant(-MinC, DL, OpVT));
6731 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6732 DAG.getConstant(~Dif, DL, OpVT));
6733 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6734 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6735 }
6736 }
6737 }
6738 }
6739
6740 return SDValue();
6741}
6742
6743// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6744// We canonicalize to the `select` form in the middle end, but the `and` form
6745 // gets better codegen on all tested targets (arm, x86, riscv).
6746 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6747 const SDLoc &DL, SelectionDAG &DAG) {
6748 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6749 if (!isNullConstant(F))
6750 return SDValue();
6751
6752 EVT CondVT = Cond.getValueType();
6753 if (TLI.getBooleanContents(CondVT) !=
6754 TargetLowering::ZeroOrOneBooleanContent)
6755 return SDValue();
6756
6757 if (T.getOpcode() != ISD::AND)
6758 return SDValue();
6759
6760 if (!isOneConstant(T.getOperand(1)))
6761 return SDValue();
6762
6763 EVT OpVT = T.getValueType();
6764
6765 SDValue CondMask =
6766 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6767 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6768}
6769
6770/// This contains all DAGCombine rules which reduce two values combined by
6771/// an And operation to a single value. This makes them reusable in the context
6772/// of visitSELECT(). Rules involving constants are not included as
6773/// visitSELECT() already handles those cases.
6774SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6775 EVT VT = N1.getValueType();
6776 SDLoc DL(N);
6777
6778 // fold (and x, undef) -> 0
6779 if (N0.isUndef() || N1.isUndef())
6780 return DAG.getConstant(0, DL, VT);
6781
6782 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6783 return V;
6784
6785 // Canonicalize:
6786 // and(x, add) -> and(add, x)
6787 if (N1.getOpcode() == ISD::ADD)
6788 std::swap(N0, N1);
6789
6790 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6791 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6792 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6793 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6794 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6795 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6796 // immediate for an add, but it is legal if its top c2 bits are set,
6797 // transform the ADD so the immediate doesn't need to be materialized
6798 // in a register.
6799 APInt ADDC = ADDI->getAPIntValue();
6800 APInt SRLC = SRLI->getAPIntValue();
6801 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6802 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6803 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6804 SRLC.getZExtValue());
6805 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6806 ADDC |= Mask;
6807 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6808 SDLoc DL0(N0);
6809 SDValue NewAdd =
6810 DAG.getNode(ISD::ADD, DL0, VT,
6811 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6812 CombineTo(N0.getNode(), NewAdd);
6813 // Return N so it doesn't get rechecked!
6814 return SDValue(N, 0);
6815 }
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 return SDValue();
6823}
6824
6825bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6826 EVT LoadResultTy, EVT &ExtVT) {
6827 if (!AndC->getAPIntValue().isMask())
6828 return false;
6829
6830 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6831
6832 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6833 EVT LoadedVT = LoadN->getMemoryVT();
6834
6835 if (ExtVT == LoadedVT &&
6836 (!LegalOperations ||
6837 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6838 // ZEXTLOAD will match without needing to change the size of the value being
6839 // loaded.
6840 return true;
6841 }
6842
6843 // Do not change the width of volatile or atomic loads.
6844 if (!LoadN->isSimple())
6845 return false;
6846
6847 // Do not generate loads of non-round integer types since these can
6848 // be expensive (and would be wrong if the type is not byte sized).
6849 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6850 return false;
6851
6852 if (LegalOperations &&
6853 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6854 return false;
6855
6856 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6857 return false;
6858
6859 return true;
6860}
6861
6862bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6863 ISD::LoadExtType ExtType, EVT &MemVT,
6864 unsigned ShAmt) {
6865 if (!LDST)
6866 return false;
6867
6868 // Only allow byte offsets.
6869 if (ShAmt % 8)
6870 return false;
6871 const unsigned ByteShAmt = ShAmt / 8;
6872
6873 // Do not generate loads of non-round integer types since these can
6874 // be expensive (and would be wrong if the type is not byte sized).
6875 if (!MemVT.isRound())
6876 return false;
6877
6878 // Don't change the width of volatile or atomic loads.
6879 if (!LDST->isSimple())
6880 return false;
6881
6882 EVT LdStMemVT = LDST->getMemoryVT();
6883
6884 // Bail out when changing the scalable property, since we can't be sure that
6885 // we're actually narrowing here.
6886 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6887 return false;
6888
6889 // Verify that we are actually reducing a load width here.
6890 if (LdStMemVT.bitsLT(MemVT))
6891 return false;
6892
6893 // Ensure that this isn't going to produce an unsupported memory access.
6894 if (ShAmt) {
6895 const Align LDSTAlign = LDST->getAlign();
6896 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6897 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6898 LDST->getAddressSpace(), NarrowAlign,
6899 LDST->getMemOperand()->getFlags()))
6900 return false;
6901 }
6902
6903 // It's not possible to generate a constant of extended or untyped type.
6904 EVT PtrType = LDST->getBasePtr().getValueType();
6905 if (PtrType == MVT::Untyped || PtrType.isExtended())
6906 return false;
6907
6908 if (isa<LoadSDNode>(LDST)) {
6909 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6910 // Don't transform one with multiple uses, this would require adding a new
6911 // load.
6912 if (!SDValue(Load, 0).hasOneUse())
6913 return false;
6914
6915 if (LegalOperations &&
6916 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6917 return false;
6918
6919 // For the transform to be legal, the load must produce only two values
6920 // (the value loaded and the chain). Don't transform a pre-increment
6921 // load, for example, which produces an extra value. Otherwise the
6922 // transformation is not equivalent, and the downstream logic to replace
6923 // uses gets things wrong.
6924 if (Load->getNumValues() > 2)
6925 return false;
6926
6927 // If the load that we're shrinking is an extload and we're not just
6928 // discarding the extension we can't simply shrink the load. Bail.
6929 // TODO: It would be possible to merge the extensions in some cases.
6930 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6931 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6932 return false;
6933
6934 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
6935 return false;
6936 } else {
6937 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6938 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6939 // Can't write outside the original store
6940 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6941 return false;
6942
6943 if (LegalOperations &&
6944 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6945 return false;
6946 }
6947 return true;
6948}
6949
6950 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6951 SmallVectorImpl<LoadSDNode*> &Loads,
6952 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6953 ConstantSDNode *Mask,
6954 SDNode *&NodeToMask) {
6955 // Recursively search for the operands, looking for loads which can be
6956 // narrowed.
6957 for (SDValue Op : N->op_values()) {
6958 if (Op.getValueType().isVector())
6959 return false;
6960
6961 // Some constants may need fixing up later if they are too large.
6962 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6963 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
6964 "Expected bitwise logic operation");
6965 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
6966 NodesWithConsts.insert(N);
6967 continue;
6968 }
6969
6970 if (!Op.hasOneUse())
6971 return false;
6972
6973 switch(Op.getOpcode()) {
6974 case ISD::LOAD: {
6975 auto *Load = cast<LoadSDNode>(Op);
6976 EVT ExtVT;
6977 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6978 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6979
6980 // ZEXTLOAD is already small enough.
6981 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6982 ExtVT.bitsGE(Load->getMemoryVT()))
6983 continue;
6984
6985 // Use LE to convert equal sized loads to zext.
6986 if (ExtVT.bitsLE(Load->getMemoryVT()))
6987 Loads.push_back(Load);
6988
6989 continue;
6990 }
6991 return false;
6992 }
6993 case ISD::ZERO_EXTEND:
6994 case ISD::AssertZext: {
6995 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6996 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6997 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6998 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6999 Op.getOperand(0).getValueType();
7000
7001 // We can accept extending nodes if the mask is wider or an equal
7002 // width to the original type.
7003 if (ExtVT.bitsGE(VT))
7004 continue;
7005 break;
7006 }
7007 case ISD::OR:
7008 case ISD::XOR:
7009 case ISD::AND:
7010 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7011 NodeToMask))
7012 return false;
7013 continue;
7014 }
7015
7016 // Allow one node which will be masked along with any loads found.
7017 if (NodeToMask)
7018 return false;
7019
7020 // Also ensure that the node to be masked only produces one data result.
7021 NodeToMask = Op.getNode();
7022 if (NodeToMask->getNumValues() > 1) {
7023 bool HasValue = false;
7024 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7025 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7026 if (VT != MVT::Glue && VT != MVT::Other) {
7027 if (HasValue) {
7028 NodeToMask = nullptr;
7029 return false;
7030 }
7031 HasValue = true;
7032 }
7033 }
7034 assert(HasValue && "Node to be masked has no data result?");
7035 }
7036 }
7037 return true;
7038}
7039
7040bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7041 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7042 if (!Mask)
7043 return false;
7044
7045 if (!Mask->getAPIntValue().isMask())
7046 return false;
7047
7048 // No need to do anything if the and directly uses a load.
7049 if (isa<LoadSDNode>(N->getOperand(0)))
7050 return false;
7051
7052 SmallVector<LoadSDNode*, 8> Loads;
7053 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7054 SDNode *FixupNode = nullptr;
7055 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7056 if (Loads.empty())
7057 return false;
7058
7059 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7060 SDValue MaskOp = N->getOperand(1);
7061
7062 // If it exists, fixup the single node we allow in the tree that needs
7063 // masking.
7064 if (FixupNode) {
7065 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7066 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7067 FixupNode->getValueType(0),
7068 SDValue(FixupNode, 0), MaskOp);
7069 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7070 if (And.getOpcode() == ISD::AND)
7071 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7072 }
7073
7074 // Narrow any constants that need it.
7075 for (auto *LogicN : NodesWithConsts) {
7076 SDValue Op0 = LogicN->getOperand(0);
7077 SDValue Op1 = LogicN->getOperand(1);
7078
7079 // We only need to fix AND if both inputs are constants. And we only need
7080 // to fix one of the constants.
7081 if (LogicN->getOpcode() == ISD::AND &&
7082 (!isa<ConstantSDNode>(Op0) || !isa<ConstantSDNode>(Op1)))
7083 continue;
7084
7085 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7086 Op0 =
7087 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7088
7089 if (isa<ConstantSDNode>(Op1))
7090 Op1 =
7091 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7092
7093 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7094 std::swap(Op0, Op1);
7095
7096 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7097 }
7098
7099 // Create narrow loads.
7100 for (auto *Load : Loads) {
7101 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7102 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7103 SDValue(Load, 0), MaskOp);
7104 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7105 if (And.getOpcode() == ISD::AND)
7106 And = SDValue(
7107 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7108 SDValue NewLoad = reduceLoadWidth(And.getNode());
7109 assert(NewLoad &&
7110 "Shouldn't be masking the load if it can't be narrowed");
7111 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7112 }
7113 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7114 return true;
7115 }
7116 return false;
7117}
7118
7119// Unfold
7120// x & (-1 'logical shift' y)
7121// To
7122// (x 'opposite logical shift' y) 'logical shift' y
7123// if it is better for performance.
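// For example, x & (-1 << y) clears the low y bits of x; so does
// (x >> y) << y, but without materializing the -1 << y mask.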
7124SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7125 assert(N->getOpcode() == ISD::AND);
7126
7127 SDValue N0 = N->getOperand(0);
7128 SDValue N1 = N->getOperand(1);
7129
7130 // Do we actually prefer shifts over mask?
7131 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
7132 return SDValue();
7133
7134 // Try to match (-1 '[outer] logical shift' y)
7135 unsigned OuterShift;
7136 unsigned InnerShift; // The opposite direction to the OuterShift.
7137 SDValue Y; // Shift amount.
7138 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7139 if (!M.hasOneUse())
7140 return false;
7141 OuterShift = M->getOpcode();
7142 if (OuterShift == ISD::SHL)
7143 InnerShift = ISD::SRL;
7144 else if (OuterShift == ISD::SRL)
7145 InnerShift = ISD::SHL;
7146 else
7147 return false;
7148 if (!isAllOnesConstant(M->getOperand(0)))
7149 return false;
7150 Y = M->getOperand(1);
7151 return true;
7152 };
7153
7154 SDValue X;
7155 if (matchMask(N1))
7156 X = N0;
7157 else if (matchMask(N0))
7158 X = N1;
7159 else
7160 return SDValue();
7161
7162 SDLoc DL(N);
7163 EVT VT = N->getValueType(0);
7164
7165 // tmp = x 'opposite logical shift' y
7166 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7167 // ret = tmp 'logical shift' y
7168 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7169
7170 return T1;
7171}
7172
7173/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7174/// For a target with a bit test, this is expected to become test + set and save
7175 /// at least 1 instruction.
7176 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7177 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7178
7179 // Look through an optional extension.
7180 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7181 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7182 And0 = And0.getOperand(0);
7183 if (!isOneConstant(And1) || !And0.hasOneUse())
7184 return SDValue();
7185
7186 SDValue Src = And0;
7187
7188 // Attempt to find a 'not' op.
7189 // TODO: Should we favor test+set even without the 'not' op?
7190 bool FoundNot = false;
7191 if (isBitwiseNot(Src)) {
7192 FoundNot = true;
7193 Src = Src.getOperand(0);
7194
7195 // Look through an optional truncation. The source operand may not be the
7196 // same type as the original 'and', but that is ok because we are masking
7197 // off everything but the low bit.
7198 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7199 Src = Src.getOperand(0);
7200 }
7201
7202 // Match a shift-right by constant.
7203 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7204 return SDValue();
7205
7206 // This is probably not worthwhile without a supported type.
7207 EVT SrcVT = Src.getValueType();
7208 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7209 if (!TLI.isTypeLegal(SrcVT))
7210 return SDValue();
7211
7212 // We might have looked through casts that make this transform invalid.
7213 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7214 SDValue ShiftAmt = Src.getOperand(1);
7215 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7216 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7217 return SDValue();
7218
7219 // Set source to shift source.
7220 Src = Src.getOperand(0);
7221
7222 // Try again to find a 'not' op.
7223 // TODO: Should we favor test+set even with two 'not' ops?
7224 if (!FoundNot) {
7225 if (!isBitwiseNot(Src))
7226 return SDValue();
7227 Src = Src.getOperand(0);
7228 }
7229
7230 if (!TLI.hasBitTest(Src, ShiftAmt))
7231 return SDValue();
7232
7233 // Turn this into a bit-test pattern using mask op + setcc:
7234 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7235 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7236 SDLoc DL(And);
7237 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7238 EVT CCVT =
7239 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7240 SDValue Mask = DAG.getConstant(
7241 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7242 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7243 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7244 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7245 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7246}
7247
7248/// For targets that support usubsat, match a bit-hack form of that operation
7249/// that ends in 'and' and convert it.
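/// For i8: if X has the sign bit set, (X s>> 7) is all-ones and (X ^ 128)
/// equals X - 128; otherwise the mask is zero. Either way the result is
/// usubsat(X, 128).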
7250 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7251 EVT VT = N->getValueType(0);
7252 unsigned BitWidth = VT.getScalarSizeInBits();
7253 APInt SignMask = APInt::getSignMask(BitWidth);
7254
7255 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7256 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7257 // xor/add with SMIN (signmask) are logically equivalent.
7258 SDValue X;
7259 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7260 m_OneUse(m_Sra(m_Deferred(X),
7261 m_SpecificInt(BitWidth - 1))))) &&
7262 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7263 m_OneUse(m_Sra(m_Deferred(X),
7264 m_SpecificInt(BitWidth - 1))))))
7265 return SDValue();
7266
7267 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7268 DAG.getConstant(SignMask, DL, VT));
7269}
7270
7271/// Given a bitwise logic operation N with a matching bitwise logic operand,
7272/// fold a pattern where 2 of the source operands are identically shifted
7273/// values. For example:
7274/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7275 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7276 SelectionDAG &DAG) {
7277 unsigned LogicOpcode = N->getOpcode();
7278 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7279 "Expected bitwise logic operation");
7280
7281 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7282 return SDValue();
7283
7284 // Match another bitwise logic op and a shift.
7285 unsigned ShiftOpcode = ShiftOp.getOpcode();
7286 if (LogicOp.getOpcode() != LogicOpcode ||
7287 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7288 ShiftOpcode == ISD::SRA))
7289 return SDValue();
7290
7291 // Match another shift op inside the first logic operand. Handle both commuted
7292 // possibilities.
7293 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7294 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7295 SDValue X1 = ShiftOp.getOperand(0);
7296 SDValue Y = ShiftOp.getOperand(1);
7297 SDValue X0, Z;
7298 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7299 LogicOp.getOperand(0).getOperand(1) == Y) {
7300 X0 = LogicOp.getOperand(0).getOperand(0);
7301 Z = LogicOp.getOperand(1);
7302 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7303 LogicOp.getOperand(1).getOperand(1) == Y) {
7304 X0 = LogicOp.getOperand(1).getOperand(0);
7305 Z = LogicOp.getOperand(0);
7306 } else {
7307 return SDValue();
7308 }
7309
7310 EVT VT = N->getValueType(0);
7311 SDLoc DL(N);
7312 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7313 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7314 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7315}
7316
7317/// Given a tree of logic operations with shape like
7318/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7319/// try to match and fold shift operations with the same shift amount.
7320/// For example:
7321/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7322/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7323 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7324 SDValue RightHand, SelectionDAG &DAG) {
7325 unsigned LogicOpcode = N->getOpcode();
7326 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7327 "Expected bitwise logic operation");
7328 if (LeftHand.getOpcode() != LogicOpcode ||
7329 RightHand.getOpcode() != LogicOpcode)
7330 return SDValue();
7331 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7332 return SDValue();
7333
7334 // Try to match one of following patterns:
7335 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7336 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7337 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7338 // itself.
7339 SDValue CombinedShifts, W;
7340 SDValue R0 = RightHand.getOperand(0);
7341 SDValue R1 = RightHand.getOperand(1);
7342 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7343 W = R1;
7344 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7345 W = R0;
7346 else
7347 return SDValue();
7348
7349 EVT VT = N->getValueType(0);
7350 SDLoc DL(N);
7351 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7352}
7353
7354/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7355 /// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`
7356/// pattern. This is typically a better representation for targets without a
7357/// fused "and-not" operation.
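/// The forms are equivalent because, bit by bit, ((x ^ y) & m) ^ y selects the
/// bit of x where m is set and the bit of y where m is clear.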
7358 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7359 const TargetLowering &TLI, const SDLoc &DL) {
7360 // Note that masked-merge variants using XOR or ADD expressions are
7361 // normalized to OR by InstCombine so we only check for OR or AND.
7362 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7363 "Must be called with ISD::OR or ISD::AND node");
7364
7365 // If the target supports and-not, don't fold this.
7366 if (TLI.hasAndNot(SDValue(Node, 0)))
7367 return SDValue();
7368
7369 SDValue M, X, Y;
7370
7371 if (sd_match(Node,
7372 m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7373 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7374 sd_match(Node,
7375 m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7376 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7377 EVT VT = M.getValueType();
7378 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7379 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7380 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7381 }
7382 return SDValue();
7383}
7384
7385SDValue DAGCombiner::visitAND(SDNode *N) {
7386 SDValue N0 = N->getOperand(0);
7387 SDValue N1 = N->getOperand(1);
7388 EVT VT = N1.getValueType();
7389 SDLoc DL(N);
7390
7391 // x & x --> x
7392 if (N0 == N1)
7393 return N0;
7394
7395 // fold (and c1, c2) -> c1&c2
7396 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7397 return C;
7398
7399 // canonicalize constant to RHS
7400 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7401 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7402 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7403
7404 if (areBitwiseNotOfEachother(N0, N1))
7405 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7406
7407 // fold vector ops
7408 if (VT.isVector()) {
7409 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7410 return FoldedVOp;
7411
7412 // fold (and x, 0) -> 0, vector edition
7413 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7414 // do not return N1, because undef node may exist in N1
7415 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7416 N1.getValueType());
7417
7418 // fold (and x, -1) -> x, vector edition
7419 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7420 return N0;
7421
7422 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7423 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7424 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7425 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7426 EVT LoadVT = MLoad->getMemoryVT();
7427 EVT ExtVT = VT;
7428 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7429 // For this AND to be a zero extension of the masked load the elements
7430 // of the BuildVec must mask the bottom bits of the extended element
7431 // type
7432 uint64_t ElementSize =
7433 LoadVT.getVectorElementType().getScalarSizeInBits();
7434 if (Splat->getAPIntValue().isMask(ElementSize)) {
7435 SDValue NewLoad = DAG.getMaskedLoad(
7436 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7437 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7438 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7439 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7440 bool LoadHasOtherUsers = !N0.hasOneUse();
7441 CombineTo(N, NewLoad);
7442 if (LoadHasOtherUsers)
7443 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7444 return SDValue(N, 0);
7445 }
7446 }
7447 }
7448 }
7449
7450 // fold (and x, -1) -> x
7451 if (isAllOnesConstant(N1))
7452 return N0;
7453
7454 // if (and x, c) is known to be zero, return 0
7455 unsigned BitWidth = VT.getScalarSizeInBits();
7456 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7457 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7458 return DAG.getConstant(0, DL, VT);
7459
7460 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7461 return R;
7462
7463 if (SDValue NewSel = foldBinOpIntoSelect(N))
7464 return NewSel;
7465
7466 // reassociate and
7467 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7468 return RAND;
7469
7470 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7471 if (SDValue SD =
7472 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7473 return SD;
7474
7475 // fold (and (or x, C), D) -> D if (C & D) == D
7476 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7477 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7478 };
7479 if (N0.getOpcode() == ISD::OR &&
7480 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7481 return N1;
7482
7483 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7484 SDValue N0Op0 = N0.getOperand(0);
7485 EVT SrcVT = N0Op0.getValueType();
7486 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7487 APInt Mask = ~N1C->getAPIntValue();
7488 Mask = Mask.trunc(SrcBitWidth);
7489
7490 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7491 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7492 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7493
7494 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7495 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7496 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7497 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7498 TLI.isNarrowingProfitable(N, VT, SrcVT))
7499 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7500 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7501 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7502 }
7503
7504 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7505 if (ISD::isExtOpcode(N0.getOpcode())) {
7506 unsigned ExtOpc = N0.getOpcode();
7507 SDValue N0Op0 = N0.getOperand(0);
7508 if (N0Op0.getOpcode() == ISD::AND &&
7509 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7510 N0->hasOneUse() && N0Op0->hasOneUse()) {
7511 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7512 {N0Op0.getOperand(1)})) {
7513 if (SDValue NewMask =
7514 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7515 return DAG.getNode(ISD::AND, DL, VT,
7516 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7517 NewMask);
7518 }
7519 }
7520 }
7521 }
7522
7523 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7524 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7525 // already be zero by virtue of the width of the base type of the load.
7526 //
7527 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7528 // more cases.
7529 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7531 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7532 N0.getOperand(0).getResNo() == 0) ||
7533 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7534 auto *Load =
7535 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7536
7537 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7538 // This can be a pure constant or a vector splat, in which case we treat the
7539 // vector as a scalar and use the splat value.
7540 APInt Constant = APInt::getZero(1);
7541 if (const ConstantSDNode *C = isConstOrConstSplat(
7542 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7543 Constant = C->getAPIntValue();
7544 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7545 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7546 APInt SplatValue, SplatUndef;
7547 unsigned SplatBitSize;
7548 bool HasAnyUndefs;
7549 // Endianness should not matter here. Code below makes sure that we only
7550 // use the result if the SplatBitSize is a multiple of the vector element
7551 // size. And after that we AND all element sized parts of the splat
7552 // together. So the end result should be the same regardless of in which
7553 // order we do those operations.
7554 const bool IsBigEndian = false;
7555 bool IsSplat =
7556 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7557 HasAnyUndefs, EltBitWidth, IsBigEndian);
7558
7559 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7560 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7561 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7562 // Undef bits can contribute to a possible optimisation if set, so
7563 // set them.
7564 SplatValue |= SplatUndef;
7565
7566 // The splat value may be something like "0x00FFFFFF", which means 0 for
7567 // the first vector value and FF for the rest, repeating. We need a mask
7568 // that will apply equally to all members of the vector, so AND all the
7569 // lanes of the constant together.
7570 Constant = APInt::getAllOnes(EltBitWidth);
7571 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7572 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7573 }
7574 }
7575
7576 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7577 // actually legal and isn't going to get expanded, else this is a false
7578 // optimisation.
7579 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7580 Load->getValueType(0),
7581 Load->getMemoryVT());
7582
7583 // Resize the constant to the same size as the original memory access before
7584 // extension. If it is still the AllOnesValue then this AND is completely
7585 // unneeded.
7586 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7587
7588 bool B;
7589 switch (Load->getExtensionType()) {
7590 default: B = false; break;
7591 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7592 case ISD::ZEXTLOAD:
7593 case ISD::NON_EXTLOAD: B = true; break;
7594 }
7595
7596 if (B && Constant.isAllOnes()) {
7597 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7598 // preserve semantics once we get rid of the AND.
7599 SDValue NewLoad(Load, 0);
7600
7601 // Fold the AND away. NewLoad may get replaced immediately.
7602 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7603
7604 if (Load->getExtensionType() == ISD::EXTLOAD) {
7605 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7606 Load->getValueType(0), SDLoc(Load),
7607 Load->getChain(), Load->getBasePtr(),
7608 Load->getOffset(), Load->getMemoryVT(),
7609 Load->getMemOperand());
7610 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7611 if (Load->getNumValues() == 3) {
7612 // PRE/POST_INC loads have 3 values.
7613 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7614 NewLoad.getValue(2) };
7615 CombineTo(Load, To, 3, true);
7616 } else {
7617 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7618 }
7619 }
7620
7621 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7622 }
7623 }
7624
7625 // Try to convert a constant mask AND into a shuffle clear mask.
7626 if (VT.isVector())
7627 if (SDValue Shuffle = XformToShuffleWithZero(N))
7628 return Shuffle;
7629
7630 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7631 return Combined;
7632
7633 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7634 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7635 SDValue Ext = N0.getOperand(0);
7636 EVT ExtVT = Ext->getValueType(0);
7637 SDValue Extendee = Ext->getOperand(0);
7638
7639 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7640 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7641 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7642 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7643 // => (extract_subvector (iN_zeroext v))
7644 SDValue ZeroExtExtendee =
7645 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7646
7647 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7648 N0.getOperand(1));
7649 }
7650 }
7651
7652 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7653 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7654 EVT MemVT = GN0->getMemoryVT();
7655 EVT ScalarVT = MemVT.getScalarType();
7656
7657 if (SDValue(GN0, 0).hasOneUse() &&
7658 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7659 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7660 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7661 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7662
7663 SDValue ZExtLoad = DAG.getMaskedGather(
7664 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7665 GN0->getIndexType(), ISD::ZEXTLOAD);
7666
7667 CombineTo(N, ZExtLoad);
7668 AddToWorklist(ZExtLoad.getNode());
7669 // Avoid recheck of N.
7670 return SDValue(N, 0);
7671 }
7672 }
7673
7674 // fold (and (load x), 255) -> (zextload x, i8)
7675 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7676 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7677 if (SDValue Res = reduceLoadWidth(N))
7678 return Res;
7679
7680 if (LegalTypes) {
7681 // Attempt to propagate the AND back up to the leaves which, if they're
7682 // loads, can be combined to narrow loads and the AND node can be removed.
7683 // Perform after legalization so that extend nodes will already be
7684 // combined into the loads.
7685 if (BackwardsPropagateMask(N))
7686 return SDValue(N, 0);
7687 }
7688
7689 if (SDValue Combined = visitANDLike(N0, N1, N))
7690 return Combined;
7691
7692 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7693 if (N0.getOpcode() == N1.getOpcode())
7694 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7695 return V;
7696
7697 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7698 return R;
7699 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7700 return R;
7701
7702 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7703 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7704 SDValue X, Y, Z, NotY;
7705 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7706 if (sd_match(N,
7707 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7708 sd_match(NotY, m_Not(m_Value(Y))) &&
7709 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7710 return DAG.getNode(ISD::AND, DL, VT, X,
7711 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7712
7713 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7714 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7715 if (sd_match(N, m_And(m_Value(X),
7716 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7717 sd_match(NotY, m_Not(m_Value(Y))) &&
7718 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7719 return DAG.getNode(ISD::AND, DL, VT, X,
7720 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7721
7722 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7723 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7724 if (TLI.hasAndNot(SDValue(N, 0)))
7725 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7726 return Folded;
7727
7728 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7729 // If we are shifting down an extended sign bit, see if we can simplify
7730 // this to shifting the MSB directly to expose further simplifications.
7731 // This pattern often appears after sext_inreg legalization.
7732 APInt Amt;
7733 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7734 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7735 return DAG.getNode(ISD::SRL, DL, VT, X,
7736 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7737
7738 // Masking the negated extension of a boolean is just the zero-extended
7739 // boolean:
7740 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7741 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7742 //
7743 // Note: the SimplifyDemandedBits fold below can make an information-losing
7744 // transform, and then we have no way to find this better fold.
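// (sub 0, zext(bool X)) is 0 or -1 and (sub 0, sext(bool X)) is 0 or 1, so
// masking with 1 yields exactly zext(bool X) in both cases.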
7745 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7746 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7747 X.getOperand(0).getScalarValueSizeInBits() == 1)
7748 return X;
7749 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7750 X.getOperand(0).getScalarValueSizeInBits() == 1)
7751 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7752 }
7753
7754 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7755 // fold (and (sra)) -> (and (srl)) when possible.
7756 if (SimplifyDemandedBits(SDValue(N, 0)))
7757 return SDValue(N, 0);
7758
7759 // fold (zext_inreg (extload x)) -> (zextload x)
7760 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7761 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7762 (ISD::isEXTLoad(N0.getNode()) ||
7763 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7764 auto *LN0 = cast<LoadSDNode>(N0);
7765 EVT MemVT = LN0->getMemoryVT();
7766 // If we zero all the possible extended bits, then we can turn this into
7767 // a zextload if we are running before legalize or the operation is legal.
7768 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7769 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7770 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7771 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7772 ((!LegalOperations && LN0->isSimple()) ||
7773 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7774 SDValue ExtLoad =
7775 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7776 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7777 AddToWorklist(N);
7778 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7779 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7780 }
7781 }
7782
7783 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7784 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7785 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7786 N0.getOperand(1), false))
7787 return BSwap;
7788 }
7789
7790 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7791 return Shifts;
7792
7793 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7794 return V;
7795
7796 // Recognize the following pattern:
7797 //
7798 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7799 //
7800 // where bitmask is a mask that clears the upper bits of AndVT. The
7801 // number of bits in bitmask must be a power of two.
7802 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7803 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7804 return false;
7805
7806 auto *C = dyn_cast<ConstantSDNode>(RHS);
7807 if (!C)
7808 return false;
7809
7810 if (!C->getAPIntValue().isMask(
7811 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7812 return false;
7813
7814 return true;
7815 };
7816
7817 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7818 if (IsAndZeroExtMask(N0, N1))
7819 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7820
7821 if (hasOperation(ISD::USUBSAT, VT))
7822 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7823 return V;
7824
7825 // Postpone until legalization completed to avoid interference with bswap
7826 // folding
7827 if (LegalOperations || VT.isVector())
7828 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7829 return R;
7830
7831 if (VT.isScalarInteger() && VT != MVT::i1)
7832 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7833 return R;
7834
7835 return SDValue();
7836}
7837
7838/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7839SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7840 bool DemandHighBits) {
7841 if (!LegalOperations)
7842 return SDValue();
7843
7844 EVT VT = N->getValueType(0);
7845 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7846 return SDValue();
7847   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7848     return SDValue();
7849
7850 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7851 bool LookPassAnd0 = false;
7852 bool LookPassAnd1 = false;
7853 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7854 std::swap(N0, N1);
7855 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7856 std::swap(N0, N1);
7857 if (N0.getOpcode() == ISD::AND) {
7858 if (!N0->hasOneUse())
7859 return SDValue();
7860 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7861 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7862 // This is needed for X86.
7863 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7864 N01C->getZExtValue() != 0xFFFF))
7865 return SDValue();
7866 N0 = N0.getOperand(0);
7867 LookPassAnd0 = true;
7868 }
7869
7870 if (N1.getOpcode() == ISD::AND) {
7871 if (!N1->hasOneUse())
7872 return SDValue();
7873 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7874 if (!N11C || N11C->getZExtValue() != 0xFF)
7875 return SDValue();
7876 N1 = N1.getOperand(0);
7877 LookPassAnd1 = true;
7878 }
7879
7880 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7881 std::swap(N0, N1);
7882 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7883 return SDValue();
7884 if (!N0->hasOneUse() || !N1->hasOneUse())
7885 return SDValue();
7886
7887 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7888 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7889 if (!N01C || !N11C)
7890 return SDValue();
7891 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7892 return SDValue();
7893
7894 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7895 SDValue N00 = N0->getOperand(0);
7896 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7897 if (!N00->hasOneUse())
7898 return SDValue();
7899 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7900 if (!N001C || N001C->getZExtValue() != 0xFF)
7901 return SDValue();
7902 N00 = N00.getOperand(0);
7903 LookPassAnd0 = true;
7904 }
7905
7906 SDValue N10 = N1->getOperand(0);
7907 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7908 if (!N10->hasOneUse())
7909 return SDValue();
7910 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7911 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7912 // for X86.
7913 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7914 N101C->getZExtValue() != 0xFFFF))
7915 return SDValue();
7916 N10 = N10.getOperand(0);
7917 LookPassAnd1 = true;
7918 }
7919
7920 if (N00 != N10)
7921 return SDValue();
7922
7923 // Make sure everything beyond the low halfword gets set to zero since the SRL
7924 // 16 will clear the top bits.
7925 unsigned OpSizeInBits = VT.getSizeInBits();
7926 if (OpSizeInBits > 16) {
7927 // If the left-shift isn't masked out then the only way this is a bswap is
7928 // if all bits beyond the low 8 are 0. In that case the entire pattern
7929 // reduces to a left shift anyway: leave it for other parts of the combiner.
7930 if (DemandHighBits && !LookPassAnd0)
7931 return SDValue();
7932
7933 // However, if the right shift isn't masked out then it might be because
7934 // it's not needed. See if we can spot that too. If the high bits aren't
7935 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7936 // upper bits to be zero.
7937 if (!LookPassAnd1) {
7938 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7939 if (!DAG.MaskedValueIsZero(N10,
7940 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7941 return SDValue();
7942 }
7943 }
7944
7945 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7946 if (OpSizeInBits > 16) {
7947 SDLoc DL(N);
7948 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7949 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7950 }
7951 return Res;
7952}
7953
7954/// Return true if the specified node is an element that makes up a 32-bit
7955/// packed halfword byteswap.
7956/// ((x & 0x000000ff) << 8) |
7957/// ((x & 0x0000ff00) >> 8) |
7958/// ((x & 0x00ff0000) << 8) |
7959/// ((x & 0xff000000) >> 8)
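/// Taken together the four terms byte-swap each 16-bit half of x;
/// MatchBSwapHWord rewrites the complete pattern to (rotl (bswap x), 16).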
7960 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7961   if (!N->hasOneUse())
7962 return false;
7963
7964 unsigned Opc = N.getOpcode();
7965 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7966 return false;
7967
7968 SDValue N0 = N.getOperand(0);
7969 unsigned Opc0 = N0.getOpcode();
7970 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7971 return false;
7972
7973 ConstantSDNode *N1C = nullptr;
7974 // SHL or SRL: look upstream for AND mask operand
7975 if (Opc == ISD::AND)
7976 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7977 else if (Opc0 == ISD::AND)
7978 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7979 if (!N1C)
7980 return false;
7981
7982 unsigned MaskByteOffset;
7983 switch (N1C->getZExtValue()) {
7984 default:
7985 return false;
7986 case 0xFF: MaskByteOffset = 0; break;
7987 case 0xFF00: MaskByteOffset = 1; break;
7988 case 0xFFFF:
7989 // In case demanded bits didn't clear the bits that will be shifted out.
7990 // This is needed for X86.
7991 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7992 MaskByteOffset = 1;
7993 break;
7994 }
7995 return false;
7996 case 0xFF0000: MaskByteOffset = 2; break;
7997 case 0xFF000000: MaskByteOffset = 3; break;
7998 }
7999
8000 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8001 if (Opc == ISD::AND) {
8002 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8003 // (x >> 8) & 0xff
8004 // (x >> 8) & 0xff0000
8005 if (Opc0 != ISD::SRL)
8006 return false;
8007 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8008 if (!C || C->getZExtValue() != 8)
8009 return false;
8010 } else {
8011 // (x << 8) & 0xff00
8012 // (x << 8) & 0xff000000
8013 if (Opc0 != ISD::SHL)
8014 return false;
8015 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8016 if (!C || C->getZExtValue() != 8)
8017 return false;
8018 }
8019 } else if (Opc == ISD::SHL) {
8020 // (x & 0xff) << 8
8021 // (x & 0xff0000) << 8
8022 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8023 return false;
8024 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8025 if (!C || C->getZExtValue() != 8)
8026 return false;
8027 } else { // Opc == ISD::SRL
8028 // (x & 0xff00) >> 8
8029 // (x & 0xff000000) >> 8
8030 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8031 return false;
8032 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8033 if (!C || C->getZExtValue() != 8)
8034 return false;
8035 }
8036
8037 if (Parts[MaskByteOffset])
8038 return false;
8039
8040 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8041 return true;
8042}
8043
8044// Match 2 elements of a packed halfword bswap.
8045 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8046   if (N.getOpcode() == ISD::OR)
8047 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8048 isBSwapHWordElement(N.getOperand(1), Parts);
8049
8050 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8051 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8052 if (!C || C->getAPIntValue() != 16)
8053 return false;
8054 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8055 return true;
8056 }
8057
8058 return false;
8059}
8060
8061// Match this pattern:
8062// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8063// And rewrite this to:
8064// (rotr (bswap A), 16)
8065 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8066                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
8067 SDValue N1, EVT VT) {
8068 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8069 "MatchBSwapHWordOrAndAnd: expecting i32");
8070 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8071 return SDValue();
8072 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8073 return SDValue();
8074 // TODO: this is too restrictive; lifting this restriction requires more tests
8075 if (!N0->hasOneUse() || !N1->hasOneUse())
8076 return SDValue();
8077   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8078   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8079   if (!Mask0 || !Mask1)
8080 return SDValue();
8081 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8082 Mask1->getAPIntValue() != 0x00ff00ff)
8083 return SDValue();
8084 SDValue Shift0 = N0.getOperand(0);
8085 SDValue Shift1 = N1.getOperand(0);
8086 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8087 return SDValue();
8088 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8089 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8090 if (!ShiftAmt0 || !ShiftAmt1)
8091 return SDValue();
8092 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8093 return SDValue();
8094 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8095 return SDValue();
8096
8097 SDLoc DL(N);
8098 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8099 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8100 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8101}
8102
8103/// Match a 32-bit packed halfword bswap. That is
8104/// ((x & 0x000000ff) << 8) |
8105/// ((x & 0x0000ff00) >> 8) |
8106/// ((x & 0x00ff0000) << 8) |
8107/// ((x & 0xff000000) >> 8)
8108/// => (rotl (bswap x), 16)
8109SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8110 if (!LegalOperations)
8111 return SDValue();
8112
8113 EVT VT = N->getValueType(0);
8114 if (VT != MVT::i32)
8115 return SDValue();
8116   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8117     return SDValue();
8118
8119 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8120 return BSwap;
8121
8122 // Try again with commuted operands.
8123 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8124 return BSwap;
8125
8126
8127 // Look for either
8128 // (or (bswaphpair), (bswaphpair))
8129 // (or (or (bswaphpair), (and)), (and))
8130 // (or (or (and), (bswaphpair)), (and))
8131 SDNode *Parts[4] = {};
8132
8133 if (isBSwapHWordPair(N0, Parts)) {
8134 // (or (or (and), (and)), (or (and), (and)))
8135 if (!isBSwapHWordPair(N1, Parts))
8136 return SDValue();
8137 } else if (N0.getOpcode() == ISD::OR) {
8138 // (or (or (or (and), (and)), (and)), (and))
8139 if (!isBSwapHWordElement(N1, Parts))
8140 return SDValue();
8141 SDValue N00 = N0.getOperand(0);
8142 SDValue N01 = N0.getOperand(1);
8143 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8144 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8145 return SDValue();
8146 } else {
8147 return SDValue();
8148 }
8149
8150 // Make sure the parts are all coming from the same node.
8151 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8152 return SDValue();
8153
8154 SDLoc DL(N);
8155 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8156 SDValue(Parts[0], 0));
8157
8158 // Result of the bswap should be rotated by 16. If it's not legal, then
8159 // do (x << 16) | (x >> 16).
8160 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8161   if (hasOperation(ISD::ROTL, VT))
8162     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8163   if (hasOperation(ISD::ROTR, VT))
8164     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8165 return DAG.getNode(ISD::OR, DL, VT,
8166 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8167 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8168}
8169
8170/// This contains all DAGCombine rules which reduce two values combined by
8171/// an Or operation to a single value \see visitANDLike().
8172SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8173 EVT VT = N1.getValueType();
8174
8175 // fold (or x, undef) -> -1
8176 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8177 return DAG.getAllOnesConstant(DL, VT);
8178
8179 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8180 return V;
8181
8182 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8183 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8184 // Don't increase # computations.
8185 (N0->hasOneUse() || N1->hasOneUse())) {
8186 // We can only do this xform if we know that bits from X that are set in C2
8187 // but not in C1 are already zero. Likewise for Y.
8188     if (const ConstantSDNode *N0O1C =
8189             getAsNonOpaqueConstant(N0.getOperand(1)))
8190       if (const ConstantSDNode *N1O1C =
8191               getAsNonOpaqueConstant(N1.getOperand(1))) {
8192 // We can only do this xform if we know that bits from X that are set in
8193 // C2 but not in C1 are already zero. Likewise for Y.
8194 const APInt &LHSMask = N0O1C->getAPIntValue();
8195 const APInt &RHSMask = N1O1C->getAPIntValue();
8196
8197 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8198 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8199 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8200 N0.getOperand(0), N1.getOperand(0));
8201 return DAG.getNode(ISD::AND, DL, VT, X,
8202 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8203 }
8204 }
8205 }
8206 }
8207
8208 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8209 if (N0.getOpcode() == ISD::AND &&
8210 N1.getOpcode() == ISD::AND &&
8211 N0.getOperand(0) == N1.getOperand(0) &&
8212 // Don't increase # computations.
8213 (N0->hasOneUse() || N1->hasOneUse())) {
8214 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8215 N0.getOperand(1), N1.getOperand(1));
8216 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8217 }
8218
8219 return SDValue();
8220}
8221
8222/// OR combines for which the commuted variant will be tried as well.
8223 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8224                                   SDNode *N) {
8225 EVT VT = N0.getValueType();
8226 unsigned BW = VT.getScalarSizeInBits();
8227 SDLoc DL(N);
8228
8229 auto peekThroughResize = [](SDValue V) {
8230 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8231 return V->getOperand(0);
8232 return V;
8233 };
8234
8235 SDValue N0Resized = peekThroughResize(N0);
8236 if (N0Resized.getOpcode() == ISD::AND) {
8237 SDValue N1Resized = peekThroughResize(N1);
8238 SDValue N00 = N0Resized.getOperand(0);
8239 SDValue N01 = N0Resized.getOperand(1);
8240
8241 // fold or (and x, y), x --> x
8242 if (N00 == N1Resized || N01 == N1Resized)
8243 return N1;
8244
8245 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8246 // TODO: Set AllowUndefs = true.
8247 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8248 /* AllowUndefs */ false)) {
8249 if (peekThroughResize(NotOperand) == N1Resized)
8250 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8251 N1);
8252 }
8253
8254 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8255 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8256 /* AllowUndefs */ false)) {
8257 if (peekThroughResize(NotOperand) == N1Resized)
8258 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8259 N1);
8260 }
8261 }
8262
8263 SDValue X, Y;
8264
8265 // fold or (xor X, N1), N1 --> or X, N1
8266 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8267 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8268
8269 // fold or (xor x, y), (x and/or y) --> or x, y
8270 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8271 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8272        sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
8273     return DAG.getNode(ISD::OR, DL, VT, X, Y);
8274
8275 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8276 return R;
8277
8278 auto peekThroughZext = [](SDValue V) {
8279 if (V->getOpcode() == ISD::ZERO_EXTEND)
8280 return V->getOperand(0);
8281 return V;
8282 };
8283
8284 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8285 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8286 N0.getOperand(0) == N1.getOperand(0) &&
8287 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8288 return N0;
8289
8290 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8291 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8292 N0.getOperand(1) == N1.getOperand(0) &&
8293 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8294 return N0;
8295
8296 // Attempt to match a legalized build_pair-esque pattern:
8297 // or(shl(aext(Hi),BW/2),zext(Lo))
8298 SDValue Lo, Hi;
8299 if (sd_match(N0,
8300                m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
8301       sd_match(N1, m_ZExt(m_Value(Lo))) &&
8302 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8303 Lo.getValueType() == Hi.getValueType()) {
8304 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8305 SDValue NotLo, NotHi;
8306 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8307 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8308 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8309 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8310 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8311 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8312 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8313 }
8314 }
8315
8316 return SDValue();
8317}
8318
8319SDValue DAGCombiner::visitOR(SDNode *N) {
8320 SDValue N0 = N->getOperand(0);
8321 SDValue N1 = N->getOperand(1);
8322 EVT VT = N1.getValueType();
8323 SDLoc DL(N);
8324
8325 // x | x --> x
8326 if (N0 == N1)
8327 return N0;
8328
8329 // fold (or c1, c2) -> c1|c2
8330 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8331 return C;
8332
8333   // canonicalize constant to RHS
8334   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8335       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8336     return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8337
8338 // fold vector ops
8339 if (VT.isVector()) {
8340 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8341 return FoldedVOp;
8342
8343 // fold (or x, 0) -> x, vector edition
8344     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8345       return N0;
8346
8347 // fold (or x, -1) -> -1, vector edition
8348     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8349       // do not return N1, because an undef node may exist in N1
8350 return DAG.getAllOnesConstant(DL, N1.getValueType());
8351
8352 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8353 // Do this only if the resulting type / shuffle is legal.
8354 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8355 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8356 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8357 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8358 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8359 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8360 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8361 // Ensure both shuffles have a zero input.
8362 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8363 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8364 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8365 bool CanFold = true;
8366 int NumElts = VT.getVectorNumElements();
8367 SmallVector<int, 4> Mask(NumElts, -1);
8368
8369 for (int i = 0; i != NumElts; ++i) {
8370 int M0 = SV0->getMaskElt(i);
8371 int M1 = SV1->getMaskElt(i);
8372
8373 // Determine if either index is pointing to a zero vector.
8374 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8375 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8376
8377         // If one element is zero and the other side is undef, keep undef.
8378 // This also handles the case that both are undef.
8379 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8380 continue;
8381
8382 // Make sure only one of the elements is zero.
8383 if (M0Zero == M1Zero) {
8384 CanFold = false;
8385 break;
8386 }
8387
8388 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8389
8390 // We have a zero and non-zero element. If the non-zero came from
8391 // SV0 make the index a LHS index. If it came from SV1, make it
8392 // a RHS index. We need to mod by NumElts because we don't care
8393 // which operand it came from in the original shuffles.
8394 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8395 }
8396
8397 if (CanFold) {
8398 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8399 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8400 SDValue LegalShuffle =
8401 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8402 if (LegalShuffle)
8403 return LegalShuffle;
8404 }
8405 }
8406 }
8407 }
8408
8409 // fold (or x, 0) -> x
8410 if (isNullConstant(N1))
8411 return N0;
8412
8413 // fold (or x, -1) -> -1
8414 if (isAllOnesConstant(N1))
8415 return N1;
8416
8417 if (SDValue NewSel = foldBinOpIntoSelect(N))
8418 return NewSel;
8419
8420 // fold (or x, c) -> c iff (x & ~c) == 0
8421 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8422 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8423 return N1;
8424
8425 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8426 return R;
8427
8428 if (SDValue Combined = visitORLike(N0, N1, DL))
8429 return Combined;
8430
8431 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8432 return Combined;
8433
8434 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8435 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8436 return BSwap;
8437 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8438 return BSwap;
8439
8440 // reassociate or
8441 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8442 return ROR;
8443
8444 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8445 if (SDValue SD =
8446 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8447 return SD;
8448
8449 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8450 // iff (c1 & c2) != 0 or c1/c2 are undef.
8451 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8452 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8453 };
8454 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8455 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8456 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8457 {N1, N0.getOperand(1)})) {
8458 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8459 AddToWorklist(IOR.getNode());
8460 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8461 }
8462 }
8463
8464 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8465 return Combined;
8466 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8467 return Combined;
8468
8469 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8470 if (N0.getOpcode() == N1.getOpcode())
8471 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8472 return V;
8473
8474 // See if this is some rotate idiom.
8475 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8476 return Rot;
8477
8478 if (SDValue Load = MatchLoadCombine(N))
8479 return Load;
8480
8481 // Simplify the operands using demanded-bits information.
8482   if (SimplifyDemandedBits(SDValue(N, 0)))
8483     return SDValue(N, 0);
8484
8485 // If OR can be rewritten into ADD, try combines based on ADD.
8486 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8487 DAG.isADDLike(SDValue(N, 0)))
8488 if (SDValue Combined = visitADDLike(N))
8489 return Combined;
8490
8491 // Postpone until legalization completed to avoid interference with bswap
8492 // folding
8493 if (LegalOperations || VT.isVector())
8494 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8495 return R;
8496
8497 if (VT.isScalarInteger() && VT != MVT::i1)
8498 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8499 return R;
8500
8501 return SDValue();
8502}
8503
8504 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8505                                  SDValue &Mask) {
8506 if (Op.getOpcode() == ISD::AND &&
8507 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8508 Mask = Op.getOperand(1);
8509 return Op.getOperand(0);
8510 }
8511 return Op;
8512}
8513
8514/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8515static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8516 SDValue &Mask) {
8517 Op = stripConstantMask(DAG, Op, Mask);
8518 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8519 Shift = Op;
8520 return true;
8521 }
8522 return false;
8523}
8524
8525/// Helper function for visitOR to extract the needed side of a rotate idiom
8526/// from a shl/srl/mul/udiv. This is meant to handle cases where
8527/// InstCombine merged some outside op with one of the shifts from
8528/// the rotate pattern.
8529/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8530/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8531/// patterns:
8532///
8533/// (or (add v v) (shrl v bitwidth-1)):
8534/// expands (add v v) -> (shl v 1)
8535///
8536/// (or (mul v c0) (shrl (mul v c1) c2)):
8537/// expands (mul v c0) -> (shl (mul v c1) c3)
8538///
8539/// (or (udiv v c0) (shl (udiv v c1) c2)):
8540/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8541///
8542/// (or (shl v c0) (shrl (shl v c1) c2)):
8543/// expands (shl v c0) -> (shl (shl v c1) c3)
8544///
8545/// (or (shrl v c0) (shl (shrl v c1) c2)):
8546/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8547///
8548/// Such that in all cases, c3+c2==bitwidth(op v c1).
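///
/// For example, with a 32-bit v: (or (mul v 16) (srl (mul v 2) 29)) has
/// c0=16, c1=2, c2=29, giving c3=3 (c3+c2==32 and 16 == 2<<3), so
/// (mul v 16) is expanded to (shl (mul v 2) 3), which then lets the OR be
/// matched as a rotate of (mul v 2) by 3.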
8549 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8550                                      SDValue ExtractFrom, SDValue &Mask,
8551 const SDLoc &DL) {
8552 assert(OppShift && ExtractFrom && "Empty SDValue");
8553 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8554 return SDValue();
8555
8556 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8557
8558 // Value and Type of the shift.
8559 SDValue OppShiftLHS = OppShift.getOperand(0);
8560 EVT ShiftedVT = OppShiftLHS.getValueType();
8561
8562 // Amount of the existing shift.
8563 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8564
8565 // (add v v) -> (shl v 1)
8566 // TODO: Should this be a general DAG canonicalization?
8567 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8568 ExtractFrom.getOpcode() == ISD::ADD &&
8569 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8570 ExtractFrom.getOperand(0) == OppShiftLHS &&
8571 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8572 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8573 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8574
8575 // Preconditions:
8576 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8577 //
8578 // Find opcode of the needed shift to be extracted from (op0 v c0).
8579 unsigned Opcode = ISD::DELETED_NODE;
8580 bool IsMulOrDiv = false;
8581 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8582 // opcode or its arithmetic (mul or udiv) variant.
8583 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8584 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8585 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8586 return false;
8587 Opcode = NeededShift;
8588 return true;
8589 };
8590 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8591 // that the needed shift can be extracted from.
8592 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8593 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8594 return SDValue();
8595
8596 // op0 must be the same opcode on both sides, have the same LHS argument,
8597 // and produce the same value type.
8598 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8599 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8600 ShiftedVT != ExtractFrom.getValueType())
8601 return SDValue();
8602
8603 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8604 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8605 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8606 ConstantSDNode *ExtractFromCst =
8607 isConstOrConstSplat(ExtractFrom.getOperand(1));
8608 // TODO: We should be able to handle non-uniform constant vectors for these values
8609 // Check that we have constant values.
8610 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8611 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8612 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8613 return SDValue();
8614
8615 // Compute the shift amount we need to extract to complete the rotate.
8616 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8617 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8618 return SDValue();
8619 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8620 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8621 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8622 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8623 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8624
8625 // Now try extract the needed shift from the ExtractFrom op and see if the
8626 // result matches up with the existing shift's LHS op.
8627 if (IsMulOrDiv) {
8628 // Op to extract from is a mul or udiv by a constant.
8629 // Check:
8630 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8631 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8632 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8633 NeededShiftAmt.getZExtValue());
8634 APInt ResultAmt;
8635 APInt Rem;
8636 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8637 if (Rem != 0 || ResultAmt != OppLHSAmt)
8638 return SDValue();
8639 } else {
8640 // Op to extract from is a shift by a constant.
8641 // Check:
8642 // c2 - (bitwidth(op0 v c0) - c1) == c0
8643 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8644 ExtractFromAmt.getBitWidth()))
8645 return SDValue();
8646 }
8647
8648 // Return the expanded shift op that should allow a rotate to be formed.
8649 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8650 EVT ResVT = ExtractFrom.getValueType();
8651 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8652 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8653}
8654
8655// Return true if we can prove that, whenever Neg and Pos are both in the
8656// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8657// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8658//
8659// (or (shift1 X, Neg), (shift2 X, Pos))
8660//
8661// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8662// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8663// to consider shift amounts with defined behavior.
8664//
8665// The IsRotate flag should be set when the LHS of both shifts is the same.
8666// Otherwise if matching a general funnel shift, it should be clear.
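// For example, with EltSize == 32, Neg == (and (sub 32, y), 31) and Pos == y
// satisfy condition [A] below for every y, so
// (or (srl X, Neg), (shl X, Pos)) can be reduced to (rotl X, Pos).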
8667static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8668 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8669 const auto &TLI = DAG.getTargetLoweringInfo();
8670 // If EltSize is a power of 2 then:
8671 //
8672 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8673 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8674 //
8675 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8676 // for the stronger condition:
8677 //
8678 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8679 //
8680 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8681 // we can just replace Neg with Neg' for the rest of the function.
8682 //
8683 // In other cases we check for the even stronger condition:
8684 //
8685 // Neg == EltSize - Pos [B]
8686 //
8687 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8688 // behavior if Pos == 0 (and consequently Neg == EltSize).
8689 //
8690 // We could actually use [A] whenever EltSize is a power of 2, but the
8691 // only extra cases that it would match are those uninteresting ones
8692 // where Neg and Pos are never in range at the same time. E.g. for
8693 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8694 // as well as (sub 32, Pos), but:
8695 //
8696 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8697 //
8698 // always invokes undefined behavior for 32-bit X.
8699 //
8700 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8701 // This allows us to peek through any operations that only affect Mask's
8702 // un-demanded bits.
8703 //
8704 // NOTE: We can only do this when matching operations which won't modify the
8705 // least Log2(EltSize) significant bits and not a general funnel shift.
8706 unsigned MaskLoBits = 0;
8707 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8708 unsigned Bits = Log2_64(EltSize);
8709 unsigned NegBits = Neg.getScalarValueSizeInBits();
8710 if (NegBits >= Bits) {
8711 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8712 if (SDValue Inner =
8713             TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8714         Neg = Inner;
8715 MaskLoBits = Bits;
8716 }
8717 }
8718 }
8719
8720 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8721 if (Neg.getOpcode() != ISD::SUB)
8722 return false;
8723   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8724   if (!NegC)
8725 return false;
8726 SDValue NegOp1 = Neg.getOperand(1);
8727
8728 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8729 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8730 // are redundant for the purpose of the equality.
8731 if (MaskLoBits) {
8732 unsigned PosBits = Pos.getScalarValueSizeInBits();
8733 if (PosBits >= MaskLoBits) {
8734 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8735 if (SDValue Inner =
8736               TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG))
8737         Pos = Inner;
8738 }
8739 }
8740 }
8741
8742 // The condition we need is now:
8743 //
8744 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8745 //
8746 // If NegOp1 == Pos then we need:
8747 //
8748 // EltSize & Mask == NegC & Mask
8749 //
8750 // (because "x & Mask" is a truncation and distributes through subtraction).
8751 //
8752 // We also need to account for a potential truncation of NegOp1 if the amount
8753 // has already been legalized to a shift amount type.
8754 APInt Width;
8755 if ((Pos == NegOp1) ||
8756 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8757 Width = NegC->getAPIntValue();
8758
8759 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8760 // Then the condition we want to prove becomes:
8761 //
8762 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8763 //
8764 // which, again because "x & Mask" is a truncation, becomes:
8765 //
8766 // NegC & Mask == (EltSize - PosC) & Mask
8767 // EltSize & Mask == (NegC + PosC) & Mask
8768 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8769 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8770 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8771 else
8772 return false;
8773 } else
8774 return false;
8775
8776 // Now we just need to check that EltSize & Mask == Width & Mask.
8777 if (MaskLoBits)
8778 // EltSize & Mask is 0 since Mask is EltSize - 1.
8779 return Width.getLoBits(MaskLoBits) == 0;
8780 return Width == EltSize;
8781}
8782
8783// A subroutine of MatchRotate used once we have found an OR of two opposite
8784// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8785// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8786// former being preferred if supported. InnerPos and InnerNeg are Pos and
8787// Neg with outer conversions stripped away.
8788SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8789 SDValue Neg, SDValue InnerPos,
8790 SDValue InnerNeg, bool FromAdd,
8791 bool HasPos, unsigned PosOpcode,
8792 unsigned NegOpcode, const SDLoc &DL) {
8793 // fold (or/add (shl x, (*ext y)),
8794 // (srl x, (*ext (sub 32, y)))) ->
8795 // (rotl x, y) or (rotr x, (sub 32, y))
8796 //
8797 // fold (or/add (shl x, (*ext (sub 32, y))),
8798 // (srl x, (*ext y))) ->
8799 // (rotr x, y) or (rotl x, (sub 32, y))
8800 EVT VT = Shifted.getValueType();
8801 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8802 /*IsRotate*/ true, FromAdd))
8803 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8804 HasPos ? Pos : Neg);
8805
8806 return SDValue();
8807}
8808
8809// A subroutine of MatchRotate used once we have found an OR of two opposite
8810// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8811// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8812// former being preferred if supported. InnerPos and InnerNeg are Pos and
8813// Neg with outer conversions stripped away.
8814// TODO: Merge with MatchRotatePosNeg.
8815SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8816 SDValue Neg, SDValue InnerPos,
8817 SDValue InnerNeg, bool FromAdd,
8818 bool HasPos, unsigned PosOpcode,
8819 unsigned NegOpcode, const SDLoc &DL) {
8820 EVT VT = N0.getValueType();
8821 unsigned EltBits = VT.getScalarSizeInBits();
8822
8823 // fold (or/add (shl x0, (*ext y)),
8824 // (srl x1, (*ext (sub 32, y)))) ->
8825 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8826 //
8827 // fold (or/add (shl x0, (*ext (sub 32, y))),
8828 // (srl x1, (*ext y))) ->
8829 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8830 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8831 FromAdd))
8832 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8833 HasPos ? Pos : Neg);
8834
8835 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8836 // so for now just use the PosOpcode case if its legal.
8837 // TODO: When can we use the NegOpcode case?
8838 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8839 SDValue X;
8840 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8841 // -> (fshl x0, x1, y)
8842 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8843 sd_match(InnerNeg,
8844 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8846 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8847 }
8848
8849 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8850 // -> (fshr x0, x1, y)
8851 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8852 sd_match(InnerPos,
8853 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8855 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8856 }
8857
8858 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8859 // -> (fshr x0, x1, y)
8860 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8861 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8862 sd_match(InnerPos,
8863 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8865 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8866 }
8867 }
8868
8869 return SDValue();
8870}
8871
8872// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8873// many idioms for rotate, and if the target supports rotation instructions,
8874// generate a rot[lr]. This also matches funnel shift patterns, similar to
8875// rotation but with different shifted sources.
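// For example, (or (shl x, 8), (srl x, 24)) on i32 becomes (rotl x, 8); with
// different shifted sources, (or (shl x, 8), (srl y, 24)) becomes (fshl x, y, 8).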
8876SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8877 bool FromAdd) {
8878 EVT VT = LHS.getValueType();
8879
8880 // The target must have at least one rotate/funnel flavor.
8881 // We still try to match rotate by constant pre-legalization.
8882 // TODO: Support pre-legalization funnel-shift by constant.
8883 bool HasROTL = hasOperation(ISD::ROTL, VT);
8884 bool HasROTR = hasOperation(ISD::ROTR, VT);
8885 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8886 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8887
8888 // If the type is going to be promoted and the target has enabled custom
8889 // lowering for rotate, allow matching rotate by non-constants. Only allow
8890 // this for scalar types.
8891   if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8892                                   TargetLowering::TypePromoteInteger) {
8893     HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8894     HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8895   }
8896
8897 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8898 return SDValue();
8899
8900 // Check for truncated rotate.
8901 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8902 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8903 assert(LHS.getValueType() == RHS.getValueType());
8904 if (SDValue Rot =
8905 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
8906 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8907 }
8908
8909 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8910 SDValue LHSShift; // The shift.
8911 SDValue LHSMask; // AND value if any.
8912 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8913
8914 SDValue RHSShift; // The shift.
8915 SDValue RHSMask; // AND value if any.
8916 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8917
8918 // If neither side matched a rotate half, bail
8919 if (!LHSShift && !RHSShift)
8920 return SDValue();
8921
8922 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8923 // side of the rotate, so try to handle that here. In all cases we need to
8924 // pass the matched shift from the opposite side to compute the opcode and
8925 // needed shift amount to extract. We still want to do this if both sides
8926 // matched a rotate half because one half may be a potential overshift that
8927 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8928 // single one).
8929
8930 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8931 if (LHSShift)
8932 if (SDValue NewRHSShift =
8933 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8934 RHSShift = NewRHSShift;
8935 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8936 if (RHSShift)
8937 if (SDValue NewLHSShift =
8938 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8939 LHSShift = NewLHSShift;
8940
8941 // If a side is still missing, nothing else we can do.
8942 if (!RHSShift || !LHSShift)
8943 return SDValue();
8944
8945 // At this point we've matched or extracted a shift op on each side.
8946
8947 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8948 return SDValue(); // Shifts must disagree.
8949
8950 // Canonicalize shl to left side in a shl/srl pair.
8951 if (RHSShift.getOpcode() == ISD::SHL) {
8952 std::swap(LHS, RHS);
8953 std::swap(LHSShift, RHSShift);
8954 std::swap(LHSMask, RHSMask);
8955 }
8956
8957 // Something has gone wrong - we've lost the shl/srl pair - bail.
8958 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8959 return SDValue();
8960
8961 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8962 SDValue LHSShiftArg = LHSShift.getOperand(0);
8963 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8964 SDValue RHSShiftArg = RHSShift.getOperand(0);
8965 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8966
8967 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8968                                         ConstantSDNode *RHS) {
8969     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8970 };
8971
8972 auto ApplyMasks = [&](SDValue Res) {
8973 // If there is an AND of either shifted operand, apply it to the result.
8974 if (LHSMask.getNode() || RHSMask.getNode()) {
8975       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8976       SDValue Mask = AllOnes;
8977 
8978 if (LHSMask.getNode()) {
8979 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8980 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8981 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8982 }
8983 if (RHSMask.getNode()) {
8984 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8985 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8986 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8987 }
8988
8989 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8990 }
8991
8992 return Res;
8993 };
8994
8995 // TODO: Support pre-legalization funnel-shift by constant.
8996 bool IsRotate = LHSShiftArg == RHSShiftArg;
8997 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8998 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8999 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9000 // Look for a disguised rotate by constant.
9001 // The common shifted operand X may be hidden inside another 'or'.
9002 SDValue X, Y;
9003 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9004 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9005 return false;
9006 if (CommonOp == Or.getOperand(0)) {
9007 X = CommonOp;
9008 Y = Or.getOperand(1);
9009 return true;
9010 }
9011 if (CommonOp == Or.getOperand(1)) {
9012 X = CommonOp;
9013 Y = Or.getOperand(0);
9014 return true;
9015 }
9016 return false;
9017 };
9018
9019 SDValue Res;
9020 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9021 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9022 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9023 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9024 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9025 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9026 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9027 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9028 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9029 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9030 } else {
9031 return SDValue();
9032 }
9033
9034 return ApplyMasks(Res);
9035 }
9036
9037 return SDValue(); // Requires funnel shift support.
9038 }
9039
9040 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9041 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9042 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9043 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9044 // iff C1+C2 == EltSizeInBits
9045 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9046 SDValue Res;
9047 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9048 bool UseROTL = !LegalOperations || HasROTL;
9049 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9050 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9051 } else {
9052 bool UseFSHL = !LegalOperations || HasFSHL;
9053 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9054 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9055 }
9056
9057 return ApplyMasks(Res);
9058 }
9059
9060 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9061 // shift.
9062 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9063 return SDValue();
9064
9065 // If there is a mask here, and we have a variable shift, we can't be sure
9066 // that we're masking out the right stuff.
9067 if (LHSMask.getNode() || RHSMask.getNode())
9068 return SDValue();
9069
9070 // If the shift amount is sign/zext/any-extended just peel it off.
9071 SDValue LExtOp0 = LHSShiftAmt;
9072 SDValue RExtOp0 = RHSShiftAmt;
9073 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9074 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9075 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9076 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9077 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9078 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9079 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9080 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9081 LExtOp0 = LHSShiftAmt.getOperand(0);
9082 RExtOp0 = RHSShiftAmt.getOperand(0);
9083 }
9084
9085 if (IsRotate && (HasROTL || HasROTR)) {
9086 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9087 LExtOp0, RExtOp0, FromAdd, HasROTL,
9088                                          ISD::ROTL, ISD::ROTR, DL))
9089       return TryL;
9090
9091 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9092 RExtOp0, LExtOp0, FromAdd, HasROTR,
9093                                          ISD::ROTR, ISD::ROTL, DL))
9094       return TryR;
9095 }
9096
9097 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9098 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9099 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9100 return TryL;
9101
9102 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9103 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9104 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9105 return TryR;
9106
9107 return SDValue();
9108}
9109
9110/// Recursively traverses the expression calculating the origin of the requested
9111/// byte of the given value. Returns std::nullopt if the provider can't be
9112/// calculated.
9113///
9114/// For all the values except the root of the expression, we verify that the
9115/// value has exactly one use and if not then return std::nullopt. This way if
9116/// the origin of the byte is returned it's guaranteed that the values which
9117/// contribute to the byte are not used outside of this expression.
9118
9119/// However, there is a special case when dealing with vector loads -- we allow
9120/// more than one use if the load is a vector type. Since the values that
9121/// contribute to the byte ultimately come from the ExtractVectorElements of the
9122/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9123/// because those operations are independent from the pattern to be combined.
9124/// For vector loads, we simply care that the ByteProviders are adjacent
9125/// positions of the same vector, and their index matches the byte that is being
9126/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9127/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9128/// byte position we are trying to provide for the LoadCombine. If these do
9129/// not match, then we can not combine the vector loads. \p Index uses the
9130/// byte position we are trying to provide for and is matched against the
9131/// shl and load size. The \p Index algorithm ensures the requested byte is
9132/// provided for by the pattern, and the pattern does not over provide bytes.
9133///
9134///
9135/// The supported LoadCombine pattern for vector loads is as follows
9136/// or
9137/// / \
9138/// or shl
9139/// / \ |
9140/// or shl zext
9141/// / \ | |
9142/// shl zext zext EVE*
9143/// | | | |
9144/// zext EVE* EVE* LOAD
9145/// | | |
9146/// EVE* LOAD LOAD
9147/// |
9148/// LOAD
9149///
9150/// *ExtractVectorElement
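///
/// For example, when an i32 is assembled by OR'ing four zext'd
/// ExtractVectorElements of a <4 x i8> load shifted by 0, 8, 16 and 24,
/// byte i of the result is provided by vector element i of that load.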
9151 using SDByteProvider = ByteProvider<SDNode *>;
9152 
9153static std::optional<SDByteProvider>
9154calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9155 std::optional<uint64_t> VectorIndex,
9156 unsigned StartingIndex = 0) {
9157
9158 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9159 if (Depth == 10)
9160 return std::nullopt;
9161
9162 // Only allow multiple uses if the instruction is a vector load (in which
9163 // case we will use the load for every ExtractVectorElement)
9164 if (Depth && !Op.hasOneUse() &&
9165 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9166 return std::nullopt;
9167
9168 // Fail to combine if we have encountered anything but a LOAD after handling
9169 // an ExtractVectorElement.
9170 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9171 return std::nullopt;
9172
9173 unsigned BitWidth = Op.getScalarValueSizeInBits();
9174 if (BitWidth % 8 != 0)
9175 return std::nullopt;
9176 unsigned ByteWidth = BitWidth / 8;
9177 assert(Index < ByteWidth && "invalid index requested");
9178 (void) ByteWidth;
9179
9180 switch (Op.getOpcode()) {
9181 case ISD::OR: {
9182 auto LHS =
9183 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9184 if (!LHS)
9185 return std::nullopt;
9186 auto RHS =
9187 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9188 if (!RHS)
9189 return std::nullopt;
9190
9191 if (LHS->isConstantZero())
9192 return RHS;
9193 if (RHS->isConstantZero())
9194 return LHS;
9195 return std::nullopt;
9196 }
9197 case ISD::SHL: {
9198 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9199 if (!ShiftOp)
9200 return std::nullopt;
9201
9202 uint64_t BitShift = ShiftOp->getZExtValue();
9203
9204 if (BitShift % 8 != 0)
9205 return std::nullopt;
9206 uint64_t ByteShift = BitShift / 8;
9207
9208 // If we are shifting by an amount greater than the index we are trying to
9209 // provide, then do not provide anything. Otherwise, subtract the index by
9210 // the amount we shifted by.
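    // E.g. for (shl x, 16) the requested bytes 0 and 1 are known zero, while a
    // request for byte 2 is forwarded as a request for byte 0 of x.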
9211 return Index < ByteShift
9212                ? SDByteProvider::getConstantZero()
9213                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9214 Depth + 1, VectorIndex, Index);
9215 }
9216 case ISD::ANY_EXTEND:
9217 case ISD::SIGN_EXTEND:
9218 case ISD::ZERO_EXTEND: {
9219 SDValue NarrowOp = Op->getOperand(0);
9220 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9221 if (NarrowBitWidth % 8 != 0)
9222 return std::nullopt;
9223 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9224
9225 if (Index >= NarrowByteWidth)
9226 return Op.getOpcode() == ISD::ZERO_EXTEND
9227 ? std::optional<SDByteProvider>(
9228                      SDByteProvider::getConstantZero())
9229                : std::nullopt;
9230 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9231 StartingIndex);
9232 }
9233 case ISD::BSWAP:
9234 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9235 Depth + 1, VectorIndex, StartingIndex);
9236   case ISD::EXTRACT_VECTOR_ELT: {
9237     auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9238 if (!OffsetOp)
9239 return std::nullopt;
9240
9241 VectorIndex = OffsetOp->getZExtValue();
9242
9243 SDValue NarrowOp = Op->getOperand(0);
9244 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9245 if (NarrowBitWidth % 8 != 0)
9246 return std::nullopt;
9247 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9248 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9249 // type, leaving the high bits undefined.
9250 if (Index >= NarrowByteWidth)
9251 return std::nullopt;
9252
9253 // Check to see if the position of the element in the vector corresponds
9254 // with the byte we are trying to provide for. In the case of a vector of
9255 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9256 // the element will provide a range of bytes. For example, if we have a
9257 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9258 // 3).
9259 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9260 return std::nullopt;
9261 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9262 return std::nullopt;
9263
9264 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9265 VectorIndex, StartingIndex);
9266 }
9267 case ISD::LOAD: {
9268 auto L = cast<LoadSDNode>(Op.getNode());
9269 if (!L->isSimple() || L->isIndexed())
9270 return std::nullopt;
9271
9272 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9273 if (NarrowBitWidth % 8 != 0)
9274 return std::nullopt;
9275 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9276
9277     // If the width of the load does not reach the byte we are trying to
9278     // provide for and it is not a ZEXTLOAD, then the load does not provide
9279     // for the byte in question.
9280 if (Index >= NarrowByteWidth)
9281 return L->getExtensionType() == ISD::ZEXTLOAD
9282 ? std::optional<SDByteProvider>(
9283                        SDByteProvider::getConstantZero())
9284                : std::nullopt;
9285
9286 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9287 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9288 }
9289 }
9290
9291 return std::nullopt;
9292}
9293
9294static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9295 return i;
9296}
9297
9298static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9299 return BW - i - 1;
9300}
9301
9302// Check if the bytes offsets we are looking at match with either big or
9303// little endian value loaded. Return true for big endian, false for little
9304// endian, and std::nullopt if match failed.
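// E.g. with FirstOffset == 0, offsets {0, 1, 2, 3} match a little-endian load
// (returns false) and {3, 2, 1, 0} match a big-endian load (returns true).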
9305static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9306 int64_t FirstOffset) {
9307 // The endian can be decided only when it is 2 bytes at least.
9308 unsigned Width = ByteOffsets.size();
9309 if (Width < 2)
9310 return std::nullopt;
9311
9312 bool BigEndian = true, LittleEndian = true;
9313 for (unsigned i = 0; i < Width; i++) {
9314 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9315 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9316 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9317 if (!BigEndian && !LittleEndian)
9318 return std::nullopt;
9319 }
9320
9321   assert((BigEndian != LittleEndian) && "It should be either big endian or "
9322                                         "little endian");
9323 return BigEndian;
9324}
9325
9326// Look through one layer of truncate or extend.
9327 static SDValue stripTruncAndExt(SDValue Value) {
9328   switch (Value.getOpcode()) {
9329 case ISD::TRUNCATE:
9330 case ISD::ZERO_EXTEND:
9331 case ISD::SIGN_EXTEND:
9332 case ISD::ANY_EXTEND:
9333 return Value.getOperand(0);
9334 }
9335 return SDValue();
9336}
9337
9338/// Match a pattern where a wide type scalar value is stored by several narrow
9339/// stores. Fold it into a single store or a BSWAP and a store if the targets
9340/// supports it.
9341///
9342/// Assuming little endian target:
9343/// i8 *p = ...
9344/// i32 val = ...
9345/// p[0] = (val >> 0) & 0xFF;
9346/// p[1] = (val >> 8) & 0xFF;
9347/// p[2] = (val >> 16) & 0xFF;
9348/// p[3] = (val >> 24) & 0xFF;
9349/// =>
9350/// *((i32)p) = val;
9351///
9352/// i8 *p = ...
9353/// i32 val = ...
9354/// p[0] = (val >> 24) & 0xFF;
9355/// p[1] = (val >> 16) & 0xFF;
9356/// p[2] = (val >> 8) & 0xFF;
9357/// p[3] = (val >> 0) & 0xFF;
9358/// =>
9359/// *((i32)p) = BSWAP(val);
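///
/// With two half-width pieces stored in reversed order the value is instead
/// rotated by half its width, e.g. (little endian target, 16-bit pieces):
/// i16 *p = ...
/// i32 val = ...
/// p[0] = (i16)(val >> 16);
/// p[1] = (i16)(val >> 0);
/// =>
/// *((i32)p) = ROTR(val, 16);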
9360SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9361 // The matching looks for "store (trunc x)" patterns that appear early but are
9362 // likely to be replaced by truncating store nodes during combining.
9363 // TODO: If there is evidence that running this later would help, this
9364 // limitation could be removed. Legality checks may need to be added
9365 // for the created store and optional bswap/rotate.
9366 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9367 return SDValue();
9368
9369 // We only handle merging simple stores of 1-4 bytes.
9370 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9371 EVT MemVT = N->getMemoryVT();
9372 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9373 !N->isSimple() || N->isIndexed())
9374 return SDValue();
9375
9376 // Collect all of the stores in the chain, up to the maximum store width (i64).
9377 SDValue Chain = N->getChain();
9378 SmallVector<StoreSDNode *, 8> Stores;
9379 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9380 unsigned MaxWideNumBits = 64;
9381 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9382 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9383 // All stores must be the same size to ensure that we are writing all of the
9384 // bytes in the wide value.
9385 // This store should have exactly one use as a chain operand for another
9386 // store in the merging set. If there are other chain uses, then the
9387 // transform may not be safe because order of loads/stores outside of this
9388 // set may not be preserved.
9389 // TODO: We could allow multiple sizes by tracking each stored byte.
9390 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9391 Store->isIndexed() || !Store->hasOneUse())
9392 return SDValue();
9393 Stores.push_back(Store);
9394 Chain = Store->getChain();
9395 if (MaxStores < Stores.size())
9396 return SDValue();
9397 }
9398 // There is no reason to continue if we do not have at least a pair of stores.
9399 if (Stores.size() < 2)
9400 return SDValue();
9401
9402 // Handle simple types only.
9403 LLVMContext &Context = *DAG.getContext();
9404 unsigned NumStores = Stores.size();
9405 unsigned WideNumBits = NumStores * NarrowNumBits;
9406 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9407 return SDValue();
9408
9409 // Check if all bytes of the source value that we are looking at are stored
9410 // to the same base address. Collect offsets from Base address into OffsetMap.
9411 SDValue SourceValue;
9412 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9413 int64_t FirstOffset = INT64_MAX;
9414 StoreSDNode *FirstStore = nullptr;
9415 std::optional<BaseIndexOffset> Base;
9416 for (auto *Store : Stores) {
9417 // All the stores store different parts of the CombinedValue. A truncate is
9418 // required to get the partial value.
9419 SDValue Trunc = Store->getValue();
9420 if (Trunc.getOpcode() != ISD::TRUNCATE)
9421 return SDValue();
9422 // Other than the first/last part, a shift operation is required to get the
9423 // offset.
9424 int64_t Offset = 0;
9425 SDValue WideVal = Trunc.getOperand(0);
9426 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9427 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9428 // The shift amount must be a constant multiple of the narrow type.
9429 // It is translated to the offset address in the wide source value "y".
9430 //
9431 // x = srl y, ShiftAmtC
9432 // i8 z = trunc x
9433 // store z, ...
9434 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9435 if (ShiftAmtC % NarrowNumBits != 0)
9436 return SDValue();
9437
9438 // Make sure we aren't reading bits that are shifted in.
9439 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9440 return SDValue();
9441
9442 Offset = ShiftAmtC / NarrowNumBits;
9443 WideVal = WideVal.getOperand(0);
9444 }
9445
9446 // Stores must share the same source value with different offsets.
9447 if (!SourceValue)
9448 SourceValue = WideVal;
9449 else if (SourceValue != WideVal) {
9450 // Truncate and extends can be stripped to see if the values are related.
9451 if (stripTruncAndExt(SourceValue) != WideVal &&
9452 stripTruncAndExt(WideVal) != SourceValue)
9453 return SDValue();
9454
9455 if (WideVal.getScalarValueSizeInBits() >
9456 SourceValue.getScalarValueSizeInBits())
9457 SourceValue = WideVal;
9458
9459 // Give up if the source value type is smaller than the store size.
9460 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9461 return SDValue();
9462 }
9463
9464 // Stores must share the same base address.
9465 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9466 int64_t ByteOffsetFromBase = 0;
9467 if (!Base)
9468 Base = Ptr;
9469 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9470 return SDValue();
9471
9472 // Remember the first store.
9473 if (ByteOffsetFromBase < FirstOffset) {
9474 FirstStore = Store;
9475 FirstOffset = ByteOffsetFromBase;
9476 }
9477 // Map the offset in the store and the offset in the combined value, and
9478 // early return if it has been set before.
9479 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9480 return SDValue();
9481 OffsetMap[Offset] = ByteOffsetFromBase;
9482 }
9483
9484 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9485
9486 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9487 assert(FirstStore && "First store must be set");
9488
9489 // Check that a store of the wide type is both allowed and fast on the target
9490 const DataLayout &Layout = DAG.getDataLayout();
9491 unsigned Fast = 0;
9492 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9493 *FirstStore->getMemOperand(), &Fast);
9494 if (!Allowed || !Fast)
9495 return SDValue();
9496
9497 // Check if the pieces of the value are going to the expected places in memory
9498 // to merge the stores.
9499 auto checkOffsets = [&](bool MatchLittleEndian) {
9500 if (MatchLittleEndian) {
9501 for (unsigned i = 0; i != NumStores; ++i)
9502 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9503 return false;
9504 } else { // MatchBigEndian by reversing loop counter.
9505 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9506 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9507 return false;
9508 }
9509 return true;
9510 };
9511
9512 // Check if the offsets line up for the native data layout of this target.
9513 bool NeedBswap = false;
9514 bool NeedRotate = false;
9515 if (!checkOffsets(Layout.isLittleEndian())) {
9516 // Special-case: check if byte offsets line up for the opposite endian.
9517 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9518 NeedBswap = true;
9519 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9520 NeedRotate = true;
9521 else
9522 return SDValue();
9523 }
9524
9525 SDLoc DL(N);
9526 if (WideVT != SourceValue.getValueType()) {
9527 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9528 "Unexpected store value to merge");
9529 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9530 }
9531
9532 // Before legalize we can introduce illegal bswaps/rotates which will be later
9533 // converted to an explicit bswap sequence. This way we end up with a single
9534 // store and byte shuffling instead of several stores and byte shuffling.
9535 if (NeedBswap) {
9536 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9537 } else if (NeedRotate) {
9538 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9539 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9540 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9541 }
9542
9543 SDValue NewStore =
9544 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9545 FirstStore->getPointerInfo(), FirstStore->getAlign());
9546
9547 // Rely on other DAG combine rules to remove the other individual stores.
9548 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9549 return NewStore;
9550}
9551
9552/// Match a pattern where a wide type scalar value is loaded by several narrow
9553/// loads and combined by shifts and ors. Fold it into a single load or a load
9554 /// and a BSWAP if the target supports it.
9555///
9556/// Assuming little endian target:
9557/// i8 *a = ...
9558/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9559/// =>
9560/// i32 val = *((i32)a)
9561///
9562/// i8 *a = ...
9563/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9564/// =>
9565/// i32 val = BSWAP(*((i32)a))
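///
/// If the most significant bytes of the OR are known to be zero, the narrower
/// value is loaded and zero-extended, e.g. (little endian target):
/// i32 val = a[0] | (a[1] << 8)
/// =>
/// i32 val = zext(*((i16)a))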
9566///
9567/// TODO: This rule matches complex patterns with OR node roots and doesn't
9568/// interact well with the worklist mechanism. When a part of the pattern is
9569/// updated (e.g. one of the loads) its direct users are put into the worklist,
9570/// but the root node of the pattern which triggers the load combine is not
9571 /// necessarily a direct user of the changed node. For example, once the address
9572 /// of the t28 load is reassociated, the load combine won't be triggered:
9573/// t25: i32 = add t4, Constant:i32<2>
9574/// t26: i64 = sign_extend t25
9575/// t27: i64 = add t2, t26
9576/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9577/// t29: i32 = zero_extend t28
9578/// t32: i32 = shl t29, Constant:i8<8>
9579/// t33: i32 = or t23, t32
9580/// As a possible fix visitLoad can check if the load can be a part of a load
9581/// combine pattern and add corresponding OR roots to the worklist.
9582SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9583 assert(N->getOpcode() == ISD::OR &&
9584 "Can only match load combining against OR nodes");
9585
9586 // Handles simple types only
9587 EVT VT = N->getValueType(0);
9588 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9589 return SDValue();
9590 unsigned ByteWidth = VT.getSizeInBits() / 8;
9591
9592 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9593 auto MemoryByteOffset = [&](SDByteProvider P) {
9594 assert(P.hasSrc() && "Must be a memory byte provider");
9595 auto *Load = cast<LoadSDNode>(P.Src.value());
9596
9597 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9598
9599 assert(LoadBitWidth % 8 == 0 &&
9600 "can only analyze providers for individual bytes not bit");
9601 unsigned LoadByteWidth = LoadBitWidth / 8;
9602 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9603 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9604 };
9605
9606 std::optional<BaseIndexOffset> Base;
9607 SDValue Chain;
9608
9609 SmallPtrSet<LoadSDNode *, 8> Loads;
9610 std::optional<SDByteProvider> FirstByteProvider;
9611 int64_t FirstOffset = INT64_MAX;
9612
9613 // Check if all the bytes of the OR we are looking at are loaded from the same
9614 // base address. Collect bytes offsets from Base address in ByteOffsets.
9615 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9616 unsigned ZeroExtendedBytes = 0;
9617 for (int i = ByteWidth - 1; i >= 0; --i) {
9618 auto P =
9619 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9620 /*StartingIndex*/ i);
9621 if (!P)
9622 return SDValue();
9623
9624 if (P->isConstantZero()) {
9625 // It's OK for the N most significant bytes to be 0, we can just
9626 // zero-extend the load.
9627 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9628 return SDValue();
9629 continue;
9630 }
9631 assert(P->hasSrc() && "provenance should either be memory or zero");
9632 auto *L = cast<LoadSDNode>(P->Src.value());
9633
9634 // All loads must share the same chain
9635 SDValue LChain = L->getChain();
9636 if (!Chain)
9637 Chain = LChain;
9638 else if (Chain != LChain)
9639 return SDValue();
9640
9641 // Loads must share the same base address
9642 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9643 int64_t ByteOffsetFromBase = 0;
9644
9645 // For vector loads, the expected load combine pattern will have an
9646 // ExtractElement for each index in the vector. While each of these
9647 // ExtractElements will be accessing the same base address as determined
9648 // by the load instruction, the actual bytes they interact with will differ
9649 // due to different ExtractElement indices. To accurately determine the
9650 // byte position of an ExtractElement, we offset the base load ptr with
9651 // the index multiplied by the byte size of each element in the vector.
9652 if (L->getMemoryVT().isVector()) {
9653 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9654 if (LoadWidthInBit % 8 != 0)
9655 return SDValue();
9656 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9657 Ptr.addToOffset(ByteOffsetFromVector);
9658 }
9659
9660 if (!Base)
9661 Base = Ptr;
9662
9663 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9664 return SDValue();
9665
9666 // Calculate the offset of the current byte from the base address
9667 ByteOffsetFromBase += MemoryByteOffset(*P);
9668 ByteOffsets[i] = ByteOffsetFromBase;
9669
9670 // Remember the first byte load
9671 if (ByteOffsetFromBase < FirstOffset) {
9672 FirstByteProvider = P;
9673 FirstOffset = ByteOffsetFromBase;
9674 }
9675
9676 Loads.insert(L);
9677 }
9678
9679 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9680 "memory, so there must be at least one load which produces the value");
9681 assert(Base && "Base address of the accessed memory location must be set");
9682 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9683
9684 bool NeedsZext = ZeroExtendedBytes > 0;
9685
9686 EVT MemVT =
9687 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9688
9689 if (!MemVT.isSimple())
9690 return SDValue();
9691
9692 // Before legalize we can introduce too wide illegal loads which will be later
9693 // split into legal sized loads. This enables us to combine i64 load by i8
9694 // patterns to a couple of i32 loads on 32 bit targets.
9695 if (LegalOperations &&
9696 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9697 MemVT))
9698 return SDValue();
9699
9700 // Check if the bytes of the OR we are looking at match with either big or
9701 // little endian value load
9702 std::optional<bool> IsBigEndian = isBigEndian(
9703 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9704 if (!IsBigEndian)
9705 return SDValue();
9706
9707 assert(FirstByteProvider && "must be set");
9708
9709 // Ensure that the first byte is loaded from zero offset of the first load.
9710 // So the combined value can be loaded from the first load address.
9711 if (MemoryByteOffset(*FirstByteProvider) != 0)
9712 return SDValue();
9713 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9714
9715 // The node we are looking at matches with the pattern, check if we can
9716 // replace it with a single (possibly zero-extended) load and bswap + shift if
9717 // needed.
9718
9719 // If the load needs byte swap check if the target supports it
9720 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9721
9722 // Before legalize we can introduce illegal bswaps which will be later
9723 // converted to an explicit bswap sequence. This way we end up with a single
9724 // load and byte shuffling instead of several loads and byte shuffling.
9725 // We do not introduce illegal bswaps when zero-extending as this tends to
9726 // introduce too many arithmetic instructions.
9727 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9728 !TLI.isOperationLegal(ISD::BSWAP, VT))
9729 return SDValue();
9730
9731 // If we need to bswap and zero extend, we have to insert a shift. Check that
9732 // it is legal.
9733 if (NeedsBswap && NeedsZext && LegalOperations &&
9734 !TLI.isOperationLegal(ISD::SHL, VT))
9735 return SDValue();
9736
9737 // Check that a load of the wide type is both allowed and fast on the target
9738 unsigned Fast = 0;
9739 bool Allowed =
9740 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9741 *FirstLoad->getMemOperand(), &Fast);
9742 if (!Allowed || !Fast)
9743 return SDValue();
9744
9745 SDValue NewLoad =
9746 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9747 Chain, FirstLoad->getBasePtr(),
9748 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9749
9750 // Transfer chain users from old loads to the new load.
9751 for (LoadSDNode *L : Loads)
9752 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9753
9754 if (!NeedsBswap)
9755 return NewLoad;
9756
9757 SDValue ShiftedLoad =
9758 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9759 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9760 VT, SDLoc(N)))
9761 : NewLoad;
9762 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9763}
9764
9765// If the target has andn, bsl, or a similar bit-select instruction,
9766// we want to unfold masked merge, with canonical pattern of:
9767// | A | |B|
9768// ((x ^ y) & m) ^ y
9769// | D |
9770// Into:
9771// (x & m) | (y & ~m)
9772// If y is a constant, m is not a 'not', and the 'andn' does not work with
9773// immediates, we unfold into a different pattern:
9774// ~(~x & m) & (m | y)
9775// If x is a constant, m is a 'not', and the 'andn' does not work with
9776// immediates, we unfold into a different pattern:
9777// (x | ~m) & ~(~m & ~y)
9778// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9779// the very least that breaks andnpd / andnps patterns, and because those
9780// patterns are simplified in IR and shouldn't be created in the DAG
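// A worked 8-bit example of the basic unfold: with x = 0b10101010,
// y = 0b01010101 and m = 0b11110000, ((x ^ y) & m) ^ y and
// (x & m) | (y & ~m) both evaluate to 0b10100101.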
9781SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9782 assert(N->getOpcode() == ISD::XOR);
9783
9784 // Don't touch 'not' (i.e. where y = -1).
9785 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9786 return SDValue();
9787
9788 EVT VT = N->getValueType(0);
9789
9790 // There are 3 commutable operators in the pattern,
9791 // so we have to deal with 8 possible variants of the basic pattern.
9792 SDValue X, Y, M;
9793 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9794 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9795 return false;
9796 SDValue Xor = And.getOperand(XorIdx);
9797 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9798 return false;
9799 SDValue Xor0 = Xor.getOperand(0);
9800 SDValue Xor1 = Xor.getOperand(1);
9801 // Don't touch 'not' (i.e. where y = -1).
9802 if (isAllOnesOrAllOnesSplat(Xor1))
9803 return false;
9804 if (Other == Xor0)
9805 std::swap(Xor0, Xor1);
9806 if (Other != Xor1)
9807 return false;
9808 X = Xor0;
9809 Y = Xor1;
9810 M = And.getOperand(XorIdx ? 0 : 1);
9811 return true;
9812 };
9813
9814 SDValue N0 = N->getOperand(0);
9815 SDValue N1 = N->getOperand(1);
9816 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9817 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9818 return SDValue();
9819
9820 // Don't do anything if the mask is constant. This should not be reachable.
9821 // InstCombine should have already unfolded this pattern, and DAGCombiner
9822 // probably shouldn't produce it, too.
9823 if (isa<ConstantSDNode>(M.getNode()))
9824 return SDValue();
9825
9826 // We can transform if the target has AndNot
9827 if (!TLI.hasAndNot(M))
9828 return SDValue();
9829
9830 SDLoc DL(N);
9831
9832 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9833 // a bitwise not that would already allow ANDN to be used.
9834 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9835 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9836 // If not, we need to do a bit more work to make sure andn is still used.
9837 SDValue NotX = DAG.getNOT(DL, X, VT);
9838 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9839 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9840 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9841 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9842 }
9843
9844 // If X is a constant and M is a bitwise not, check that 'andn' works with
9845 // immediates.
9846 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9847 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9848 // If not, we need to do a bit more work to make sure andn is still used.
9849 SDValue NotM = M.getOperand(0);
9850 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9851 SDValue NotY = DAG.getNOT(DL, Y, VT);
9852 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9853 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9854 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9855 }
9856
9857 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9858 SDValue NotM = DAG.getNOT(DL, M, VT);
9859 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9860
9861 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9862}
9863
9864SDValue DAGCombiner::visitXOR(SDNode *N) {
9865 SDValue N0 = N->getOperand(0);
9866 SDValue N1 = N->getOperand(1);
9867 EVT VT = N0.getValueType();
9868 SDLoc DL(N);
9869
9870 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9871 if (N0.isUndef() && N1.isUndef())
9872 return DAG.getConstant(0, DL, VT);
9873
9874 // fold (xor x, undef) -> undef
9875 if (N0.isUndef())
9876 return N0;
9877 if (N1.isUndef())
9878 return N1;
9879
9880 // fold (xor c1, c2) -> c1^c2
9881 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9882 return C;
9883
9884 // canonicalize constant to RHS
9885 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9886 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9887 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9888
9889 // fold vector ops
9890 if (VT.isVector()) {
9891 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9892 return FoldedVOp;
9893
9894 // fold (xor x, 0) -> x, vector edition
9895 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9896 return N0;
9897 }
9898
9899 // fold (xor x, 0) -> x
9900 if (isNullConstant(N1))
9901 return N0;
9902
9903 if (SDValue NewSel = foldBinOpIntoSelect(N))
9904 return NewSel;
9905
9906 // reassociate xor
9907 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9908 return RXOR;
9909
9910 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9911 if (SDValue SD =
9912 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9913 return SD;
9914
9915 // fold (a^b) -> (a|b) iff a and b share no bits.
9916 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9917 DAG.haveNoCommonBitsSet(N0, N1))
9918 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9919
9920 // look for 'add-like' folds:
9921 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9922 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9923 isMinSignedConstant(N1))
9924 if (SDValue Combined = visitADDLike(N))
9925 return Combined;
9926
9927 // fold not (setcc x, y, cc) -> setcc x y !cc
9928 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
9929 unsigned N0Opcode = N0.getOpcode();
9930 SDValue LHS, RHS, CC;
9931 if (TLI.isConstTrueVal(N1) &&
9932 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
9933 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
9934 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
9935 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9936 LHS.getValueType());
9937 if (!LegalOperations ||
9938 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9939 switch (N0Opcode) {
9940 default:
9941 llvm_unreachable("Unhandled SetCC Equivalent!");
9942 case ISD::SETCC:
9943 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9944 case ISD::SELECT_CC:
9945 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9946 N0.getOperand(3), NotCC);
9947 case ISD::STRICT_FSETCC:
9948 case ISD::STRICT_FSETCCS: {
9949 if (N0.hasOneUse()) {
9950 // FIXME Can we handle multiple uses? Could we token factor the chain
9951 // results from the new/old setcc?
9952 SDValue SetCC =
9953 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9954 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9955 CombineTo(N, SetCC);
9956 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9957 recursivelyDeleteUnusedNodes(N0.getNode());
9958 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9959 }
9960 break;
9961 }
9962 }
9963 }
9964 }
9965
9966 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9967 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9968 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9969 SDValue V = N0.getOperand(0);
9970 SDLoc DL0(N0);
9971 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9972 DAG.getConstant(1, DL0, V.getValueType()));
9973 AddToWorklist(V.getNode());
9974 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9975 }
9976
9977 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9978 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9979 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9980 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9981 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9982 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9983 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9984 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9985 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9986 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9987 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9988 }
9989 }
9990 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9991 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9992 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9993 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9994 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9995 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9996 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9997 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9998 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9999 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10000 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10001 }
10002 }
10003
10004 // fold (not (neg x)) -> (add X, -1)
10005 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
10006 // Y is a constant or the subtract has a single use.
10007 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
10008 isNullConstant(N0.getOperand(0))) {
10009 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
10010 DAG.getAllOnesConstant(DL, VT));
10011 }
10012
10013 // fold (not (add X, -1)) -> (neg X)
10014 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10015 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10016 return DAG.getNegative(N0.getOperand(0), DL, VT);
10017 }
10018
10019 // fold (xor (and x, y), y) -> (and (not x), y)
10020 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10021 SDValue X = N0.getOperand(0);
10022 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10023 AddToWorklist(NotX.getNode());
10024 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10025 }
10026
10027 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10028 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10029 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10030 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10031 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10032 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10033 SDValue S0 = S.getOperand(0);
10034 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10035 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10036 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10037 return DAG.getNode(ISD::ABS, DL, VT, S0);
10038 }
10039 }
10040
10041 // fold (xor x, x) -> 0
10042 if (N0 == N1)
10043 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10044
10045 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10046 // Here is a concrete example of this equivalence:
10047 // i16 x == 14
10048 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10049 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10050 //
10051 // =>
10052 //
10053 // i16 ~1 == 0b1111111111111110
10054 // i16 rol(~1, 14) == 0b1011111111111111
10055 //
10056 // Some additional tips to help conceptualize this transform:
10057 // - Try to see the operation as placing a single zero in a value of all ones.
10058 // - There exists no value for x which would allow the result to contain zero.
10059 // - Values of x larger than the bitwidth are undefined and do not require a
10060 // consistent result.
10061 // - Pushing the zero left requires shifting one bits in from the right.
10062 // A rotate left of ~1 is a nice way of achieving the desired result.
10063 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10064 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
10065 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10066 N0.getOperand(1));
10067 }
10068
10069 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10070 if (N0Opcode == N1.getOpcode())
10071 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10072 return V;
10073
10074 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10075 return R;
10076 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10077 return R;
10078 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10079 return R;
10080
10081 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10082 if (SDValue MM = unfoldMaskedMerge(N))
10083 return MM;
10084
10085 // Simplify the expression using non-local knowledge.
10086 if (SimplifyDemandedBits(SDValue(N, 0)))
10087 return SDValue(N, 0);
10088
10089 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10090 return Combined;
10091
10092 return SDValue();
10093}
10094
10095/// If we have a shift-by-constant of a bitwise logic op that itself has a
10096/// shift-by-constant operand with identical opcode, we may be able to convert
10097/// that into 2 independent shifts followed by the logic op. This is a
10098/// throughput improvement.
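/// For example: srl (and (srl X, 2), Y), 3 --> and (srl X, 5), (srl Y, 3);
/// the two new shifts are independent and can execute in parallel.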
10099 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10100 // Match a one-use bitwise logic op.
10101 SDValue LogicOp = Shift->getOperand(0);
10102 if (!LogicOp.hasOneUse())
10103 return SDValue();
10104
10105 unsigned LogicOpcode = LogicOp.getOpcode();
10106 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10107 LogicOpcode != ISD::XOR)
10108 return SDValue();
10109
10110 // Find a matching one-use shift by constant.
10111 unsigned ShiftOpcode = Shift->getOpcode();
10112 SDValue C1 = Shift->getOperand(1);
10113 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10114 assert(C1Node && "Expected a shift with constant operand");
10115 const APInt &C1Val = C1Node->getAPIntValue();
10116 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10117 const APInt *&ShiftAmtVal) {
10118 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10119 return false;
10120
10121 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10122 if (!ShiftCNode)
10123 return false;
10124
10125 // Capture the shifted operand and shift amount value.
10126 ShiftOp = V.getOperand(0);
10127 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10128
10129 // Shift amount types do not have to match their operand type, so check that
10130 // the constants are the same width.
10131 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10132 return false;
10133
10134 // The fold is not valid if the sum of the shift values doesn't fit in the
10135 // given shift amount type.
10136 bool Overflow = false;
10137 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10138 if (Overflow)
10139 return false;
10140
10141 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10142 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10143 return false;
10144
10145 return true;
10146 };
10147
10148 // Logic ops are commutative, so check each operand for a match.
10149 SDValue X, Y;
10150 const APInt *C0Val;
10151 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10152 Y = LogicOp.getOperand(1);
10153 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10154 Y = LogicOp.getOperand(0);
10155 else
10156 return SDValue();
10157
10158 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10159 SDLoc DL(Shift);
10160 EVT VT = Shift->getValueType(0);
10161 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10162 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10163 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10164 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10165 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10166 LogicOp->getFlags());
10167}
10168
10169/// Handle transforms common to the three shifts, when the shift amount is a
10170/// constant.
10171/// We are looking for: (shift being one of shl/sra/srl)
10172/// shift (binop X, C0), C1
10173/// And want to transform into:
10174/// binop (shift X, C1), (shift C0, C1)
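/// For example: shl (or X, 0xF0), 8 --> or (shl X, 8), 0xF000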
10175SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10176 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10177
10178 // Do not turn a 'not' into a regular xor.
10179 if (isBitwiseNot(N->getOperand(0)))
10180 return SDValue();
10181
10182 // The inner binop must be one-use, since we want to replace it.
10183 SDValue LHS = N->getOperand(0);
10184 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10185 return SDValue();
10186
10187 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10188 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10189 return R;
10190
10191 // We want to pull some binops through shifts, so that we have (and (shift))
10192 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10193 // thing happens with address calculations, so it's important to canonicalize
10194 // it.
10195 switch (LHS.getOpcode()) {
10196 default:
10197 return SDValue();
10198 case ISD::OR:
10199 case ISD::XOR:
10200 case ISD::AND:
10201 break;
10202 case ISD::ADD:
10203 if (N->getOpcode() != ISD::SHL)
10204 return SDValue(); // only shl(add) not sr[al](add).
10205 break;
10206 }
10207
10208 // FIXME: disable this unless the input to the binop is a shift by a constant
10209 // or is copy/select. Enable this in other cases when we figure out it is
10210 // exactly profitable.
10211 SDValue BinOpLHSVal = LHS.getOperand(0);
10212 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10213 BinOpLHSVal.getOpcode() == ISD::SRA ||
10214 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10215 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10216 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10217 BinOpLHSVal.getOpcode() == ISD::SELECT;
10218
10219 if (!IsShiftByConstant && !IsCopyOrSelect)
10220 return SDValue();
10221
10222 if (IsCopyOrSelect && N->hasOneUse())
10223 return SDValue();
10224
10225 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10226 SDLoc DL(N);
10227 EVT VT = N->getValueType(0);
10228 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10229 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10230 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10231 N->getOperand(1));
10232 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10233 }
10234
10235 return SDValue();
10236}
10237
10238SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10239 assert(N->getOpcode() == ISD::TRUNCATE);
10240 assert(N->getOperand(0).getOpcode() == ISD::AND);
10241
10242 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10243 EVT TruncVT = N->getValueType(0);
10244 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10245 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10246 SDValue N01 = N->getOperand(0).getOperand(1);
10247 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10248 SDLoc DL(N);
10249 SDValue N00 = N->getOperand(0).getOperand(0);
10250 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10251 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10252 AddToWorklist(Trunc00.getNode());
10253 AddToWorklist(Trunc01.getNode());
10254 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10255 }
10256 }
10257
10258 return SDValue();
10259}
10260
10261SDValue DAGCombiner::visitRotate(SDNode *N) {
10262 SDLoc dl(N);
10263 SDValue N0 = N->getOperand(0);
10264 SDValue N1 = N->getOperand(1);
10265 EVT VT = N->getValueType(0);
10266 unsigned Bitsize = VT.getScalarSizeInBits();
10267
10268 // fold (rot x, 0) -> x
10269 if (isNullOrNullSplat(N1))
10270 return N0;
10271
10272 // fold (rot x, c) -> x iff (c % BitSize) == 0
10273 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10274 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10275 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10276 return N0;
10277 }
10278
10279 // fold (rot x, c) -> (rot x, c % BitSize)
10280 bool OutOfRange = false;
10281 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10282 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10283 return true;
10284 };
10285 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10286 EVT AmtVT = N1.getValueType();
10287 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10288 if (SDValue Amt =
10289 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10290 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10291 }
10292
10293 // rot i16 X, 8 --> bswap X
10294 auto *RotAmtC = isConstOrConstSplat(N1);
10295 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10296 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10297 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10298
10299 // Simplify the operands using demanded-bits information.
10300 if (SimplifyDemandedBits(SDValue(N, 0)))
10301 return SDValue(N, 0);
10302
10303 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10304 if (N1.getOpcode() == ISD::TRUNCATE &&
10305 N1.getOperand(0).getOpcode() == ISD::AND) {
10306 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10307 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10308 }
10309
10310 unsigned NextOp = N0.getOpcode();
10311
10312 // fold (rot* (rot* x, c2), c1)
10313 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
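// For example, with i32 values: (rotl (rotl x, 5), 7) -> (rotl x, 12), and
// (rotl (rotr x, 3), 7) -> (rotl x, 4).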
10314 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10315 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10316 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10317 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10318 EVT ShiftVT = N1.getValueType();
10319 bool SameSide = (N->getOpcode() == NextOp);
10320 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10321 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10322 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10323 {N1, BitsizeC});
10324 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10325 {N0.getOperand(1), BitsizeC});
10326 if (Norm1 && Norm2)
10327 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10328 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10329 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10330 {CombinedShift, BitsizeC});
10331 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10332 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10333 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10334 CombinedShiftNorm);
10335 }
10336 }
10337 }
10338 return SDValue();
10339}
10340
10341SDValue DAGCombiner::visitSHL(SDNode *N) {
10342 SDValue N0 = N->getOperand(0);
10343 SDValue N1 = N->getOperand(1);
10344 if (SDValue V = DAG.simplifyShift(N0, N1))
10345 return V;
10346
10347 SDLoc DL(N);
10348 EVT VT = N0.getValueType();
10349 EVT ShiftVT = N1.getValueType();
10350 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10351
10352 // fold (shl c1, c2) -> c1<<c2
10353 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10354 return C;
10355
10356 // fold vector ops
10357 if (VT.isVector()) {
10358 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10359 return FoldedVOp;
10360
10361 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10362 // If setcc produces all-one true value then:
10363 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10364 if (N1CV && N1CV->isConstant()) {
10365 if (N0.getOpcode() == ISD::AND) {
10366 SDValue N00 = N0->getOperand(0);
10367 SDValue N01 = N0->getOperand(1);
10368 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10369
10370 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10371 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10372 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10373 if (SDValue C =
10374 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10375 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10376 }
10377 }
10378 }
10379 }
10380
10381 if (SDValue NewSel = foldBinOpIntoSelect(N))
10382 return NewSel;
10383
10384 // if (shl x, c) is known to be zero, return 0
10385 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10386 return DAG.getConstant(0, DL, VT);
10387
10388 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10389 if (N1.getOpcode() == ISD::TRUNCATE &&
10390 N1.getOperand(0).getOpcode() == ISD::AND) {
10391 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10392 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10393 }
10394
10395 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10396 if (N0.getOpcode() == ISD::SHL) {
10397 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10398 ConstantSDNode *RHS) {
10399 APInt c1 = LHS->getAPIntValue();
10400 APInt c2 = RHS->getAPIntValue();
10401 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10402 return (c1 + c2).uge(OpSizeInBits);
10403 };
10404 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10405 return DAG.getConstant(0, DL, VT);
10406
10407 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10408 ConstantSDNode *RHS) {
10409 APInt c1 = LHS->getAPIntValue();
10410 APInt c2 = RHS->getAPIntValue();
10411 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10412 return (c1 + c2).ult(OpSizeInBits);
10413 };
10414 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10415 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10416 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10417 }
10418 }
10419
10420 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10421 // For this to be valid, the second form must not preserve any of the bits
10422 // that are shifted out by the inner shift in the first form. This means
10423 // the outer shift size must be >= the number of bits added by the ext.
10424 // As a corollary, we don't care what kind of ext it is.
10425 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10426 N0.getOpcode() == ISD::ANY_EXTEND ||
10427 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10428 N0.getOperand(0).getOpcode() == ISD::SHL) {
10429 SDValue N0Op0 = N0.getOperand(0);
10430 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10431 EVT InnerVT = N0Op0.getValueType();
10432 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10433
10434 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10435 ConstantSDNode *RHS) {
10436 APInt c1 = LHS->getAPIntValue();
10437 APInt c2 = RHS->getAPIntValue();
10438 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10439 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10440 (c1 + c2).uge(OpSizeInBits);
10441 };
10442 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10443 /*AllowUndefs*/ false,
10444 /*AllowTypeMismatch*/ true))
10445 return DAG.getConstant(0, DL, VT);
10446
10447 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10448 ConstantSDNode *RHS) {
10449 APInt c1 = LHS->getAPIntValue();
10450 APInt c2 = RHS->getAPIntValue();
10451 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10452 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10453 (c1 + c2).ult(OpSizeInBits);
10454 };
10455 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10456 /*AllowUndefs*/ false,
10457 /*AllowTypeMismatch*/ true)) {
10458 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10459 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10460 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10461 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10462 }
10463 }
10464
10465 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10466 // Only fold this if the inner zext has no other uses to avoid increasing
10467 // the total number of instructions.
10468 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10469 N0.getOperand(0).getOpcode() == ISD::SRL) {
10470 SDValue N0Op0 = N0.getOperand(0);
10471 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10472
10473 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10474 APInt c1 = LHS->getAPIntValue();
10475 APInt c2 = RHS->getAPIntValue();
10476 zeroExtendToMatch(c1, c2);
10477 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10478 };
10479 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10480 /*AllowUndefs*/ false,
10481 /*AllowTypeMismatch*/ true)) {
10482 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10483 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10484 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10485 AddToWorklist(NewSHL.getNode());
10486 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10487 }
10488 }
10489
10490 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10491 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10492 ConstantSDNode *RHS) {
10493 const APInt &LHSC = LHS->getAPIntValue();
10494 const APInt &RHSC = RHS->getAPIntValue();
10495 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10496 LHSC.getZExtValue() <= RHSC.getZExtValue();
10497 };
10498
10499 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10500 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10501 if (N0->getFlags().hasExact()) {
10502 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10503 /*AllowUndefs*/ false,
10504 /*AllowTypeMismatch*/ true)) {
10505 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10506 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10507 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10508 }
10509 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10510 /*AllowUndefs*/ false,
10511 /*AllowTypeMismatch*/ true)) {
10512 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10513 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10514 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10515 }
10516 }
10517
10518 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10519 // (and (srl x, (sub c1, c2)), MASK)
10520 // Only fold this if the inner shift has no other uses -- if it does,
10521 // folding this will increase the total number of instructions.
10522 if (N0.getOpcode() == ISD::SRL &&
10523 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10524 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10525 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10526 /*AllowUndefs*/ false,
10527 /*AllowTypeMismatch*/ true)) {
10528 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10529 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10530 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10531 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10532 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10533 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10534 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10535 }
10536 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10537 /*AllowUndefs*/ false,
10538 /*AllowTypeMismatch*/ true)) {
10539 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10540 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10541 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10542 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10543 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10544 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10545 }
10546 }
10547 }
10548
10549 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10550 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10551 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10552 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10553 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10554 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10555 }
10556
10557 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10558 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10559 // Variant of version done on multiply, except mul by a power of 2 is turned
10560 // into a shift.
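// For example: (shl (add x, 3), 2) -> (add (shl x, 2), 12).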
10561 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10562 TLI.isDesirableToCommuteWithShift(N, Level)) {
10563 SDValue N01 = N0.getOperand(1);
10564 if (SDValue Shl1 =
10565 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10566 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10567 AddToWorklist(Shl0.getNode());
10568 SDNodeFlags Flags;
10569 // Preserve the disjoint flag for Or.
10570 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10571 Flags.setDisjoint(true);
10572 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10573 }
10574 }
10575
10576 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10577 // TODO: Add zext/add_nuw variant with suitable test coverage
10578 // TODO: Should we limit this with isLegalAddImmediate?
10579 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10580 N0.getOperand(0).getOpcode() == ISD::ADD &&
10581 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10582 TLI.isDesirableToCommuteWithShift(N, Level)) {
10583 SDValue Add = N0.getOperand(0);
10584 SDLoc DL(N0);
10585 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10586 {Add.getOperand(1)})) {
10587 if (SDValue ShlC =
10588 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10589 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10590 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10591 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10592 }
10593 }
10594 }
10595
10596 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10597 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10598 SDValue N01 = N0.getOperand(1);
10599 if (SDValue Shl =
10600 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10601 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10602 }
10603
10604 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10605 if (N1C && !N1C->isOpaque())
10606 if (SDValue NewSHL = visitShiftByConstant(N))
10607 return NewSHL;
10608
10609 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10610 // target.
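// This relies on the identity (Y & -Y) == (1 << cttz(Y)) for non-zero Y, so
// the shift amount never needs to be computed explicitly.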
10611 if (((N1.getOpcode() == ISD::CTTZ &&
10612 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10613 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10614 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10615 !TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, ShiftVT)) {
10616 SDValue Y = N1.getOperand(0);
10617 SDLoc DL(N);
10618 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10619 SDValue And =
10620 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10621 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10622 }
10623
10624 if (SimplifyDemandedBits(SDValue(N, 0)))
10625 return SDValue(N, 0);
10626
10627 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10628 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10629 const APInt &C0 = N0.getConstantOperandAPInt(0);
10630 const APInt &C1 = N1C->getAPIntValue();
10631 return DAG.getVScale(DL, VT, C0 << C1);
10632 }
10633
10634 SDValue X;
10635 APInt VS0;
10636
10637 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10638 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10639 SDNodeFlags Flags;
10640 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10641 N0->getFlags().hasNoUnsignedWrap());
10642
10643 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10644 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10645 }
10646
10647 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10648 APInt ShlVal;
10649 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10650 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10651 const APInt &C0 = N0.getConstantOperandAPInt(0);
10652 if (ShlVal.ult(C0.getBitWidth())) {
10653 APInt NewStep = C0 << ShlVal;
10654 return DAG.getStepVector(DL, VT, NewStep);
10655 }
10656 }
10657
10658 return SDValue();
10659}
10660
10661// Transform a right shift of a multiply into a multiply-high.
10662// Examples:
10663 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10664 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10665 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10666 const TargetLowering &TLI) {
10667 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10668 "SRL or SRA node is required here!");
10669
10670 // Check the shift amount. Proceed with the transformation if the shift
10671 // amount is constant.
10672 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10673 if (!ShiftAmtSrc)
10674 return SDValue();
10675
10676 // The operation feeding into the shift must be a multiply.
10677 SDValue ShiftOperand = N->getOperand(0);
10678 if (ShiftOperand.getOpcode() != ISD::MUL)
10679 return SDValue();
10680
10681 // Both operands must be equivalent extend nodes.
10682 SDValue LeftOp = ShiftOperand.getOperand(0);
10683 SDValue RightOp = ShiftOperand.getOperand(1);
10684
10685 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10686 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10687
10688 if (!IsSignExt && !IsZeroExt)
10689 return SDValue();
10690
10691 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10692 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10693
10694 // return true if U may use the lower bits of its operands
10695 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10696 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10697 return true;
10698 }
10699 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10700 if (!UShiftAmtSrc) {
10701 return true;
10702 }
10703 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10704 return UShiftAmt < NarrowVTSize;
10705 };
10706
10707 // If the lower part of the MUL is also used and MUL_LOHI is supported
10708 // do not introduce the MULH in favor of MUL_LOHI
10709 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10710 if (!ShiftOperand.hasOneUse() &&
10711 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10712 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10713 return SDValue();
10714 }
10715
10716 SDValue MulhRightOp;
10717 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10718 unsigned ActiveBits = IsSignExt
10719 ? Constant->getAPIntValue().getSignificantBits()
10720 : Constant->getAPIntValue().getActiveBits();
10721 if (ActiveBits > NarrowVTSize)
10722 return SDValue();
10723 MulhRightOp = DAG.getConstant(
10724 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10725 NarrowVT);
10726 } else {
10727 if (LeftOp.getOpcode() != RightOp.getOpcode())
10728 return SDValue();
10729 // Check that the two extend nodes are the same type.
10730 if (NarrowVT != RightOp.getOperand(0).getValueType())
10731 return SDValue();
10732 MulhRightOp = RightOp.getOperand(0);
10733 }
10734
10735 EVT WideVT = LeftOp.getValueType();
10736 // Proceed with the transformation if the wide types match.
10737 assert((WideVT == RightOp.getValueType()) &&
10738 "Cannot have a multiply node with two different operand types.");
10739
10740 // Proceed with the transformation if the wide type is twice as large
10741 // as the narrow type.
10742 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10743 return SDValue();
10744
10745 // Check the shift amount with the narrow type size.
10746 // Proceed with the transformation if the shift amount is the width
10747 // of the narrow type.
10748 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10749 if (ShiftAmt != NarrowVTSize)
10750 return SDValue();
10751
10752 // If the operation feeding into the MUL is a sign extend (sext),
10753 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10754 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10755
10756 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10757 // or if it is a vector type then we could transform to an acceptable type and
10758 // rely on legalization to split/combine the result.
10759 if (NarrowVT.isVector()) {
10760 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10761 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10762 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10763 return SDValue();
10764 } else {
10765 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10766 return SDValue();
10767 }
10768
10769 SDValue Result =
10770 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10771 bool IsSigned = N->getOpcode() == ISD::SRA;
10772 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10773}
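// Editorial note (illustrative, not part of the upstream source): a concrete
// instance of the combine above is
//   (srl (mul (zext i32 a to i64), (zext i32 b to i64)), 32)
//     -> (zext (mulhu a, b) to i64)
// when MULHU is legal or custom for i32; the sign-extended/SRA form produces
// mulhs with a sign extension of the result instead.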
10774
10775// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10776 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10777 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10778 unsigned Opcode = N->getOpcode();
10779 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10780 return SDValue();
10781
10782 SDValue N0 = N->getOperand(0);
10783 EVT VT = N->getValueType(0);
10784 SDLoc DL(N);
10785 SDValue X, Y;
10786
10787 // If both operands are bswap/bitreverse, ignore the multiuse
10788   if (sd_match(N, m_UnaryOp(Opcode, m_BitwiseLogic(m_UnaryOp(Opcode, m_Value(X)),
10789                                                    m_UnaryOp(Opcode, m_Value(Y))))))
10790 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10791
10792 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10793   if (sd_match(N0, m_OneUse(m_BitwiseLogic(
10794                        m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10795 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10796 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10797 }
10798
10799 return SDValue();
10800}
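// Editorial note (illustrative, not part of the upstream source): for example,
//   (bswap (xor (bswap x), y)) -> (xor x, (bswap y))
// which removes one byte swap when the inner bswap(x) and the xor are only
// used here; the same reassociation applies to ISD::BITREVERSE.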
10801
10802SDValue DAGCombiner::visitSRA(SDNode *N) {
10803 SDValue N0 = N->getOperand(0);
10804 SDValue N1 = N->getOperand(1);
10805 if (SDValue V = DAG.simplifyShift(N0, N1))
10806 return V;
10807
10808 SDLoc DL(N);
10809 EVT VT = N0.getValueType();
10810 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10811
10812   // fold (sra c1, c2) -> c1 >>s c2
10813 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10814 return C;
10815
10816 // Arithmetic shifting an all-sign-bit value is a no-op.
10817 // fold (sra 0, x) -> 0
10818 // fold (sra -1, x) -> -1
10819 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10820 return N0;
10821
10822 // fold vector ops
10823 if (VT.isVector())
10824 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10825 return FoldedVOp;
10826
10827 if (SDValue NewSel = foldBinOpIntoSelect(N))
10828 return NewSel;
10829
10830   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10831
10832 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10833 // clamp (add c1, c2) to max shift.
10834 if (N0.getOpcode() == ISD::SRA) {
10835 EVT ShiftVT = N1.getValueType();
10836 EVT ShiftSVT = ShiftVT.getScalarType();
10837 SmallVector<SDValue, 16> ShiftValues;
10838
10839 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10840 APInt c1 = LHS->getAPIntValue();
10841 APInt c2 = RHS->getAPIntValue();
10842 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10843 APInt Sum = c1 + c2;
10844 unsigned ShiftSum =
10845 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10846 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10847 return true;
10848 };
10849 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10850 SDValue ShiftValue;
10851 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10852 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10853 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10854 assert(ShiftValues.size() == 1 &&
10855 "Expected matchBinaryPredicate to return one element for "
10856 "SPLAT_VECTORs");
10857 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10858 } else
10859 ShiftValue = ShiftValues[0];
10860 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10861 }
10862 }
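  // Editorial note (illustrative, not part of the upstream source): e.g.
  //   (sra (sra x, 3), 5) -> (sra x, 8)
  // and, with clamping on i8, (sra (sra x, 6), 7) -> (sra x, 7), since an
  // arithmetic shift by at least the bit width is equivalent to shifting by
  // OpSizeInBits - 1.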
10863
10864 // fold (sra (shl X, m), (sub result_size, n))
10865 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10866 // result_size - n != m.
10867 // If truncate is free for the target sext(shl) is likely to result in better
10868 // code.
10869 if (N0.getOpcode() == ISD::SHL && N1C) {
10870 // Get the two constants of the shifts, CN0 = m, CN = n.
10871 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10872 if (N01C) {
10873 LLVMContext &Ctx = *DAG.getContext();
10874 // Determine what the truncate's result bitsize and type would be.
10875 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10876
10877 if (VT.isVector())
10878 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10879
10880 // Determine the residual right-shift amount.
10881 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10882
10883 // If the shift is not a no-op (in which case this should be just a sign
10884 // extend already), the truncated to type is legal, sign_extend is legal
10885 // on that type, and the truncate to that type is both legal and free,
10886 // perform the transform.
10887 if ((ShiftAmt > 0) &&
10888           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10889           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10890           TLI.isTruncateFree(VT, TruncVT)) {
10891 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10892 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10893 N0.getOperand(0), Amt);
10894 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10895 Shift);
10896 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10897 N->getValueType(0), Trunc);
10898 }
10899 }
10900 }
10901
10902 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10903 // sra (add (shl X, N1C), AddC), N1C -->
10904 // sext (add (trunc X to (width - N1C)), AddC')
10905 // sra (sub AddC, (shl X, N1C)), N1C -->
10906 // sext (sub AddC1',(trunc X to (width - N1C)))
10907 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10908 N0.hasOneUse()) {
10909 bool IsAdd = N0.getOpcode() == ISD::ADD;
10910 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10911 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10912 Shl.hasOneUse()) {
10913 // TODO: AddC does not need to be a splat.
10914 if (ConstantSDNode *AddC =
10915 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10916 // Determine what the truncate's type would be and ask the target if
10917 // that is a free operation.
10918 LLVMContext &Ctx = *DAG.getContext();
10919 unsigned ShiftAmt = N1C->getZExtValue();
10920 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10921 if (VT.isVector())
10922 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10923
10924 // TODO: The simple type check probably belongs in the default hook
10925 // implementation and/or target-specific overrides (because
10926 // non-simple types likely require masking when legalized), but
10927 // that restriction may conflict with other transforms.
10928 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10929 TLI.isTruncateFree(VT, TruncVT)) {
10930 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10931 SDValue ShiftC =
10932 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10933 TruncVT.getScalarSizeInBits()),
10934 DL, TruncVT);
10935 SDValue Add;
10936 if (IsAdd)
10937 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10938 else
10939 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10940 return DAG.getSExtOrTrunc(Add, DL, VT);
10941 }
10942 }
10943 }
10944 }
10945
10946 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10947 if (N1.getOpcode() == ISD::TRUNCATE &&
10948 N1.getOperand(0).getOpcode() == ISD::AND) {
10949 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10950 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10951 }
10952
10953 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10954 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10955 // if c1 is equal to the number of bits the trunc removes
10956 // TODO - support non-uniform vector shift amounts.
10957 if (N0.getOpcode() == ISD::TRUNCATE &&
10958 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10959 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10960 N0.getOperand(0).hasOneUse() &&
10961 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10962 SDValue N0Op0 = N0.getOperand(0);
10963 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10964 EVT LargeVT = N0Op0.getValueType();
10965 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10966 if (LargeShift->getAPIntValue() == TruncBits) {
10967 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10968 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10969 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10970 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10971 SDValue SRA =
10972 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10973 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10974 }
10975 }
10976 }
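  // Editorial note (illustrative, not part of the upstream source): e.g. with
  // x : i64,
  //   (sra (trunc (srl x, 32) to i32), 5) -> (trunc (sra x, 37) to i32)
  // because the inner shift amount (32) equals the number of bits removed by
  // the truncate, so the bits feeding the outer sra are the same either way.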
10977
10978 // Simplify, based on bits shifted out of the LHS.
10979   if (SimplifyDemandedBits(SDValue(N, 0)))
10980     return SDValue(N, 0);
10981
10982 // If the sign bit is known to be zero, switch this to a SRL.
10983 if (DAG.SignBitIsZero(N0))
10984 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10985
10986 if (N1C && !N1C->isOpaque())
10987 if (SDValue NewSRA = visitShiftByConstant(N))
10988 return NewSRA;
10989
10990 // Try to transform this shift into a multiply-high if
10991 // it matches the appropriate pattern detected in combineShiftToMULH.
10992 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10993 return MULH;
10994
10995 // Attempt to convert a sra of a load into a narrower sign-extending load.
10996 if (SDValue NarrowLoad = reduceLoadWidth(N))
10997 return NarrowLoad;
10998
10999 if (SDValue AVG = foldShiftToAvg(N, DL))
11000 return AVG;
11001
11002 return SDValue();
11003}
11004
11005SDValue DAGCombiner::visitSRL(SDNode *N) {
11006 SDValue N0 = N->getOperand(0);
11007 SDValue N1 = N->getOperand(1);
11008 if (SDValue V = DAG.simplifyShift(N0, N1))
11009 return V;
11010
11011 SDLoc DL(N);
11012 EVT VT = N0.getValueType();
11013 EVT ShiftVT = N1.getValueType();
11014 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11015
11016 // fold (srl c1, c2) -> c1 >>u c2
11017 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11018 return C;
11019
11020 // fold vector ops
11021 if (VT.isVector())
11022 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11023 return FoldedVOp;
11024
11025 if (SDValue NewSel = foldBinOpIntoSelect(N))
11026 return NewSel;
11027
11028 // if (srl x, c) is known to be zero, return 0
11029   ConstantSDNode *N1C = isConstOrConstSplat(N1);
11030   if (N1C &&
11031 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11032 return DAG.getConstant(0, DL, VT);
11033
11034 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11035 if (N0.getOpcode() == ISD::SRL) {
11036 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11037                                           ConstantSDNode *RHS) {
11038       APInt c1 = LHS->getAPIntValue();
11039 APInt c2 = RHS->getAPIntValue();
11040 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11041 return (c1 + c2).uge(OpSizeInBits);
11042 };
11043 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11044 return DAG.getConstant(0, DL, VT);
11045
11046 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11047                                         ConstantSDNode *RHS) {
11048       APInt c1 = LHS->getAPIntValue();
11049 APInt c2 = RHS->getAPIntValue();
11050 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11051 return (c1 + c2).ult(OpSizeInBits);
11052 };
11053 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11054 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11055 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11056 }
11057 }
11058
11059 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11060 N0.getOperand(0).getOpcode() == ISD::SRL) {
11061 SDValue InnerShift = N0.getOperand(0);
11062 // TODO - support non-uniform vector shift amounts.
11063 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11064 uint64_t c1 = N001C->getZExtValue();
11065 uint64_t c2 = N1C->getZExtValue();
11066 EVT InnerShiftVT = InnerShift.getValueType();
11067 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11068 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11069 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11070 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11071 if (c1 + OpSizeInBits == InnerShiftSize) {
11072 if (c1 + c2 >= InnerShiftSize)
11073 return DAG.getConstant(0, DL, VT);
11074 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11075 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11076 InnerShift.getOperand(0), NewShiftAmt);
11077 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11078 }
11079 // In the more general case, we can clear the high bits after the shift:
11080 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11081 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11082 c1 + c2 < InnerShiftSize) {
11083 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11084 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11085 InnerShift.getOperand(0), NewShiftAmt);
11086 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11087 OpSizeInBits - c2),
11088 DL, InnerShiftVT);
11089 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11090 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11091 }
11092 }
11093 }
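  // Editorial note (illustrative, not part of the upstream source): e.g. with
  // x : i64,
  //   (srl (trunc (srl x, 32) to i32), 8) -> (trunc (srl x, 40) to i32)
  // since c1 + OpSizeInBits (32 + 32) equals the inner shift's bit width; when
  // c1 + c2 is smaller, the combined shift is still formed but the result is
  // masked before truncating.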
11094
11095 if (N0.getOpcode() == ISD::SHL) {
11096 // fold (srl (shl nuw x, c), c) -> x
11097 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11098 return N0.getOperand(0);
11099
11100     // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
11101     //                               (and (srl x, (sub c2, c1)), MASK)
11102 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11103         TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
11104       auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11105                                              ConstantSDNode *RHS) {
11106         const APInt &LHSC = LHS->getAPIntValue();
11107 const APInt &RHSC = RHS->getAPIntValue();
11108 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11109 LHSC.getZExtValue() <= RHSC.getZExtValue();
11110 };
11111 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11112 /*AllowUndefs*/ false,
11113 /*AllowTypeMismatch*/ true)) {
11114 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11115 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11116 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11117 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11118 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11119 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11120 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11121 }
11122 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11123 /*AllowUndefs*/ false,
11124 /*AllowTypeMismatch*/ true)) {
11125 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11126 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11127 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11128 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11129 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11130 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11131 }
11132 }
11133 }
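  // Editorial note (illustrative, not part of the upstream source): e.g. on i8,
  //   (srl (shl x, 6), 2) -> (and (shl x, 4), 0x30)
  // and with the shift amounts swapped,
  //   (srl (shl x, 2), 6) -> (and (srl x, 4), 0x03)
  // i.e. the shl/srl pair is replaced by a single shift plus a mask.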
11134
11135 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11136 // TODO - support non-uniform vector shift amounts.
11137 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11138 // Shifting in all undef bits?
11139 EVT SmallVT = N0.getOperand(0).getValueType();
11140 unsigned BitSize = SmallVT.getScalarSizeInBits();
11141 if (N1C->getAPIntValue().uge(BitSize))
11142 return DAG.getUNDEF(VT);
11143
11144 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11145 uint64_t ShiftAmt = N1C->getZExtValue();
11146 SDLoc DL0(N0);
11147 SDValue SmallShift =
11148 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11149 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11150 AddToWorklist(SmallShift.getNode());
11151 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11152 return DAG.getNode(ISD::AND, DL, VT,
11153 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11154 DAG.getConstant(Mask, DL, VT));
11155 }
11156 }
11157
11158 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11159 // bit, which is unmodified by sra.
11160 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11161 if (N0.getOpcode() == ISD::SRA)
11162 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11163 }
11164
11165 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11166 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11167 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11168 isPowerOf2_32(OpSizeInBits) &&
11169 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11170 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11171
11172 // If any of the input bits are KnownOne, then the input couldn't be all
11173 // zeros, thus the result of the srl will always be zero.
11174 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11175
11176     // If all of the bits input to the ctlz node are known to be zero, then
11177 // the result of the ctlz is "32" and the result of the shift is one.
11178 APInt UnknownBits = ~Known.Zero;
11179 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11180
11181 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11182 if (UnknownBits.isPowerOf2()) {
11183       // Okay, we know that only the single bit specified by UnknownBits
11184 // could be set on input to the CTLZ node. If this bit is set, the SRL
11185 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11186 // to an SRL/XOR pair, which is likely to simplify more.
11187 unsigned ShAmt = UnknownBits.countr_zero();
11188 SDValue Op = N0.getOperand(0);
11189
11190 if (ShAmt) {
11191 SDLoc DL(N0);
11192 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11193 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11194 AddToWorklist(Op.getNode());
11195 }
11196 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11197 }
11198 }
11199
11200 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11201 if (N1.getOpcode() == ISD::TRUNCATE &&
11202 N1.getOperand(0).getOpcode() == ISD::AND) {
11203 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11204 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11205 }
11206
11207 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11208 // -> (logic_op (srl x, c1), (zext y))
11209 // c1 <= leadingzeros(zext(y))
11210 SDValue X, ZExtY;
11211 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11212 m_Value(X),
11213                               m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
11214                                                      m_Opc(ISD::ZERO_EXTEND)),
11215                                              m_Specific(N1))))))) {
11216 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11217                                ZExtY.getOperand(0).getScalarValueSizeInBits();
11218     if (N1C->getZExtValue() <= NumLeadingZeros)
11219 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11220 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11221 }
11222
11223 // fold operands of srl based on knowledge that the low bits are not
11224 // demanded.
11225   if (SimplifyDemandedBits(SDValue(N, 0)))
11226     return SDValue(N, 0);
11227
11228 if (N1C && !N1C->isOpaque())
11229 if (SDValue NewSRL = visitShiftByConstant(N))
11230 return NewSRL;
11231
11232 // Attempt to convert a srl of a load into a narrower zero-extending load.
11233 if (SDValue NarrowLoad = reduceLoadWidth(N))
11234 return NarrowLoad;
11235
11236 // Here is a common situation. We want to optimize:
11237 //
11238 // %a = ...
11239 // %b = and i32 %a, 2
11240 // %c = srl i32 %b, 1
11241 // brcond i32 %c ...
11242 //
11243 // into
11244 //
11245 // %a = ...
11246 // %b = and %a, 2
11247 // %c = setcc eq %b, 0
11248 // brcond %c ...
11249 //
11250   // However, after the source operand of SRL is optimized into AND, the SRL
11251 // itself may not be optimized further. Look for it and add the BRCOND into
11252 // the worklist.
11253 //
11254   // This also tends to happen for binary operations when SimplifyDemandedBits
11255 // is involved.
11256 //
11257   // FIXME: This is unnecessary if we process the DAG in topological order,
11258 // which we plan to do. This workaround can be removed once the DAG is
11259 // processed in topological order.
11260 if (N->hasOneUse()) {
11261 SDNode *User = *N->user_begin();
11262
11263     // Look past the truncate.
11264 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11265 User = *User->user_begin();
11266
11267 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11268 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11269 AddToWorklist(User);
11270 }
11271
11272 // Try to transform this shift into a multiply-high if
11273 // it matches the appropriate pattern detected in combineShiftToMULH.
11274 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11275 return MULH;
11276
11277 if (SDValue AVG = foldShiftToAvg(N, DL))
11278 return AVG;
11279
11280 return SDValue();
11281}
11282
11283SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11284 EVT VT = N->getValueType(0);
11285 SDValue N0 = N->getOperand(0);
11286 SDValue N1 = N->getOperand(1);
11287 SDValue N2 = N->getOperand(2);
11288 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11289 unsigned BitWidth = VT.getScalarSizeInBits();
11290 SDLoc DL(N);
11291
11292 // fold (fshl/fshr C0, C1, C2) -> C3
11293 if (SDValue C =
11294 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11295 return C;
11296
11297 // fold (fshl N0, N1, 0) -> N0
11298 // fold (fshr N0, N1, 0) -> N1
11299   if (isPowerOf2_32(BitWidth))
11300     if (DAG.MaskedValueIsZero(
11301 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11302 return IsFSHL ? N0 : N1;
11303
11304 auto IsUndefOrZero = [](SDValue V) {
11305 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11306 };
11307
11308 // TODO - support non-uniform vector shift amounts.
11309 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11310 EVT ShAmtTy = N2.getValueType();
11311
11312 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11313 if (Cst->getAPIntValue().uge(BitWidth)) {
11314 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11315 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11316 DAG.getConstant(RotAmt, DL, ShAmtTy));
11317 }
11318
11319 unsigned ShAmt = Cst->getZExtValue();
11320 if (ShAmt == 0)
11321 return IsFSHL ? N0 : N1;
11322
11323 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11324 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11325 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11326 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11327 if (IsUndefOrZero(N0))
11328 return DAG.getNode(
11329 ISD::SRL, DL, VT, N1,
11330 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11331 if (IsUndefOrZero(N1))
11332 return DAG.getNode(
11333 ISD::SHL, DL, VT, N0,
11334 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
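    // Editorial note (illustrative, not part of the upstream source): e.g. on
    // i32, (fshl 0, y, 8) -> (srl y, 24) and (fshr x, 0, 8) -> (shl x, 24),
    // because a zero (or undef) half contributes nothing to the concatenated
    // value, leaving a plain shift of the remaining operand.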
11335
11336 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11337 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11338 // TODO - bigendian support once we have test coverage.
11339     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11340 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11341 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11342 !DAG.getDataLayout().isBigEndian()) {
11343 auto *LHS = dyn_cast<LoadSDNode>(N0);
11344 auto *RHS = dyn_cast<LoadSDNode>(N1);
11345 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11346 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11347 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11348         ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
11349       if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11350 SDLoc DL(RHS);
11351 uint64_t PtrOff =
11352 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11353 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11354 unsigned Fast = 0;
11355 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11356 RHS->getAddressSpace(), NewAlign,
11357 RHS->getMemOperand()->getFlags(), &Fast) &&
11358 Fast) {
11359 SDValue NewPtr = DAG.getMemBasePlusOffset(
11360 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11361 AddToWorklist(NewPtr.getNode());
11362 SDValue Load = DAG.getLoad(
11363 VT, DL, RHS->getChain(), NewPtr,
11364 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11365 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11366 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11367 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11368 return Load;
11369 }
11370 }
11371 }
11372 }
11373 }
11374
11375 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11376 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11377   // iff we know the shift amount is in range.
11378 // TODO: when is it worth doing SUB(BW, N2) as well?
11379 if (isPowerOf2_32(BitWidth)) {
11380 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11381 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11382 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11383 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11384 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11385 }
11386
11387 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11388 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11389 // TODO: Investigate flipping this rotate if only one is legal.
11390 // If funnel shift is legal as well we might be better off avoiding
11391 // non-constant (BW - N2).
11392 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11393 if (N0 == N1 && hasOperation(RotOpc, VT))
11394 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11395
11396 // Simplify, based on bits shifted out of N0/N1.
11397   if (SimplifyDemandedBits(SDValue(N, 0)))
11398     return SDValue(N, 0);
11399
11400 return SDValue();
11401}
11402
11403SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11404 SDValue N0 = N->getOperand(0);
11405 SDValue N1 = N->getOperand(1);
11406 if (SDValue V = DAG.simplifyShift(N0, N1))
11407 return V;
11408
11409 SDLoc DL(N);
11410 EVT VT = N0.getValueType();
11411
11412 // fold (*shlsat c1, c2) -> c1<<c2
11413 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11414 return C;
11415
11416   ConstantSDNode *N1C = isConstOrConstSplat(N1);
11417
11418 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11419 // fold (sshlsat x, c) -> (shl x, c)
11420 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11421 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11422 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11423
11424 // fold (ushlsat x, c) -> (shl x, c)
11425 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11426 N1C->getAPIntValue().ule(
11427             DAG.computeKnownBits(N0).countMinLeadingZeros()))
11428       return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11429 }
11430
11431 return SDValue();
11432}
11433
11434// Given a ABS node, detect the following patterns:
11435// (ABS (SUB (EXTEND a), (EXTEND b))).
11436// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11437// Generates UABD/SABD instruction.
11438SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11439 EVT SrcVT = N->getValueType(0);
11440
11441 if (N->getOpcode() == ISD::TRUNCATE)
11442 N = N->getOperand(0).getNode();
11443
11444 EVT VT = N->getValueType(0);
11445 SDValue Op0, Op1;
11446
11447 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11448 return SDValue();
11449
11450 SDValue AbsOp0 = N->getOperand(0);
11451 unsigned Opc0 = Op0.getOpcode();
11452
11453 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11454 // fallback to ValueTracking.
11455 if (Opc0 != Op1.getOpcode() ||
11456 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11457 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11458 // fold (abs (sub nsw x, y)) -> abds(x, y)
11459 // Don't fold this for unsupported types as we lose the NSW handling.
11460 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11461 (AbsOp0->getFlags().hasNoSignedWrap() ||
11462 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11463 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11464 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11465 }
11466 // fold (abs (sub x, y)) -> abdu(x, y)
11467 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11468 DAG.SignBitIsZero(Op1)) {
11469 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11470 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11471 }
11472 return SDValue();
11473 }
11474
11475 EVT VT0, VT1;
11476 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11477 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11478 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11479 } else {
11480 VT0 = Op0.getOperand(0).getValueType();
11481 VT1 = Op1.getOperand(0).getValueType();
11482 }
11483 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11484
11485 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11486 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11487 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11488 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11489 (VT1 == MaxVT || Op1->hasOneUse()) &&
11490 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11491 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11492 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11493 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11494 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11495 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11496 }
11497
11498 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11499 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11500 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11501 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11502 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11503 }
11504
11505 return SDValue();
11506}
11507
11508SDValue DAGCombiner::visitABS(SDNode *N) {
11509 SDValue N0 = N->getOperand(0);
11510 EVT VT = N->getValueType(0);
11511 SDLoc DL(N);
11512
11513 // fold (abs c1) -> c2
11514 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11515 return C;
11516 // fold (abs (abs x)) -> (abs x)
11517 if (N0.getOpcode() == ISD::ABS)
11518 return N0;
11519 // fold (abs x) -> x iff not-negative
11520 if (DAG.SignBitIsZero(N0))
11521 return N0;
11522
11523 if (SDValue ABD = foldABSToABD(N, DL))
11524 return ABD;
11525
11526 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11527 // iff zero_extend/truncate are free.
11528 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11529 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11530 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11531 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11532 hasOperation(ISD::ABS, ExtVT)) {
11533 return DAG.getNode(
11534 ISD::ZERO_EXTEND, DL, VT,
11535 DAG.getNode(ISD::ABS, DL, ExtVT,
11536 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11537 }
11538 }
11539
11540 return SDValue();
11541}
11542
11543SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11544 SDValue N0 = N->getOperand(0);
11545 EVT VT = N->getValueType(0);
11546 SDLoc DL(N);
11547
11548 // fold (bswap c1) -> c2
11549 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11550 return C;
11551 // fold (bswap (bswap x)) -> x
11552 if (N0.getOpcode() == ISD::BSWAP)
11553 return N0.getOperand(0);
11554
11555 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11556 // isn't supported, it will be expanded to bswap followed by a manual reversal
11557 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11558 // the two bswaps if the bitreverse gets expanded.
11559 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11560 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11561 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11562 }
11563
11564 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11565   // iff c >= bw/2 (i.e. lower half is known zero)
11566 unsigned BW = VT.getScalarSizeInBits();
11567 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11568 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11569 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11570 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11571 ShAmt->getZExtValue() >= (BW / 2) &&
11572 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11573 TLI.isTruncateFree(VT, HalfVT) &&
11574 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11575 SDValue Res = N0.getOperand(0);
11576 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11577 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11578 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11579 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11580 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11581 return DAG.getZExtOrTrunc(Res, DL, VT);
11582 }
11583 }
11584
11585 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11586 // inverse-shift-of-bswap:
11587 // bswap (X u<< C) --> (bswap X) u>> C
11588 // bswap (X u>> C) --> (bswap X) u<< C
11589 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11590 N0.hasOneUse()) {
11591 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11592 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11593 ShAmt->getZExtValue() % 8 == 0) {
11594 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11595 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11596 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11597 }
11598 }
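  // Editorial note (illustrative, not part of the upstream source): e.g. on
  // i32, (bswap (shl x, 16)) -> (srl (bswap x), 16), since shifting by a whole
  // number of bytes before a byte swap is the same as swapping first and
  // shifting the opposite way.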
11599
11600 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11601 return V;
11602
11603 return SDValue();
11604}
11605
11606SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11607 SDValue N0 = N->getOperand(0);
11608 EVT VT = N->getValueType(0);
11609 SDLoc DL(N);
11610
11611 // fold (bitreverse c1) -> c2
11612 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11613 return C;
11614
11615 // fold (bitreverse (bitreverse x)) -> x
11616 if (N0.getOpcode() == ISD::BITREVERSE)
11617 return N0.getOperand(0);
11618
11619 SDValue X, Y;
11620
11621 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11622 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11623       sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11624     return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11625
11626 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11627 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11628       sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11629     return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11630
11631 return SDValue();
11632}
11633
11634SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11635 SDValue N0 = N->getOperand(0);
11636 EVT VT = N->getValueType(0);
11637 SDLoc DL(N);
11638
11639 // fold (ctlz c1) -> c2
11640 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11641 return C;
11642
11643 // If the value is known never to be zero, switch to the undef version.
11644 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11645 if (DAG.isKnownNeverZero(N0))
11646 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11647
11648 return SDValue();
11649}
11650
11651SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11652 SDValue N0 = N->getOperand(0);
11653 EVT VT = N->getValueType(0);
11654 SDLoc DL(N);
11655
11656 // fold (ctlz_zero_undef c1) -> c2
11657 if (SDValue C =
11658           DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11659     return C;
11660 return SDValue();
11661}
11662
11663SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11664 SDValue N0 = N->getOperand(0);
11665 EVT VT = N->getValueType(0);
11666 SDLoc DL(N);
11667
11668 // fold (cttz c1) -> c2
11669 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11670 return C;
11671
11672 // If the value is known never to be zero, switch to the undef version.
11673 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11674 if (DAG.isKnownNeverZero(N0))
11675 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11676
11677 return SDValue();
11678}
11679
11680SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11681 SDValue N0 = N->getOperand(0);
11682 EVT VT = N->getValueType(0);
11683 SDLoc DL(N);
11684
11685 // fold (cttz_zero_undef c1) -> c2
11686 if (SDValue C =
11687           DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11688     return C;
11689 return SDValue();
11690}
11691
11692SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11693 SDValue N0 = N->getOperand(0);
11694 EVT VT = N->getValueType(0);
11695 unsigned NumBits = VT.getScalarSizeInBits();
11696 SDLoc DL(N);
11697
11698 // fold (ctpop c1) -> c2
11699 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11700 return C;
11701
11702 // If the source is being shifted, but doesn't affect any active bits,
11703 // then we can call CTPOP on the shift source directly.
11704 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11705 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11706 const APInt &Amt = AmtC->getAPIntValue();
11707 if (Amt.ult(NumBits)) {
11708 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11709 if ((N0.getOpcode() == ISD::SRL &&
11710 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11711 (N0.getOpcode() == ISD::SHL &&
11712 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11713 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11714 }
11715 }
11716 }
11717 }
11718
11719   // If the upper bits are known to be zero, then see if it's profitable to
11720   // only count the lower bits.
11721 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11722 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11723 if (hasOperation(ISD::CTPOP, HalfVT) &&
11724 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11725 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11726 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11727 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11728 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11729 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11730 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11731 }
11732 }
11733 }
11734
11735 return SDValue();
11736}
11737
11738 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11739                                          SDValue RHS, const SDNodeFlags Flags,
11740 const TargetLowering &TLI) {
11741 EVT VT = LHS.getValueType();
11742 if (!VT.isFloatingPoint())
11743 return false;
11744
11745 const TargetOptions &Options = DAG.getTarget().Options;
11746
11747 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11749 (Flags.hasNoNaNs() ||
11750 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11751}
11752
11753 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11754                                        SDValue RHS, SDValue True, SDValue False,
11755 ISD::CondCode CC,
11756 const TargetLowering &TLI,
11757 SelectionDAG &DAG) {
11758 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11759 switch (CC) {
11760 case ISD::SETOLT:
11761 case ISD::SETOLE:
11762 case ISD::SETLT:
11763 case ISD::SETLE:
11764 case ISD::SETULT:
11765 case ISD::SETULE: {
11766     // Since it's already known never to be NaN to get here, either fminnum or
11767     // fminnum_ieee are OK. Try the ieee version first, since fminnum is
11768     // expanded in terms of it.
11769 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11770 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11771 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11772
11773 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11774 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11775 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11776 return SDValue();
11777 }
11778 case ISD::SETOGT:
11779 case ISD::SETOGE:
11780 case ISD::SETGT:
11781 case ISD::SETGE:
11782 case ISD::SETUGT:
11783 case ISD::SETUGE: {
11784 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11785 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11786 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11787
11788 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11789 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11790 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11791 return SDValue();
11792 }
11793 default:
11794 return SDValue();
11795 }
11796}
11797
11798// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
11799SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11800 const unsigned Opcode = N->getOpcode();
11801 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11802 return SDValue();
11803
11804 EVT VT = N->getValueType(0);
11805 bool IsUnsigned = Opcode == ISD::SRL;
11806
11807 // Captured values.
11808 SDValue A, B, Add;
11809
11810 // Match floor average as it is common to both floor/ceil avgs.
11811 if (sd_match(N, m_BinOp(Opcode,
11812                           m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11813                           m_One()))) {
11814 // Decide whether signed or unsigned.
11815 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11816 if (!hasOperation(FloorISD, VT))
11817 return SDValue();
11818
11819 // Can't optimize adds that may wrap.
11820 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11821 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11822 return SDValue();
11823
11824 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11825 }
11826
11827 return SDValue();
11828}
11829
11830SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
11831 unsigned Opc = N->getOpcode();
11832 SDValue X, Y, Z;
11833 if (sd_match(
11834           N, m_BitwiseLogic(m_Value(X), m_Add(m_Not(m_Value(Y)), m_Value(Z)))))
11835     return DAG.getNode(Opc, DL, VT, X,
11836 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11837
11838   if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Sub(m_Not(m_Value(Y)),
11839                                                    m_Value(Z)))))
11840 return DAG.getNode(Opc, DL, VT, X,
11841 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11842
11843 return SDValue();
11844}
11845
11846/// Generate Min/Max node
11847SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11848 SDValue RHS, SDValue True,
11849 SDValue False, ISD::CondCode CC) {
11850 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11851 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11852
11853 // If we can't directly match this, try to see if we can pull an fneg out of
11854 // the select.
11855   SDValue NegTrue = TLI.getCheaperNegatedExpression(
11856       True, DAG, LegalOperations, ForCodeSize);
11857 if (!NegTrue)
11858 return SDValue();
11859
11860 HandleSDNode NegTrueHandle(NegTrue);
11861
11862 // Try to unfold an fneg from the select if we are comparing the negated
11863 // constant.
11864 //
11865 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11866 //
11867 // TODO: Handle fabs
11868 if (LHS == NegTrue) {
11869 // If we can't directly match this, try to see if we can pull an fneg out of
11870 // the select.
11871     SDValue NegRHS = TLI.getCheaperNegatedExpression(
11872         RHS, DAG, LegalOperations, ForCodeSize);
11873 if (NegRHS) {
11874 HandleSDNode NegRHSHandle(NegRHS);
11875 if (NegRHS == False) {
11876 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11877 False, CC, TLI, DAG);
11878 if (Combined)
11879 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11880 }
11881 }
11882 }
11883
11884 return SDValue();
11885}
11886
11887/// If a (v)select has a condition value that is a sign-bit test, try to smear
11888/// the condition operand sign-bit across the value width and use it as a mask.
11889 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11890                                              SelectionDAG &DAG) {
11891 SDValue Cond = N->getOperand(0);
11892 SDValue C1 = N->getOperand(1);
11893 SDValue C2 = N->getOperand(2);
11894   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11895     return SDValue();
11896
11897 EVT VT = N->getValueType(0);
11898 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11899 VT != Cond.getOperand(0).getValueType())
11900 return SDValue();
11901
11902 // The inverted-condition + commuted-select variants of these patterns are
11903 // canonicalized to these forms in IR.
11904 SDValue X = Cond.getOperand(0);
11905 SDValue CondC = Cond.getOperand(1);
11906 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11907 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11908       isAllOnesOrAllOnesSplat(C2)) {
11909     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11910 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11911 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11912 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11913 }
11914 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11915 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11916 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11917 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11918 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11919 }
11920 return SDValue();
11921}
11922
11923 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11924                                                  const TargetLowering &TLI) {
11925 if (!TLI.convertSelectOfConstantsToMath(VT))
11926 return false;
11927
11928 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11929 return true;
11931 return true;
11932
11933 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11934 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11935 return true;
11936 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11937 return true;
11938
11939 return false;
11940}
11941
11942SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11943 SDValue Cond = N->getOperand(0);
11944 SDValue N1 = N->getOperand(1);
11945 SDValue N2 = N->getOperand(2);
11946 EVT VT = N->getValueType(0);
11947 EVT CondVT = Cond.getValueType();
11948 SDLoc DL(N);
11949
11950 if (!VT.isInteger())
11951 return SDValue();
11952
11953 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11954 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11955 if (!C1 || !C2)
11956 return SDValue();
11957
11958 if (CondVT != MVT::i1 || LegalOperations) {
11959 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11960 // We can't do this reliably if integer based booleans have different contents
11961 // to floating point based booleans. This is because we can't tell whether we
11962 // have an integer-based boolean or a floating-point-based boolean unless we
11963 // can find the SETCC that produced it and inspect its operands. This is
11964 // fairly easy if C is the SETCC node, but it can potentially be
11965 // undiscoverable (or not reasonably discoverable). For example, it could be
11966 // in another basic block or it could require searching a complicated
11967 // expression.
11968 if (CondVT.isInteger() &&
11969 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11970             TargetLowering::ZeroOrOneBooleanContent &&
11971         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11972             TargetLowering::ZeroOrOneBooleanContent &&
11973         C1->isZero() && C2->isOne()) {
11974 SDValue NotCond =
11975 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11976 if (VT.bitsEq(CondVT))
11977 return NotCond;
11978 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11979 }
11980
11981 return SDValue();
11982 }
11983
11984 // Only do this before legalization to avoid conflicting with target-specific
11985 // transforms in the other direction (create a select from a zext/sext). There
11986 // is also a target-independent combine here in DAGCombiner in the other
11987 // direction for (select Cond, -1, 0) when the condition is not i1.
11988 assert(CondVT == MVT::i1 && !LegalOperations);
11989
11990 // select Cond, 1, 0 --> zext (Cond)
11991 if (C1->isOne() && C2->isZero())
11992 return DAG.getZExtOrTrunc(Cond, DL, VT);
11993
11994 // select Cond, -1, 0 --> sext (Cond)
11995 if (C1->isAllOnes() && C2->isZero())
11996 return DAG.getSExtOrTrunc(Cond, DL, VT);
11997
11998 // select Cond, 0, 1 --> zext (!Cond)
11999 if (C1->isZero() && C2->isOne()) {
12000 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12001 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12002 return NotCond;
12003 }
12004
12005 // select Cond, 0, -1 --> sext (!Cond)
12006 if (C1->isZero() && C2->isAllOnes()) {
12007 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12008 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12009 return NotCond;
12010 }
12011
12012 // Use a target hook because some targets may prefer to transform in the
12013 // other direction.
12014   if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12015     return SDValue();
12016
12017 // For any constants that differ by 1, we can transform the select into
12018 // an extend and add.
12019 const APInt &C1Val = C1->getAPIntValue();
12020 const APInt &C2Val = C2->getAPIntValue();
12021
12022 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12023 if (C1Val - 1 == C2Val) {
12024 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12025 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12026 }
12027
12028 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12029 if (C1Val + 1 == C2Val) {
12030 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12031 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12032 }
12033
12034 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12035 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12036 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12037 SDValue ShAmtC =
12038 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12039 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12040 }
12041
12042 // select Cond, -1, C --> or (sext Cond), C
12043 if (C1->isAllOnes()) {
12044 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12045 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12046 }
12047
12048 // select Cond, C, -1 --> or (sext (not Cond)), C
12049 if (C2->isAllOnes()) {
12050 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12051 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12052 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12053 }
12054
12055   if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12056     return V;
12057
12058 return SDValue();
12059}
12060
12061template <class MatchContextClass>
12062 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
12063                                      SelectionDAG &DAG) {
12064 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12065 N->getOpcode() == ISD::VP_SELECT) &&
12066 "Expected a (v)(vp.)select");
12067 SDValue Cond = N->getOperand(0);
12068 SDValue T = N->getOperand(1), F = N->getOperand(2);
12069 EVT VT = N->getValueType(0);
12070 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12071 MatchContextClass matcher(DAG, TLI, N);
12072
12073 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12074 return SDValue();
12075
12076 // select Cond, Cond, F --> or Cond, freeze(F)
12077 // select Cond, 1, F --> or Cond, freeze(F)
12078 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12079 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12080
12081 // select Cond, T, Cond --> and Cond, freeze(T)
12082 // select Cond, T, 0 --> and Cond, freeze(T)
12083 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12084 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12085
12086 // select Cond, T, 1 --> or (not Cond), freeze(T)
12087 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12088 SDValue NotCond =
12089 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12090 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12091 }
12092
12093 // select Cond, 0, F --> and (not Cond), freeze(F)
12094 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12095 SDValue NotCond =
12096 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12097 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12098 }
12099
12100 return SDValue();
12101}
12102
12103 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
12104   SDValue N0 = N->getOperand(0);
12105 SDValue N1 = N->getOperand(1);
12106 SDValue N2 = N->getOperand(2);
12107 EVT VT = N->getValueType(0);
12108 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12109
12110 SDValue Cond0, Cond1;
12111 ISD::CondCode CC;
12112 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12113 m_CondCode(CC)))) ||
12114 VT != Cond0.getValueType())
12115 return SDValue();
12116
12117 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12118 // compare is inverted from that pattern ("Cond0 s> -1").
12119 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12120 ; // This is the pattern we are looking for.
12121 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12122 std::swap(N1, N2);
12123 else
12124 return SDValue();
12125
12126 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12127 if (isNullOrNullSplat(N2)) {
12128 SDLoc DL(N);
12129 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12130 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12131 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12132 }
12133
12134 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12135 if (isAllOnesOrAllOnesSplat(N1)) {
12136 SDLoc DL(N);
12137 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12138 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12139 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12140 }
12141
12142 // If we have to invert the sign bit mask, only do that transform if the
12143 // target has a bitwise 'and not' instruction (the invert is free).
12144   // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12145 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12146 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12147 SDLoc DL(N);
12148 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12149 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12150 SDValue Not = DAG.getNOT(DL, Sra, VT);
12151 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12152 }
12153
12154 // TODO: There's another pattern in this family, but it may require
12155 // implementing hasOrNot() to check for profitability:
12156 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12157
12158 return SDValue();
12159}
12160
12161// Match SELECTs with absolute difference patterns.
12162// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12163// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12164// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12165// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12166SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12167 SDValue False, ISD::CondCode CC,
12168 const SDLoc &DL) {
12169 bool IsSigned = isSignedIntSetCC(CC);
12170 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12171 EVT VT = LHS.getValueType();
12172
12173 if (LegalOperations && !hasOperation(ABDOpc, VT))
12174 return SDValue();
12175
12176 switch (CC) {
12177 case ISD::SETGT:
12178 case ISD::SETGE:
12179 case ISD::SETUGT:
12180 case ISD::SETUGE:
12181 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12182 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
12183 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12184 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12185 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12186 hasOperation(ABDOpc, VT))
12187 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12188 break;
12189 case ISD::SETLT:
12190 case ISD::SETLE:
12191 case ISD::SETULT:
12192 case ISD::SETULE:
12193 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12194 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
12195 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12196 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12197 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12198 hasOperation(ABDOpc, VT))
12199 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12200 break;
12201 default:
12202 break;
12203 }
12204
12205 return SDValue();
12206}
12207
12208// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12209// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12210SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12211 SDValue False, ISD::CondCode CC,
12212 const SDLoc &DL) {
12213 APInt C;
12214 EVT VT = True.getValueType();
12215 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12216 if (CC == ISD::SETUGT && LHS == False &&
12217 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12218 SDValue AddC = DAG.getConstant(~C, DL, VT);
12219 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12220 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12221 }
12222 if (CC == ISD::SETULT && LHS == True &&
12223 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12224 SDValue AddC = DAG.getConstant(-C, DL, VT);
12225 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12226 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12227 }
12228 }
12229 return SDValue();
12230}
12231
12232SDValue DAGCombiner::visitSELECT(SDNode *N) {
12233 SDValue N0 = N->getOperand(0);
12234 SDValue N1 = N->getOperand(1);
12235 SDValue N2 = N->getOperand(2);
12236 EVT VT = N->getValueType(0);
12237 EVT VT0 = N0.getValueType();
12238 SDLoc DL(N);
12239 SDNodeFlags Flags = N->getFlags();
12240
12241 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12242 return V;
12243
12244 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12245 return V;
12246
12247 // select (not Cond), N1, N2 -> select Cond, N2, N1
12248 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12249 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12250
12251 if (SDValue V = foldSelectOfConstants(N))
12252 return V;
12253
12254 // If we can fold this based on the true/false value, do so.
12255 if (SimplifySelectOps(N, N1, N2))
12256 return SDValue(N, 0); // Don't revisit N.
12257
12258 if (VT0 == MVT::i1) {
12259 // The code in this block deals with the following 2 equivalences:
12260 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12261 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12262 // The target can specify its preferred form with the
12263 // shouldNormalizeToSelectSequence() callback. However we always transform
12264 // to the right anyway if we find the inner select exists in the DAG anyway
12265 // and we always transform to the left side if we know that we can further
12266 // optimize the combination of the conditions.
12267 bool normalizeToSequence =
12268         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12269     // select (and Cond0, Cond1), X, Y
12270 // -> select Cond0, (select Cond1, X, Y), Y
12271 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12272 SDValue Cond0 = N0->getOperand(0);
12273 SDValue Cond1 = N0->getOperand(1);
12274 SDValue InnerSelect =
12275 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12276 if (normalizeToSequence || !InnerSelect.use_empty())
12277 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12278 InnerSelect, N2, Flags);
12279 // Cleanup on failure.
12280 if (InnerSelect.use_empty())
12281 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12282 }
12283 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12284 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12285 SDValue Cond0 = N0->getOperand(0);
12286 SDValue Cond1 = N0->getOperand(1);
12287 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12288 Cond1, N1, N2, Flags);
12289 if (normalizeToSequence || !InnerSelect.use_empty())
12290 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12291 InnerSelect, Flags);
12292 // Cleanup on failure.
12293 if (InnerSelect.use_empty())
12294 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12295 }
12296
12297 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12298 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12299 SDValue N1_0 = N1->getOperand(0);
12300 SDValue N1_1 = N1->getOperand(1);
12301 SDValue N1_2 = N1->getOperand(2);
12302 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12303 // Create the actual and node if we can generate good code for it.
12304 if (!normalizeToSequence) {
12305 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12306 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12307 N2, Flags);
12308 }
12309 // Otherwise see if we can optimize the "and" to a better pattern.
12310 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12311 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12312 N2, Flags);
12313 }
12314 }
12315 }
12316 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12317 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12318 SDValue N2_0 = N2->getOperand(0);
12319 SDValue N2_1 = N2->getOperand(1);
12320 SDValue N2_2 = N2->getOperand(2);
12321 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12322 // Create the actual or node if we can generate good code for it.
12323 if (!normalizeToSequence) {
12324 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12325 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12326 N2_2, Flags);
12327 }
12328 // Otherwise see if we can optimize to a better pattern.
12329 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12330 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12331 N2_2, Flags);
12332 }
12333 }
12334
12335 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
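// usubo(x, y).overflow is set exactly when x u< y (the subtraction wraps),
// so the select returns (y - x) when x u< y and (x - y) otherwise, i.e. the
// unsigned absolute difference of x and y.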
12336 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12337 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12338 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12339 N2.getOperand(1) == N1.getOperand(0) &&
12340 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12341 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12342
12343 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12344 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12345 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12346 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12347 N2.getOperand(1) == N1.getOperand(0) &&
12348 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12349 return DAG.getNegative(
12350 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12351 DL, VT);
12352 }
12353
12354 // Fold selects based on a setcc into other things, such as min/max/abs.
12355 if (N0.getOpcode() == ISD::SETCC) {
12356 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12357 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12358
12359 // select (fcmp lt x, y), x, y -> fminnum x, y
12360 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12361 //
12362 // This is OK if we don't care what happens if either operand is a NaN.
12363 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12364 if (SDValue FMinMax =
12365 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12366 return FMinMax;
12367
12368 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12369 // This is conservatively limited to pre-legal-operations to give targets
12370 // a chance to reverse the transform if they want to do that. Also, it is
12371 // unlikely that the pattern would be formed late, so it's probably not
12372 // worth going through the other checks.
12373 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12374 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12375 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12376 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12377 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12378 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12379 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12380 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12381 //
12382 // The IR equivalent of this transform would have this form:
12383 // %a = add %x, C
12384 // %c = icmp ugt %x, ~C
12385 // %r = select %c, -1, %a
12386 // =>
12387 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12388 // %u0 = extractvalue %u, 0
12389 // %u1 = extractvalue %u, 1
12390 // %r = select %u1, -1, %u0
12391 SDVTList VTs = DAG.getVTList(VT, VT0);
12392 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12393 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12394 }
12395 }
12396
12397 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12398 (!LegalOperations &&
12399 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12400 // Any flags available in a select/setcc fold will be on the setcc as they
12401 // migrated from fcmp
12402 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12403 N0.getOperand(2), N0->getFlags());
12404 }
12405
12406 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12407 return ABD;
12408
12409 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12410 return NewSel;
12411
12412 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12413 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12414 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12415 return UMin;
12416 }
12417
12418 if (!VT.isVector())
12419 if (SDValue BinOp = foldSelectOfBinops(N))
12420 return BinOp;
12421
12422 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12423 return R;
12424
12425 return SDValue();
12426}
12427
12428// This function assumes all the vselect's arguments are CONCAT_VECTOR
12429// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
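// For example (illustrative), with LHS = concat_vectors(LoA, HiA),
// RHS = concat_vectors(LoB, HiB) and a condition build_vector whose first
// half is the constant c0 and whose second half is the constant c1, the
// vselect becomes concat_vectors(c0 ? LoA : LoB, c1 ? HiA : HiB).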
12430 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12431 SDLoc DL(N);
12432 SDValue Cond = N->getOperand(0);
12433 SDValue LHS = N->getOperand(1);
12434 SDValue RHS = N->getOperand(2);
12435 EVT VT = N->getValueType(0);
12436 int NumElems = VT.getVectorNumElements();
12437 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12438 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12439 Cond.getOpcode() == ISD::BUILD_VECTOR);
12440
12441 // CONCAT_VECTORS can take an arbitrary number of operands. We only care
12442 // about the binary case here.
12443 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12444 return SDValue();
12445
12446 // We're sure we have an even number of elements due to the
12447 // concat_vectors we have as arguments to vselect.
12448 // Skip BV elements until we find one that's not an UNDEF. After that,
12449 // keep scanning until we reach half the length of the BV, checking that
12450 // all the non-undef elements are the same constant.
12451 ConstantSDNode *BottomHalf = nullptr;
12452 for (int i = 0; i < NumElems / 2; ++i) {
12453 if (Cond->getOperand(i)->isUndef())
12454 continue;
12455
12456 if (BottomHalf == nullptr)
12457 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12458 else if (Cond->getOperand(i).getNode() != BottomHalf)
12459 return SDValue();
12460 }
12461
12462 // Do the same for the second half of the BuildVector
12463 ConstantSDNode *TopHalf = nullptr;
12464 for (int i = NumElems / 2; i < NumElems; ++i) {
12465 if (Cond->getOperand(i)->isUndef())
12466 continue;
12467
12468 if (TopHalf == nullptr)
12469 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12470 else if (Cond->getOperand(i).getNode() != TopHalf)
12471 return SDValue();
12472 }
12473
12474 assert(TopHalf && BottomHalf &&
12475 "One half of the selector was all UNDEFs and the other was all the "
12476 "same value. This should have been addressed before this function.");
12477 return DAG.getNode(
12478 ISD::CONCAT_VECTORS, DL, VT,
12479 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12480 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12481}
12482
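// Try to split a uniform (splatted) contribution out of a gather/scatter
// vector index and fold it into the scalar base pointer, leaving a cheaper
// index (a zero splat, or one operand of an ADD). Returns true and updates
// BasePtr/Index in place when the transformation applies.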
12483bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12484 SelectionDAG &DAG, const SDLoc &DL) {
12485
12486 // Only perform the transformation when existing operands can be reused.
12487 if (IndexIsScaled)
12488 return false;
12489
12490 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12491 return false;
12492
12493 EVT VT = BasePtr.getValueType();
12494
12495 if (SDValue SplatVal = DAG.getSplatValue(Index);
12496 SplatVal && !isNullConstant(SplatVal) &&
12497 SplatVal.getValueType() == VT) {
12498 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12499 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12500 return true;
12501 }
12502
12503 if (Index.getOpcode() != ISD::ADD)
12504 return false;
12505
12506 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12507 SplatVal && SplatVal.getValueType() == VT) {
12508 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12509 Index = Index.getOperand(1);
12510 return true;
12511 }
12512 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12513 SplatVal && SplatVal.getValueType() == VT) {
12514 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12515 Index = Index.getOperand(0);
12516 return true;
12517 }
12518 return false;
12519}
12520
12521// Fold sext/zext of index into index type.
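// For example (illustrative), an index of the form
//   zero_extend <4 x i32> %idx to <4 x i64>
// can be replaced by %idx itself with an unsigned index type, provided the
// target reports the extend as removable via shouldRemoveExtendFromGSIndex.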
12522bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12523 SelectionDAG &DAG) {
12524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12525
12526 // It's always safe to look through zero extends.
12527 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12528 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12529 IndexType = ISD::UNSIGNED_SCALED;
12530 Index = Index.getOperand(0);
12531 return true;
12532 }
12533 if (ISD::isIndexTypeSigned(IndexType)) {
12534 IndexType = ISD::UNSIGNED_SCALED;
12535 return true;
12536 }
12537 }
12538
12539 // It's only safe to look through sign extends when Index is signed.
12540 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12541 ISD::isIndexTypeSigned(IndexType) &&
12542 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12543 Index = Index.getOperand(0);
12544 return true;
12545 }
12546
12547 return false;
12548}
12549
12550SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12551 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12552 SDValue Mask = MSC->getMask();
12553 SDValue Chain = MSC->getChain();
12554 SDValue Index = MSC->getIndex();
12555 SDValue Scale = MSC->getScale();
12556 SDValue StoreVal = MSC->getValue();
12557 SDValue BasePtr = MSC->getBasePtr();
12558 SDValue VL = MSC->getVectorLength();
12559 ISD::MemIndexType IndexType = MSC->getIndexType();
12560 SDLoc DL(N);
12561
12562 // Zap scatters with a zero mask.
12563 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12564 return Chain;
12565
12566 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12567 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12568 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12569 DL, Ops, MSC->getMemOperand(), IndexType);
12570 }
12571
12572 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12573 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12574 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12575 DL, Ops, MSC->getMemOperand(), IndexType);
12576 }
12577
12578 return SDValue();
12579}
12580
12581SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12582 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12583 SDValue Mask = MSC->getMask();
12584 SDValue Chain = MSC->getChain();
12585 SDValue Index = MSC->getIndex();
12586 SDValue Scale = MSC->getScale();
12587 SDValue StoreVal = MSC->getValue();
12588 SDValue BasePtr = MSC->getBasePtr();
12589 ISD::MemIndexType IndexType = MSC->getIndexType();
12590 SDLoc DL(N);
12591
12592 // Zap scatters with a zero mask.
12593 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12594 return Chain;
12595
12596 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12597 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12598 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12599 DL, Ops, MSC->getMemOperand(), IndexType,
12600 MSC->isTruncatingStore());
12601 }
12602
12603 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12604 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12605 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12606 DL, Ops, MSC->getMemOperand(), IndexType,
12607 MSC->isTruncatingStore());
12608 }
12609
12610 return SDValue();
12611}
12612
12613SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12614 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12615 SDValue Mask = MST->getMask();
12616 SDValue Chain = MST->getChain();
12617 SDValue Value = MST->getValue();
12618 SDValue Ptr = MST->getBasePtr();
12619
12620 // Zap masked stores with a zero mask.
12621 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12622 return Chain;
12623
12624 // Remove a masked store if base pointers and masks are equal.
12625 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12626 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12627 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12628 !MST->getBasePtr().isUndef() &&
12629 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12630 MST1->getMemoryVT().getStoreSize()) ||
12631 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12632 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12633 MST->getMemoryVT().getStoreSize())) {
12634 CombineTo(MST1, MST1->getChain());
12635 if (N->getOpcode() != ISD::DELETED_NODE)
12636 AddToWorklist(N);
12637 return SDValue(N, 0);
12638 }
12639 }
12640
12641 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12642 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12643 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12644 !MST->isCompressingStore() && !MST->isTruncatingStore())
12645 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12646 MST->getBasePtr(), MST->getPointerInfo(),
12647 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12648 MST->getAAInfo());
12649
12650 // Try transforming N to an indexed store.
12651 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12652 return SDValue(N, 0);
12653
12654 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12655 Value.getValueType().isInteger() &&
12656 (!isa<ConstantSDNode>(Value) ||
12657 !cast<ConstantSDNode>(Value)->isOpaque())) {
12658 APInt TruncDemandedBits =
12659 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12660 MST->getMemoryVT().getScalarSizeInBits());
12661
12662 // See if we can simplify the operation with
12663 // SimplifyDemandedBits, which only works if the value has a single use.
12664 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12665 // Re-visit the store if anything changed and the store hasn't been merged
12666 // with another node (N is deleted). SimplifyDemandedBits will add Value's
12667 // node back to the worklist if necessary, but we also need to re-visit
12668 // the store node itself.
12669 if (N->getOpcode() != ISD::DELETED_NODE)
12670 AddToWorklist(N);
12671 return SDValue(N, 0);
12672 }
12673 }
12674
12675 // If this is a TRUNC followed by a masked store, fold this into a masked
12676 // truncating store. We can do this even if this is already a masked
12677 // truncstore.
12678 // TODO: Try combining to a masked compress store if possible.
12679 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12680 MST->isUnindexed() && !MST->isCompressingStore() &&
12681 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12682 MST->getMemoryVT(), LegalOperations)) {
12683 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12684 Value.getOperand(0).getValueType());
12685 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12686 MST->getOffset(), Mask, MST->getMemoryVT(),
12687 MST->getMemOperand(), MST->getAddressingMode(),
12688 /*IsTruncating=*/true);
12689 }
12690
12691 return SDValue();
12692}
12693
12694SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12695 auto *SST = cast<VPStridedStoreSDNode>(N);
12696 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12697 // Combine strided stores with unit-stride to a regular VP store.
12698 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12699 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12700 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12701 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12702 SST->getVectorLength(), SST->getMemoryVT(),
12703 SST->getMemOperand(), SST->getAddressingMode(),
12704 SST->isTruncatingStore(), SST->isCompressingStore());
12705 }
12706 return SDValue();
12707}
12708
12709SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12710 SDLoc DL(N);
12711 SDValue Vec = N->getOperand(0);
12712 SDValue Mask = N->getOperand(1);
12713 SDValue Passthru = N->getOperand(2);
12714 EVT VecVT = Vec.getValueType();
12715
12716 bool HasPassthru = !Passthru.isUndef();
12717
12718 APInt SplatVal;
12719 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12720 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12721
12722 if (Vec.isUndef() || Mask.isUndef())
12723 return Passthru;
12724
12725 // No need for potentially expensive compress if the mask is constant.
12726 if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12727 SmallVector<SDValue, 16> Ops;
12728 EVT ScalarVT = VecVT.getVectorElementType();
12729 unsigned NumSelected = 0;
12730 unsigned NumElmts = VecVT.getVectorNumElements();
12731 for (unsigned I = 0; I < NumElmts; ++I) {
12732 SDValue MaskI = Mask.getOperand(I);
12733 // We treat undef mask entries as "false".
12734 if (MaskI.isUndef())
12735 continue;
12736
12737 if (TLI.isConstTrueVal(MaskI)) {
12738 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12739 DAG.getVectorIdxConstant(I, DL));
12740 Ops.push_back(VecI);
12741 NumSelected++;
12742 }
12743 }
12744 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12745 SDValue Val =
12746 HasPassthru
12747 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12748 DAG.getVectorIdxConstant(Rest, DL))
12749 : DAG.getUNDEF(ScalarVT);
12750 Ops.push_back(Val);
12751 }
12752 return DAG.getBuildVector(VecVT, DL, Ops);
12753 }
12754
12755 return SDValue();
12756}
12757
12758SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12759 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12760 SDValue Mask = MGT->getMask();
12761 SDValue Chain = MGT->getChain();
12762 SDValue Index = MGT->getIndex();
12763 SDValue Scale = MGT->getScale();
12764 SDValue BasePtr = MGT->getBasePtr();
12765 SDValue VL = MGT->getVectorLength();
12766 ISD::MemIndexType IndexType = MGT->getIndexType();
12767 SDLoc DL(N);
12768
12769 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12770 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12771 return DAG.getGatherVP(
12772 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12773 Ops, MGT->getMemOperand(), IndexType);
12774 }
12775
12776 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12777 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12778 return DAG.getGatherVP(
12779 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12780 Ops, MGT->getMemOperand(), IndexType);
12781 }
12782
12783 return SDValue();
12784}
12785
12786SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12787 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12788 SDValue Mask = MGT->getMask();
12789 SDValue Chain = MGT->getChain();
12790 SDValue Index = MGT->getIndex();
12791 SDValue Scale = MGT->getScale();
12792 SDValue PassThru = MGT->getPassThru();
12793 SDValue BasePtr = MGT->getBasePtr();
12794 ISD::MemIndexType IndexType = MGT->getIndexType();
12795 SDLoc DL(N);
12796
12797 // Zap gathers with a zero mask.
12798 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12799 return CombineTo(N, PassThru, MGT->getChain());
12800
12801 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12802 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12803 return DAG.getMaskedGather(
12804 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12805 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12806 }
12807
12808 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12809 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12810 return DAG.getMaskedGather(
12811 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12812 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12813 }
12814
12815 return SDValue();
12816}
12817
12818SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12819 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12820 SDValue Mask = MLD->getMask();
12821
12822 // Zap masked loads with a zero mask.
12823 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12824 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12825
12826 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12827 // FIXME: Can we do this for indexed, expanding, or extending loads?
12828 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12829 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12830 SDValue NewLd = DAG.getLoad(
12831 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12832 MLD->getPointerInfo(), MLD->getBaseAlign(),
12833 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12834 return CombineTo(N, NewLd, NewLd.getValue(1));
12835 }
12836
12837 // Try transforming N to an indexed load.
12838 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12839 return SDValue(N, 0);
12840
12841 return SDValue();
12842}
12843
12844SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12845 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12846 SDValue Chain = HG->getChain();
12847 SDValue Inc = HG->getInc();
12848 SDValue Mask = HG->getMask();
12849 SDValue BasePtr = HG->getBasePtr();
12850 SDValue Index = HG->getIndex();
12851 SDLoc DL(HG);
12852
12853 EVT MemVT = HG->getMemoryVT();
12854 EVT DataVT = Index.getValueType();
12855 MachineMemOperand *MMO = HG->getMemOperand();
12856 ISD::MemIndexType IndexType = HG->getIndexType();
12857
12858 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12859 return Chain;
12860
12861 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12862 refineIndexType(Index, IndexType, DataVT, DAG)) {
12863 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12864 HG->getScale(), HG->getIntID()};
12865 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12866 MMO, IndexType);
12867 }
12868
12869 return SDValue();
12870}
12871
12872SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12873 if (SDValue Res = foldPartialReduceMLAMulOp(N))
12874 return Res;
12875 if (SDValue Res = foldPartialReduceAdd(N))
12876 return Res;
12877 return SDValue();
12878}
12879
12880// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
12881// -> partial_reduce_*mla(acc, a, b)
12882//
12883// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12884// -> partial_reduce_*mla(acc, x, C)
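// As a concrete (illustrative) example, with acc : <4 x i32> and
// a, b : <16 x i8>,
//   partial_reduce_umla(acc, mul(zext(a), zext(b)), splat(1))
// becomes
//   partial_reduce_umla(acc, a, b)
// when the target can fold the zero-extends into the operation.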
12885SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
12886 SDLoc DL(N);
12887 auto *Context = DAG.getContext();
12888 SDValue Acc = N->getOperand(0);
12889 SDValue Op1 = N->getOperand(1);
12890 SDValue Op2 = N->getOperand(2);
12891
12892 APInt C;
12893 if (Op1->getOpcode() != ISD::MUL ||
12894 !ISD::isConstantSplatVector(Op2.getNode(), C) || !C.isOne())
12895 return SDValue();
12896
12897 SDValue LHS = Op1->getOperand(0);
12898 SDValue RHS = Op1->getOperand(1);
12899 unsigned LHSOpcode = LHS->getOpcode();
12900 if (!ISD::isExtOpcode(LHSOpcode))
12901 return SDValue();
12902
12903 SDValue LHSExtOp = LHS->getOperand(0);
12904 EVT LHSExtOpVT = LHSExtOp.getValueType();
12905
12906 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12907 // -> partial_reduce_*mla(acc, x, C)
12908 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
12909 // TODO: Make use of partial_reduce_sumla here
12910 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
12911 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
12912 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
12913 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
12914 return SDValue();
12915
12916 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
12917 ? ISD::PARTIAL_REDUCE_SMLA
12918 : ISD::PARTIAL_REDUCE_UMLA;
12919
12920 // Only perform these combines if the target supports folding
12921 // the extends into the operation.
12922 if (!TLI.isPartialReduceMLALegalOrCustom(
12923 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
12924 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
12925 return SDValue();
12926
12927 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
12928 DAG.getConstant(CTrunc, DL, LHSExtOpVT));
12929 }
12930
12931 unsigned RHSOpcode = RHS->getOpcode();
12932 if (!ISD::isExtOpcode(RHSOpcode))
12933 return SDValue();
12934
12935 SDValue RHSExtOp = RHS->getOperand(0);
12936 if (LHSExtOpVT != RHSExtOp.getValueType())
12937 return SDValue();
12938
12939 unsigned NewOpc;
12940 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
12941 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
12942 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
12943 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
12944 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
12945 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
12946 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
12947 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
12948 std::swap(LHSExtOp, RHSExtOp);
12949 } else
12950 return SDValue();
12951 // For a 2-stage extend the signedness of both of the extends must match.
12952 // If the mul has the same type, there is no outer extend, and thus we
12953 // can simply use the inner extends to pick the result node.
12954 // TODO: extend to handle nonneg zext as sext
12955 EVT AccElemVT = Acc.getValueType().getVectorElementType();
12956 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
12957 NewOpc != N->getOpcode())
12958 return SDValue();
12959
12960 // Only perform these combines if the target supports folding
12961 // the extends into the operation.
12962 if (!TLI.isPartialReduceMLALegalOrCustom(
12963 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
12964 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
12965 return SDValue();
12966
12967 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
12968}
12969
12970// partial.reduce.umla(acc, zext(op), splat(1))
12971// -> partial.reduce.umla(acc, op, splat(trunc(1)))
12972// partial.reduce.smla(acc, sext(op), splat(1))
12973// -> partial.reduce.smla(acc, op, splat(trunc(1)))
12974// partial.reduce.sumla(acc, sext(op), splat(1))
12975// -> partial.reduce.smla(acc, op, splat(trunc(1)))
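// That is, a plain partial reduction of an extended operand becomes a
// multiply-accumulate by a splat of 1 in the unextended type, so the target
// can fold the extend into the operation, e.g. (illustrative)
//   partial.reduce.umla(acc, zext(<16 x i8> op), splat(i32 1))
//   --> partial.reduce.umla(acc, op, splat(i8 1))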
12976SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
12977 SDLoc DL(N);
12978 SDValue Acc = N->getOperand(0);
12979 SDValue Op1 = N->getOperand(1);
12980 SDValue Op2 = N->getOperand(2);
12981
12982 APInt ConstantOne;
12983 if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
12984 !ConstantOne.isOne())
12985 return SDValue();
12986
12987 unsigned Op1Opcode = Op1.getOpcode();
12988 if (!ISD::isExtOpcode(Op1Opcode))
12989 return SDValue();
12990
12991 bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
12992 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
12993 EVT AccElemVT = Acc.getValueType().getVectorElementType();
12994 if (Op1IsSigned != NodeIsSigned &&
12995 Op1.getValueType().getVectorElementType() != AccElemVT)
12996 return SDValue();
12997
12998 unsigned NewOpcode =
12999 Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
13000
13001 SDValue UnextOp1 = Op1.getOperand(0);
13002 EVT UnextOp1VT = UnextOp1.getValueType();
13003 auto *Context = DAG.getContext();
13004 if (!TLI.isPartialReduceMLALegalOrCustom(
13005 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13006 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13007 return SDValue();
13008
13009 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13010 DAG.getConstant(1, DL, UnextOp1VT));
13011}
13012
13013SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13014 auto *SLD = cast<VPStridedLoadSDNode>(N);
13015 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13016 // Combine strided loads with unit-stride to a regular VP load.
13017 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13018 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13019 SDValue NewLd = DAG.getLoadVP(
13020 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13021 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13022 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13023 SLD->getMemOperand(), SLD->isExpandingLoad());
13024 return CombineTo(N, NewLd, NewLd.getValue(1));
13025 }
13026 return SDValue();
13027}
13028
13029/// A vector select of 2 constant vectors can be simplified to math/logic to
13030/// avoid a variable select instruction and possibly avoid constant loads.
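/// For example (illustrative):
///   vselect <4 x i1> %c, <4 x i32> <5,5,5,5>, <4 x i32> <4,4,4,4>
///   --> add (zext %c to <4 x i32>), <4,4,4,4>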
13031SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13032 SDValue Cond = N->getOperand(0);
13033 SDValue N1 = N->getOperand(1);
13034 SDValue N2 = N->getOperand(2);
13035 EVT VT = N->getValueType(0);
13036 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13037 !TLI.convertSelectOfConstantsToMath(VT) ||
13038 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
13039 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
13040 return SDValue();
13041
13042 // Check if we can use the condition value to increment/decrement a single
13043 // constant value. This simplifies a select to an add and removes a constant
13044 // load/materialization from the general case.
13045 bool AllAddOne = true;
13046 bool AllSubOne = true;
13047 unsigned Elts = VT.getVectorNumElements();
13048 for (unsigned i = 0; i != Elts; ++i) {
13049 SDValue N1Elt = N1.getOperand(i);
13050 SDValue N2Elt = N2.getOperand(i);
13051 if (N1Elt.isUndef())
13052 continue;
13053 // N2 should not contain undef values since it will be reused in the fold.
13054 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13055 AllAddOne = false;
13056 AllSubOne = false;
13057 break;
13058 }
13059
13060 const APInt &C1 = N1Elt->getAsAPIntVal();
13061 const APInt &C2 = N2Elt->getAsAPIntVal();
13062 if (C1 != C2 + 1)
13063 AllAddOne = false;
13064 if (C1 != C2 - 1)
13065 AllSubOne = false;
13066 }
13067
13068 // Further simplifications for the extra-special cases where the constants are
13069 // all 0 or all -1 should be implemented as folds of these patterns.
13070 SDLoc DL(N);
13071 if (AllAddOne || AllSubOne) {
13072 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13073 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13074 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13075 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13076 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13077 }
13078
13079 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13080 APInt Pow2C;
13081 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13082 isNullOrNullSplat(N2)) {
13083 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13084 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13085 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13086 }
13087
13088 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
13089 return V;
13090
13091 // The general case for select-of-constants:
13092 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13093 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13094 // leave that to a machine-specific pass.
13095 return SDValue();
13096}
13097
13098SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13099 SDValue N0 = N->getOperand(0);
13100 SDValue N1 = N->getOperand(1);
13101 SDValue N2 = N->getOperand(2);
13102 SDLoc DL(N);
13103
13104 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13105 return V;
13106
13107 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
13108 return V;
13109
13110 return SDValue();
13111}
13112
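/// Fold a vector select whose true or false operand is an all-ones or
/// all-zeros splat into bitwise logic on the condition, which must be a
/// sign-splat value of the same element width, e.g.
///   (vselect Cond, -1, 0) -> bitcast Cond
///   (vselect Cond, x, 0)  -> and Cond, x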
13113 static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13114 SDValue FVal,
13115 const TargetLowering &TLI,
13116 SelectionDAG &DAG,
13117 const SDLoc &DL) {
13118 EVT VT = TVal.getValueType();
13119 if (!TLI.isTypeLegal(VT))
13120 return SDValue();
13121
13122 EVT CondVT = Cond.getValueType();
13123 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13124
13125 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13126 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13127 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13128 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13129
13130 // Bail out unless this is a vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1).
13131 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13132 return SDValue();
13133
13134 // select Cond, 0, 0 → 0
13135 if (IsTAllZero && IsFAllZero) {
13136 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13137 : DAG.getConstant(0, DL, VT);
13138 }
13139
13140 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13141 APInt TValAPInt;
13142 if (Cond.getOpcode() == ISD::SETCC &&
13143 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13144 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13145 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13146 TValAPInt.isOne() &&
13147 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13148 ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
13149 return SDValue();
13150 }
13151
13152 // To use the condition operand as a bitwise mask, it must have elements that
13153 // are the same size as the select elements, i.e., the condition operand must
13154 // have already been promoted from the IR select condition type <N x i1>.
13155 // Don't check if the types themselves are equal because that excludes
13156 // vector floating-point selects.
13157 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13158 return SDValue();
13159
13160 // Cond value must be 'sign splat' to be converted to a logical op.
13161 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13162 return SDValue();
13163
13164 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13165 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13166 Cond.getOpcode() == ISD::SETCC &&
13167 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13168 CondVT) {
13169 if (IsTAllZero || IsFAllOne) {
13170 SDValue CC = Cond.getOperand(2);
13171 ISD::CondCode InverseCC = ISD::getSetCCInverse(
13172 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13173 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13174 InverseCC);
13175 std::swap(TVal, FVal);
13176 std::swap(IsTAllOne, IsFAllOne);
13177 std::swap(IsTAllZero, IsFAllZero);
13178 }
13179 }
13180
13181 assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
13182 "Select condition no longer all-sign bits");
13183
13184 // select Cond, -1, 0 → bitcast Cond
13185 if (IsTAllOne && IsFAllZero)
13186 return DAG.getBitcast(VT, Cond);
13187
13188 // select Cond, -1, x → or Cond, x
13189 if (IsTAllOne) {
13190 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13191 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13192 return DAG.getBitcast(VT, Or);
13193 }
13194
13195 // select Cond, x, 0 → and Cond, x
13196 if (IsFAllZero) {
13197 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13198 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13199 return DAG.getBitcast(VT, And);
13200 }
13201
13202 // select Cond, 0, x -> and not(Cond), x
13203 if (IsTAllZero &&
13204 (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
13205 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13206 SDValue And =
13207 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13208 return DAG.getBitcast(VT, And);
13209 }
13210
13211 return SDValue();
13212}
13213
13214SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13215 SDValue N0 = N->getOperand(0);
13216 SDValue N1 = N->getOperand(1);
13217 SDValue N2 = N->getOperand(2);
13218 EVT VT = N->getValueType(0);
13219 SDLoc DL(N);
13220
13221 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13222 return V;
13223
13224 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
13225 return V;
13226
13227 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13228 if (!TLI.isTargetCanonicalSelect(N))
13229 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13230 return DAG.getSelect(DL, VT, F, N2, N1);
13231
13232 // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
13233 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13236 TLI.getBooleanContents(N0.getValueType()) ==
13237 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13238 return DAG.getNode(
13239 ISD::ADD, DL, N1.getValueType(), N2,
13240 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13241 }
13242
13243 // Canonicalize integer abs.
13244 // vselect (setg[te] X, 0), X, -X ->
13245 // vselect (setgt X, -1), X, -X ->
13246 // vselect (setl[te] X, 0), -X, X ->
13247 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
13248 if (N0.getOpcode() == ISD::SETCC) {
13249 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13250 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13251 bool isAbs = false;
13252 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13253
13254 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13255 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13256 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13257 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
13258 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13259 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13260 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
13261
13262 if (isAbs) {
13263 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
13264 return DAG.getNode(ISD::ABS, DL, VT, LHS);
13265
13266 SDValue Shift = DAG.getNode(
13267 ISD::SRA, DL, VT, LHS,
13268 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13269 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13270 AddToWorklist(Shift.getNode());
13271 AddToWorklist(Add.getNode());
13272 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13273 }
13274
13275 // vselect (fcmp lt x, y), x, y -> fminnum x, y
13276 // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
13277 //
13278 // This is OK if we don't care about what happens if either operand is a
13279 // NaN.
13280 //
13281 if (N0.hasOneUse() &&
13282 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13283 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13284 return FMinMax;
13285 }
13286
13287 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13288 return S;
13289 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13290 return S;
13291
13292 // If this select has a condition (setcc) with narrower operands than the
13293 // select, try to widen the compare to match the select width.
13294 // TODO: This should be extended to handle any constant.
13295 // TODO: This could be extended to handle non-loading patterns, but that
13296 // requires thorough testing to avoid regressions.
13297 if (isNullOrNullSplat(RHS)) {
13298 EVT NarrowVT = LHS.getValueType();
13299 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
13300 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13301 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13302 unsigned WideWidth = WideVT.getScalarSizeInBits();
13303 bool IsSigned = isSignedIntSetCC(CC);
13304 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13305 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13306 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13307 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13308 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13309 // Both compare operands can be widened for free. The LHS can use an
13310 // extended load, and the RHS is a constant:
13311 // vselect (ext (setcc load(X), C)), N1, N2 -->
13312 // vselect (setcc extload(X), C'), N1, N2
13313 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13314 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13315 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13316 EVT WideSetCCVT = getSetCCResultType(WideVT);
13317 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13318 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13319 }
13320 }
13321
13322 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13323 return ABD;
13324
13325 // Match VSELECTs into add with unsigned saturation.
13326 if (hasOperation(ISD::UADDSAT, VT)) {
13327 // Check if one of the arms of the VSELECT is a vector with all bits set.
13328 // If it's on the left side invert the predicate to simplify logic below.
13329 SDValue Other;
13330 ISD::CondCode SatCC = CC;
13331 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
13332 Other = N2;
13333 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13334 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13335 Other = N1;
13336 }
13337
13338 if (Other && Other.getOpcode() == ISD::ADD) {
13339 SDValue CondLHS = LHS, CondRHS = RHS;
13340 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13341
13342 // Canonicalize condition operands.
13343 if (SatCC == ISD::SETUGE) {
13344 std::swap(CondLHS, CondRHS);
13345 SatCC = ISD::SETULE;
13346 }
13347
13348 // We can test against either of the addition operands.
13349 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13350 // x+y >= x ? x+y : ~0 --> uaddsat x, y
13351 if (SatCC == ISD::SETULE && Other == CondRHS &&
13352 (OpLHS == CondLHS || OpRHS == CondLHS))
13353 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13354
13355 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13356 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13357 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13358 CondLHS == OpLHS) {
13359 // If the RHS is a constant we have to reverse the const
13360 // canonicalization.
13361 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13362 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13363 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13364 };
13365 if (SatCC == ISD::SETULE &&
13366 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13367 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13368 }
13369 }
13370 }
13371
13372 // Match VSELECTs into sub with unsigned saturation.
13373 if (hasOperation(ISD::USUBSAT, VT)) {
13374 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13375 // the left side invert the predicate to simplify logic below.
13376 SDValue Other;
13377 ISD::CondCode SatCC = CC;
13378 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
13379 Other = N2;
13380 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13381 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
13382 Other = N1;
13383 }
13384
13385 // zext(x) >= y ? trunc(zext(x) - y) : 0
13386 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13387 // zext(x) > y ? trunc(zext(x) - y) : 0
13388 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13389 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13390 Other.getOperand(0).getOpcode() == ISD::SUB &&
13391 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13392 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13393 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13394 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13395 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13396 DAG, DL))
13397 return R;
13398 }
13399
13400 if (Other && Other.getNumOperands() == 2) {
13401 SDValue CondRHS = RHS;
13402 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13403
13404 if (OpLHS == LHS) {
13405 // Look for a general sub with unsigned saturation first.
13406 // x >= y ? x-y : 0 --> usubsat x, y
13407 // x > y ? x-y : 0 --> usubsat x, y
13408 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13409 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13410 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13411
13412 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13413 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13414 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13415 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13416 // If the RHS is a constant we have to reverse the const
13417 // canonicalization.
13418 // x > C-1 ? x+-C : 0 --> usubsat x, C
13419 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13420 return (!Op && !Cond) ||
13421 (Op && Cond &&
13422 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13423 };
13424 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13425 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13426 /*AllowUndefs*/ true)) {
13427 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13428 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13429 }
13430
13431 // Another special case: If C was a sign bit, the sub has been
13432 // canonicalized into a xor.
13433 // FIXME: Would it be better to use computeKnownBits to
13434 // determine whether it's safe to decanonicalize the xor?
13435 // x s< 0 ? x^C : 0 --> usubsat x, C
13436 APInt SplatValue;
13437 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13438 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13439 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
13440 SplatValue.isSignMask()) {
13441 // Note that we have to rebuild the RHS constant here to
13442 // ensure we don't rely on particular values of undef lanes.
13443 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13444 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13445 }
13446 }
13447 }
13448 }
13449 }
13450 }
13451
13452 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13453 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13454 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13455 return UMin;
13456 }
13457
13458 if (SimplifySelectOps(N, N1, N2))
13459 return SDValue(N, 0); // Don't revisit N.
13460
13461 // Fold (vselect all_ones, N1, N2) -> N1
13462 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13463 return N1;
13464 // Fold (vselect all_zeros, N1, N2) -> N2
13465 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13466 return N2;
13467
13468 // The ConvertSelectToConcatVector function assumes both the above
13469 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
13470 // and addressed.
13471 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13472 N2.getOpcode() == ISD::CONCAT_VECTORS &&
13473 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13474 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13475 return CV;
13476 }
13477
13478 if (SDValue V = foldVSelectOfConstants(N))
13479 return V;
13480
13481 if (hasOperation(ISD::SRA, VT))
13482 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13483 return V;
13484
13486 return SDValue(N, 0);
13487
13488 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13489 return V;
13490
13491 return SDValue();
13492}
13493
13494SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13495 SDValue N0 = N->getOperand(0);
13496 SDValue N1 = N->getOperand(1);
13497 SDValue N2 = N->getOperand(2);
13498 SDValue N3 = N->getOperand(3);
13499 SDValue N4 = N->getOperand(4);
13500 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13501 SDLoc DL(N);
13502
13503 // fold select_cc lhs, rhs, x, x, cc -> x
13504 if (N2 == N3)
13505 return N2;
13506
13507 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13508 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13509 isNullConstant(N1))
13510 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13511
13512 // Determine if the condition we're dealing with is constant
13513 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13514 CC, DL, false)) {
13515 AddToWorklist(SCC.getNode());
13516
13517 // cond always true -> true val
13518 // cond always false -> false val
13519 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13520 return SCCC->isZero() ? N3 : N2;
13521
13522 // When the condition is UNDEF, just return the first operand. This is
13523 // coherent with DAG creation: no setcc node is created in this case.
13524 if (SCC->isUndef())
13525 return N2;
13526
13527 // Fold to a simpler select_cc
13528 if (SCC.getOpcode() == ISD::SETCC) {
13529 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13530 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13531 SCC.getOperand(2), SCC->getFlags());
13532 }
13533 }
13534
13535 // If we can fold this based on the true/false value, do so.
13536 if (SimplifySelectOps(N, N2, N3))
13537 return SDValue(N, 0); // Don't revisit N.
13538
13539 // fold select_cc into other things, such as min/max/abs
13540 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13541}
13542
13543SDValue DAGCombiner::visitSETCC(SDNode *N) {
13544 // setcc is very commonly used as an argument to brcond. This pattern
13545 // also lends itself to numerous combines and, as a result, it is desirable
13546 // to keep the argument to a brcond as a setcc as much as possible.
13547 bool PreferSetCC =
13548 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13549
13550 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13551 EVT VT = N->getValueType(0);
13552 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13553 SDLoc DL(N);
13554
13555 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13556 // If we prefer to have a setcc, and we don't, we'll try our best to
13557 // recreate one using rebuildSetCC.
13558 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13559 SDValue NewSetCC = rebuildSetCC(Combined);
13560
13561 // We don't have anything interesting to combine to.
13562 if (NewSetCC.getNode() == N)
13563 return SDValue();
13564
13565 if (NewSetCC)
13566 return NewSetCC;
13567 }
13568 return Combined;
13569 }
13570
13571 // Optimize
13572 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13573 // or
13574 // 2) (icmp eq/ne X, (rotate X, C1))
13575 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13576 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
13577 // Then:
13578 // If C1 is a power of 2, then the rotate and shift+and versions are
13579 // equivalent, so we can interchange them depending on target preference.
13580 // Otherwise, if we have the shift+and version we can interchange srl/shl,
13581 // which in turn affects the constant C0. We can use this to get better
13582 // constants, again determined by target preference.
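// For example (illustrative), for a 64-bit value both
//   (x & 0x00000000FFFFFFFF) == (x >> 32)
// and
//   (x & 0xFFFFFFFF00000000) == (x << 32)
// compare the low half of x against the high half, and both are equivalent
// to x == (rotl x, 32), so we can pick whichever form the target prefers.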
13583 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13584 auto IsAndWithShift = [](SDValue A, SDValue B) {
13585 return A.getOpcode() == ISD::AND &&
13586 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13587 A.getOperand(0) == B.getOperand(0);
13588 };
13589 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13590 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13591 B.getOperand(0) == A;
13592 };
13593 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13594 bool IsRotate = false;
13595
13596 // Find either shift+and or rotate pattern.
13597 if (IsAndWithShift(N0, N1)) {
13598 AndOrOp = N0;
13599 ShiftOrRotate = N1;
13600 } else if (IsAndWithShift(N1, N0)) {
13601 AndOrOp = N1;
13602 ShiftOrRotate = N0;
13603 } else if (IsRotateWithOp(N0, N1)) {
13604 IsRotate = true;
13605 AndOrOp = N0;
13606 ShiftOrRotate = N1;
13607 } else if (IsRotateWithOp(N1, N0)) {
13608 IsRotate = true;
13609 AndOrOp = N1;
13610 ShiftOrRotate = N0;
13611 }
13612
13613 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13614 (IsRotate || AndOrOp.hasOneUse())) {
13615 EVT OpVT = N0.getValueType();
13616 // Get the constant shift/rotate amount and possibly the mask (if it's the
13617 // shift+and variant).
13618 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13619 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13620 /*AllowTrunc*/ false);
13621 if (CNode == nullptr)
13622 return std::nullopt;
13623 return CNode->getAPIntValue();
13624 };
13625 std::optional<APInt> AndCMask =
13626 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13627 std::optional<APInt> ShiftCAmt =
13628 GetAPIntValue(ShiftOrRotate.getOperand(1));
13629 unsigned NumBits = OpVT.getScalarSizeInBits();
13630
13631 // We found constants.
13632 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13633 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13634 // Check that the constants meet the constraints.
13635 bool CanTransform = IsRotate;
13636 if (!CanTransform) {
13637 // Check that the mask and shift complement each other
13638 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13639 // Check that we are comparing all bits
13640 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13641 // Check that the and mask is correct for the shift
13642 CanTransform &=
13643 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13644 }
13645
13646 // See if target prefers another shift/rotate opcode.
13647 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13648 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13649 // Transform is valid and we have a new preference.
13650 if (CanTransform && NewShiftOpc != ShiftOpc) {
13651 SDValue NewShiftOrRotate =
13652 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13653 ShiftOrRotate.getOperand(1));
13654 SDValue NewAndOrOp = SDValue();
13655
13656 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13657 APInt NewMask =
13658 NewShiftOpc == ISD::SHL
13659 ? APInt::getHighBitsSet(NumBits,
13660 NumBits - ShiftCAmt->getZExtValue())
13661 : APInt::getLowBitsSet(NumBits,
13662 NumBits - ShiftCAmt->getZExtValue());
13663 NewAndOrOp =
13664 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13665 DAG.getConstant(NewMask, DL, OpVT));
13666 } else {
13667 NewAndOrOp = ShiftOrRotate.getOperand(0);
13668 }
13669
13670 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13671 }
13672 }
13673 }
13674 }
13675 return SDValue();
13676}
13677
13678SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13679 SDValue LHS = N->getOperand(0);
13680 SDValue RHS = N->getOperand(1);
13681 SDValue Carry = N->getOperand(2);
13682 SDValue Cond = N->getOperand(3);
13683
13684 // If Carry is false, fold to a regular SETCC.
13685 if (isNullConstant(Carry))
13686 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13687
13688 return SDValue();
13689}
13690
13691/// Check if N satisfies:
13692/// N is used once.
13693/// N is a Load.
13694 /// The load is compatible with ExtOpcode. This means that if the load
13695 /// has an explicit zero/sign extension, ExtOpcode must be the same
13696 /// extension;
13697 /// otherwise (no explicit extension) any ExtOpcode is compatible.
13698static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13699 if (!N.hasOneUse())
13700 return false;
13701
13702 if (!isa<LoadSDNode>(N))
13703 return false;
13704
13705 LoadSDNode *Load = cast<LoadSDNode>(N);
13706 ISD::LoadExtType LoadExt = Load->getExtensionType();
13707 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13708 return true;
13709
13710 // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must have the same
13711 // extension.
13712 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13713 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13714 return false;
13715
13716 return true;
13717}
13718
13719/// Fold
13720/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13721/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13722/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13723/// This function is called by the DAGCombiner when visiting sext/zext/aext
13724/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13726 SelectionDAG &DAG, const SDLoc &DL,
13727 CombineLevel Level) {
13728 unsigned Opcode = N->getOpcode();
13729 SDValue N0 = N->getOperand(0);
13730 EVT VT = N->getValueType(0);
13731 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13732 Opcode == ISD::ANY_EXTEND) &&
13733 "Expected EXTEND dag node in input!");
13734
13735 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13736 !N0.hasOneUse())
13737 return SDValue();
13738
13739 SDValue Op1 = N0->getOperand(1);
13740 SDValue Op2 = N0->getOperand(2);
13741 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13742 return SDValue();
13743
13744 auto ExtLoadOpcode = ISD::EXTLOAD;
13745 if (Opcode == ISD::SIGN_EXTEND)
13746 ExtLoadOpcode = ISD::SEXTLOAD;
13747 else if (Opcode == ISD::ZERO_EXTEND)
13748 ExtLoadOpcode = ISD::ZEXTLOAD;
13749
13750 // An illegal VSELECT may fail instruction selection if it appears after
13751 // legalization (DAG Combine2), so conservatively check the OperationAction.
13752 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13753 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13754 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13755 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13756 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13757 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13758 return SDValue();
13759
13760 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13761 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13762 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13763}
13764
13765/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13766/// a build_vector of constants.
13767/// This function is called by the DAGCombiner when visiting sext/zext/aext
13768/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13769/// Vector extends are not folded if operations are legal; this is to
13770/// avoid introducing illegal build_vector dag nodes.
13771 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13772 const TargetLowering &TLI,
13773 SelectionDAG &DAG, bool LegalTypes) {
13774 unsigned Opcode = N->getOpcode();
13775 SDValue N0 = N->getOperand(0);
13776 EVT VT = N->getValueType(0);
13777
13778 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13779 "Expected EXTEND dag node in input!");
13780
13781 // fold (sext c1) -> c1
13782 // fold (zext c1) -> c1
13783 // fold (aext c1) -> c1
13784 if (isa<ConstantSDNode>(N0))
13785 return DAG.getNode(Opcode, DL, VT, N0);
13786
13787 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13788 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13789 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13790 if (N0->getOpcode() == ISD::SELECT) {
13791 SDValue Op1 = N0->getOperand(1);
13792 SDValue Op2 = N0->getOperand(2);
13793 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13794 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13795 // For any_extend, choose sign extension of the constants to allow a
13796 // possible further transform to sign_extend_inreg.i.e.
13797 //
13798 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13799 // t2: i64 = any_extend t1
13800 // -->
13801 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13802 // -->
13803 // t4: i64 = sign_extend_inreg t3
13804 unsigned FoldOpc = Opcode;
13805 if (FoldOpc == ISD::ANY_EXTEND)
13806 FoldOpc = ISD::SIGN_EXTEND;
13807 return DAG.getSelect(DL, VT, N0->getOperand(0),
13808 DAG.getNode(FoldOpc, DL, VT, Op1),
13809 DAG.getNode(FoldOpc, DL, VT, Op2));
13810 }
13811 }
13812
13813 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13814 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13815 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
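// For illustration (not from the original source), with a v4i16 source:
//   t1: v4i16 = build_vector Constant:i16<1>, Constant:i16<2>, undef, undef
//   t2: v4i32 = zero_extend t1
// folds directly to
//   t3: v4i32 = build_vector Constant:i32<1>, Constant:i32<2>, Constant:i32<0>, Constant:i32<0>
// Undef lanes become zero for sext/zext and stay undef for any_extend, as the
// loop below implements.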
13816 EVT SVT = VT.getScalarType();
13817 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13818 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13819 return SDValue();
13820
13821 // We can fold this node into a build_vector.
13822 unsigned VTBits = SVT.getSizeInBits();
13823 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13824 SmallVector<SDValue, 8> Elts;
13825 unsigned NumElts = VT.getVectorNumElements();
13826
13827 for (unsigned i = 0; i != NumElts; ++i) {
13828 SDValue Op = N0.getOperand(i);
13829 if (Op.isUndef()) {
13830 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13831 Elts.push_back(DAG.getUNDEF(SVT));
13832 else
13833 Elts.push_back(DAG.getConstant(0, DL, SVT));
13834 continue;
13835 }
13836
13837 SDLoc DL(Op);
13838 // Get the constant value and if needed trunc it to the size of the type.
13839 // Nodes like build_vector might have constants wider than the scalar type.
13840 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13841 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13842 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13843 else
13844 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13845 }
13846
13847 return DAG.getBuildVector(VT, DL, Elts);
13848}
13849
13850// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13851// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13852 // transformation. Returns true if the extension is possible and the above
13853// mentioned transformation is profitable.
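// In short: every other user of the loaded value must either be a SETCC of the
// load against a constant (such users are collected in ExtendNodes and later
// rewritten by ExtendSetCCUses), or be reachable through a free truncate of the
// widened value; a CopyToReg user additionally requires that extending is still
// worthwhile when both the narrow and wide values are live out.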
13854 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13855 unsigned ExtOpc,
13856 SmallVectorImpl<SDNode *> &ExtendNodes,
13857 const TargetLowering &TLI) {
13858 bool HasCopyToRegUses = false;
13859 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13860 for (SDUse &Use : N0->uses()) {
13861 SDNode *User = Use.getUser();
13862 if (User == N)
13863 continue;
13864 if (Use.getResNo() != N0.getResNo())
13865 continue;
13866 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13867 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13868 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13869 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13870 // Sign bits will be lost after a zext.
13871 return false;
13872 bool Add = false;
13873 for (unsigned i = 0; i != 2; ++i) {
13874 SDValue UseOp = User->getOperand(i);
13875 if (UseOp == N0)
13876 continue;
13877 if (!isa<ConstantSDNode>(UseOp))
13878 return false;
13879 Add = true;
13880 }
13881 if (Add)
13882 ExtendNodes.push_back(User);
13883 continue;
13884 }
13885 // If truncates aren't free and there are users we can't
13886 // extend, it isn't worthwhile.
13887 if (!isTruncFree)
13888 return false;
13889 // Remember if this value is live-out.
13890 if (User->getOpcode() == ISD::CopyToReg)
13891 HasCopyToRegUses = true;
13892 }
13893
13894 if (HasCopyToRegUses) {
13895 bool BothLiveOut = false;
13896 for (SDUse &Use : N->uses()) {
13897 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13898 BothLiveOut = true;
13899 break;
13900 }
13901 }
13902 if (BothLiveOut)
13903 // Both unextended and extended values are live out. There had better be
13904 // a good reason for the transformation.
13905 return !ExtendNodes.empty();
13906 }
13907 return true;
13908}
13909
13910void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13911 SDValue OrigLoad, SDValue ExtLoad,
13912 ISD::NodeType ExtType) {
13913 // Extend SetCC uses if necessary.
13914 SDLoc DL(ExtLoad);
13915 for (SDNode *SetCC : SetCCs) {
13916 SmallVector<SDValue, 4> Ops;
13917
13918 for (unsigned j = 0; j != 2; ++j) {
13919 SDValue SOp = SetCC->getOperand(j);
13920 if (SOp == OrigLoad)
13921 Ops.push_back(ExtLoad);
13922 else
13923 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13924 }
13925
13926 Ops.push_back(SetCC->getOperand(2));
13927 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13928 }
13929}
13930
13931// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13932SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13933 SDValue N0 = N->getOperand(0);
13934 EVT DstVT = N->getValueType(0);
13935 EVT SrcVT = N0.getValueType();
13936
13937 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13938 N->getOpcode() == ISD::ZERO_EXTEND) &&
13939 "Unexpected node type (not an extend)!");
13940
13941 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13942 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13943 // (v8i32 (sext (v8i16 (load x))))
13944 // into:
13945 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13946 // (v4i32 (sextload (x + 16)))))
13947 // Where uses of the original load, i.e.:
13948 // (v8i16 (load x))
13949 // are replaced with:
13950 // (v8i16 (truncate
13951 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13952 // (v4i32 (sextload (x + 16)))))))
13953 //
13954 // This combine is only applicable to illegal, but splittable, vectors.
13955 // All legal types, and illegal non-vector types, are handled elsewhere.
13956 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13957 //
13958 if (N0->getOpcode() != ISD::LOAD)
13959 return SDValue();
13960
13961 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13962
13963 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13964 !N0.hasOneUse() || !LN0->isSimple() ||
13965 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13966 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13967 return SDValue();
13968
13970 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13971 return SDValue();
13972
13973 ISD::LoadExtType ExtType =
13974 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13975
13976 // Try to split the vector types to get down to legal types.
13977 EVT SplitSrcVT = SrcVT;
13978 EVT SplitDstVT = DstVT;
13979 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13980 SplitSrcVT.getVectorNumElements() > 1) {
13981 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13982 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13983 }
13984
13985 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13986 return SDValue();
13987
13988 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13989
13990 SDLoc DL(N);
13991 const unsigned NumSplits =
13992 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13993 const unsigned Stride = SplitSrcVT.getStoreSize();
13994 SmallVector<SDValue, 4> Loads;
13995 SmallVector<SDValue, 4> Chains;
13996
13997 SDValue BasePtr = LN0->getBasePtr();
13998 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13999 const unsigned Offset = Idx * Stride;
14000
14001 SDValue SplitLoad =
14002 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14003 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14004 SplitSrcVT, LN0->getBaseAlign(),
14005 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14006
14007 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14008
14009 Loads.push_back(SplitLoad.getValue(0));
14010 Chains.push_back(SplitLoad.getValue(1));
14011 }
14012
14013 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14014 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14015
14016 // Simplify TF.
14017 AddToWorklist(NewChain.getNode());
14018
14019 CombineTo(N, NewValue);
14020
14021 // Replace uses of the original load (before extension)
14022 // with a truncate of the concatenated sextloaded vectors.
14023 SDValue Trunc =
14024 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14025 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14026 CombineTo(N0.getNode(), Trunc, NewChain);
14027 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14028}
14029
14030// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14031// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
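// For illustration (a sketch, not from the original source):
//   zext i64 (and (srl (load i32 p), 8), 255)
// becomes
//   (and (srl (zextload i32 p -> i64), 8), 255)
// so the load and the zero-extension can be matched as a single zextload.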
14032SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14033 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14034 EVT VT = N->getValueType(0);
14035 EVT OrigVT = N->getOperand(0).getValueType();
14036 if (TLI.isZExtFree(OrigVT, VT))
14037 return SDValue();
14038
14039 // and/or/xor
14040 SDValue N0 = N->getOperand(0);
14041 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14042 N0.getOperand(1).getOpcode() != ISD::Constant ||
14043 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14044 return SDValue();
14045
14046 // shl/shr
14047 SDValue N1 = N0->getOperand(0);
14048 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14049 N1.getOperand(1).getOpcode() != ISD::Constant ||
14050 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14051 return SDValue();
14052
14053 // load
14054 if (!isa<LoadSDNode>(N1.getOperand(0)))
14055 return SDValue();
14056 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14057 EVT MemVT = Load->getMemoryVT();
14058 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14059 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14060 return SDValue();
14061
14062
14063 // If the shift op is SHL, the logic op must be AND, otherwise the result
14064 // will be wrong.
14065 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14066 return SDValue();
14067
14068 if (!N0.hasOneUse() || !N1.hasOneUse())
14069 return SDValue();
14070
14072 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14073 ISD::ZERO_EXTEND, SetCCs, TLI))
14074 return SDValue();
14075
14076 // Actually do the transformation.
14077 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14078 Load->getChain(), Load->getBasePtr(),
14079 Load->getMemoryVT(), Load->getMemOperand());
14080
14081 SDLoc DL1(N1);
14082 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14083 N1.getOperand(1));
14084
14085 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14086 SDLoc DL0(N0);
14087 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14088 DAG.getConstant(Mask, DL0, VT));
14089
14090 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14091 CombineTo(N, And);
14092 if (SDValue(Load, 0).hasOneUse()) {
14093 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14094 } else {
14095 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14096 Load->getValueType(0), ExtLoad);
14097 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14098 }
14099
14100 // N0 is dead at this point.
14101 recursivelyDeleteUnusedNodes(N0.getNode());
14102
14103 return SDValue(N,0); // Return N so it doesn't get rechecked!
14104}
14105
14106/// If we're narrowing or widening the result of a vector select and the final
14107/// size is the same size as a setcc (compare) feeding the select, then try to
14108/// apply the cast operation to the select's operands because matching vector
14109/// sizes for a select condition and other operands should be more efficient.
14110SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14111 unsigned CastOpcode = Cast->getOpcode();
14112 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14113 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14114 CastOpcode == ISD::FP_ROUND) &&
14115 "Unexpected opcode for vector select narrowing/widening");
14116
14117 // We only do this transform before legal ops because the pattern may be
14118 // obfuscated by target-specific operations after legalization. Do not create
14119 // an illegal select op, however, because that may be difficult to lower.
14120 EVT VT = Cast->getValueType(0);
14121 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14122 return SDValue();
14123
14124 SDValue VSel = Cast->getOperand(0);
14125 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14126 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14127 return SDValue();
14128
14129 // Does the setcc have the same vector size as the casted select?
14130 SDValue SetCC = VSel.getOperand(0);
14131 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14132 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14133 return SDValue();
14134
14135 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14136 SDValue A = VSel.getOperand(1);
14137 SDValue B = VSel.getOperand(2);
14138 SDValue CastA, CastB;
14139 SDLoc DL(Cast);
14140 if (CastOpcode == ISD::FP_ROUND) {
14141 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14142 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14143 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14144 } else {
14145 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14146 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14147 }
14148 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14149}
14150
14151// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14152// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14153 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14154 const TargetLowering &TLI, EVT VT,
14155 bool LegalOperations, SDNode *N,
14156 SDValue N0, ISD::LoadExtType ExtLoadType) {
14157 SDNode *N0Node = N0.getNode();
14158 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14159 : ISD::isZEXTLoad(N0Node);
14160 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14161 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14162 return SDValue();
14163
14164 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14165 EVT MemVT = LN0->getMemoryVT();
14166 if ((LegalOperations || !LN0->isSimple() ||
14167 VT.isVector()) &&
14168 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14169 return SDValue();
14170
14171 SDValue ExtLoad =
14172 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14173 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14174 Combiner.CombineTo(N, ExtLoad);
14175 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14176 if (LN0->use_empty())
14177 Combiner.recursivelyDeleteUnusedNodes(LN0);
14178 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14179}
14180
14181// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14182// Only generate vector extloads when 1) they're legal, and 2) they are
14183// deemed desirable by the target. NonNegZExt can be set to true if a zero
14184// extend has the nonneg flag to allow use of sextload if profitable.
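// For illustration (not from the original source): (i32 (sign_extend (i16 (load p))))
// becomes a single i16 -> i32 sextload when that extending load is legal or the
// target reports it as desirable.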
14185 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14186 const TargetLowering &TLI, EVT VT,
14187 bool LegalOperations, SDNode *N, SDValue N0,
14188 ISD::LoadExtType ExtLoadType,
14189 ISD::NodeType ExtOpc,
14190 bool NonNegZExt = false) {
14191 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14192 return {};
14193
14194 // If this is zext nneg, see if it would make sense to treat it as a sext.
14195 if (NonNegZExt) {
14196 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14197 "Unexpected load type or opcode");
14198 for (SDNode *User : N0->users()) {
14199 if (User->getOpcode() == ISD::SETCC) {
14200 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14201 if (ISD::isSignedIntSetCC(CC)) {
14202 ExtLoadType = ISD::SEXTLOAD;
14203 ExtOpc = ISD::SIGN_EXTEND;
14204 break;
14205 }
14206 }
14207 }
14208 }
14209
14210 // TODO: isFixedLengthVector() should be removed, with any negative effects
14211 // on code generation treated as the responsibility of that target's
14212 // implementation of isVectorLoadExtDesirable().
14213 if ((LegalOperations || VT.isFixedLengthVector() ||
14214 !cast<LoadSDNode>(N0)->isSimple()) &&
14215 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14216 return {};
14217
14218 bool DoXform = true;
14219 SmallVector<SDNode *, 4> SetCCs;
14220 if (!N0.hasOneUse())
14221 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14222 if (VT.isVector())
14223 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14224 if (!DoXform)
14225 return {};
14226
14227 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14228 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14229 LN0->getBasePtr(), N0.getValueType(),
14230 LN0->getMemOperand());
14231 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14232 // If the load value is used only by N, replace it via CombineTo N.
14233 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14234 Combiner.CombineTo(N, ExtLoad);
14235 if (NoReplaceTrunc) {
14236 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14237 Combiner.recursivelyDeleteUnusedNodes(LN0);
14238 } else {
14239 SDValue Trunc =
14240 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14241 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14242 }
14243 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14244}
14245
14246static SDValue
14247 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14248 bool LegalOperations, SDNode *N, SDValue N0,
14249 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14250 if (!N0.hasOneUse())
14251 return SDValue();
14252
14253 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14254 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14255 return SDValue();
14256
14257 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14258 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14259 return SDValue();
14260
14261 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14262 return SDValue();
14263
14264 SDLoc dl(Ld);
14265 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14266 SDValue NewLoad = DAG.getMaskedLoad(
14267 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14268 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14269 ExtLoadType, Ld->isExpandingLoad());
14270 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14271 return NewLoad;
14272}
14273
14274// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14275 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14276 const TargetLowering &TLI, EVT VT,
14277 SDValue N0,
14278 ISD::LoadExtType ExtLoadType) {
14279 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14280 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14281 return {};
14282 EVT MemoryVT = ALoad->getMemoryVT();
14283 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14284 return {};
14285 // Can't fold into ALoad if it is already extending differently.
14286 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14287 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14288 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14289 return {};
14290
14291 EVT OrigVT = ALoad->getValueType(0);
14292 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14293 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14294 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14295 ALoad->getBasePtr(), ALoad->getMemOperand()));
14296 DAG.ReplaceAllUsesOfValueWith(
14297 SDValue(ALoad, 0),
14298 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14299 // Update the chain uses.
14300 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14301 return SDValue(NewALoad, 0);
14302}
14303
14304 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14305 bool LegalOperations) {
14306 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14307 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14308
14309 SDValue SetCC = N->getOperand(0);
14310 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14311 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14312 return SDValue();
14313
14314 SDValue X = SetCC.getOperand(0);
14315 SDValue Ones = SetCC.getOperand(1);
14316 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14317 EVT VT = N->getValueType(0);
14318 EVT XVT = X.getValueType();
14319 // setge X, C is canonicalized to setgt, so we do not need to match that
14320 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14321 // not require the 'not' op.
14322 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14323 // Invert and smear/shift the sign bit:
14324 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14325 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
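// Reasoning sketch: (setgt X, -1) is true exactly when the sign bit of X is
// clear, i.e. exactly when the sign bit of (not X) is set, so shifting that
// sign bit down (arithmetically for sext, logically for zext) reproduces the
// extended boolean.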
14326 SDLoc DL(N);
14327 unsigned ShCt = VT.getSizeInBits() - 1;
14328 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14329 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14330 SDValue NotX = DAG.getNOT(DL, X, VT);
14331 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14332 auto ShiftOpcode =
14333 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14334 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14335 }
14336 }
14337 return SDValue();
14338}
14339
14340SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14341 SDValue N0 = N->getOperand(0);
14342 if (N0.getOpcode() != ISD::SETCC)
14343 return SDValue();
14344
14345 SDValue N00 = N0.getOperand(0);
14346 SDValue N01 = N0.getOperand(1);
14347 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14348 EVT VT = N->getValueType(0);
14349 EVT N00VT = N00.getValueType();
14350 SDLoc DL(N);
14351
14352 // Propagate fast-math-flags.
14353 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14354
14355 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14356 // the same size as the compared operands. Try to optimize sext(setcc())
14357 // if this is the case.
14358 if (VT.isVector() && !LegalOperations &&
14359 TLI.getBooleanContents(N00VT) ==
14360 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14361 EVT SVT = getSetCCResultType(N00VT);
14362
14363 // If we already have the desired type, don't change it.
14364 if (SVT != N0.getValueType()) {
14365 // We know that the # elements of the results is the same as the
14366 // # elements of the compare (and the # elements of the compare result
14367 // for that matter). Check to see that they are the same size. If so,
14368 // we know that the element size of the sext'd result matches the
14369 // element size of the compare operands.
14370 if (VT.getSizeInBits() == SVT.getSizeInBits())
14371 return DAG.getSetCC(DL, VT, N00, N01, CC);
14372
14373 // If the desired elements are smaller or larger than the source
14374 // elements, we can use a matching integer vector type and then
14375 // truncate/sign extend.
14376 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14377 if (SVT == MatchingVecType) {
14378 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14379 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14380 }
14381 }
14382
14383 // Try to eliminate the sext of a setcc by zexting the compare operands.
14384 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14385 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
14386 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14387 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14388 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14389
14390 // We have an unsupported narrow vector compare op that would be legal
14391 // if extended to the destination type. See if the compare operands
14392 // can be freely extended to the destination type.
14393 auto IsFreeToExtend = [&](SDValue V) {
14394 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14395 return true;
14396 // Match a simple, non-extended load that can be converted to a
14397 // legal {z/s}ext-load.
14398 // TODO: Allow widening of an existing {z/s}ext-load?
14399 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14400 ISD::isUNINDEXEDLoad(V.getNode()) &&
14401 cast<LoadSDNode>(V)->isSimple() &&
14402 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14403 return false;
14404
14405 // Non-chain users of this value must either be the setcc in this
14406 // sequence or extends that can be folded into the new {z/s}ext-load.
14407 for (SDUse &Use : V->uses()) {
14408 // Skip uses of the chain and the setcc.
14409 SDNode *User = Use.getUser();
14410 if (Use.getResNo() != 0 || User == N0.getNode())
14411 continue;
14412 // Extra users must have exactly the same cast we are about to create.
14413 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14414 // is enhanced similarly.
14415 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14416 return false;
14417 }
14418 return true;
14419 };
14420
14421 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14422 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14423 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14424 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14425 }
14426 }
14427 }
14428
14429 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14430 // Here, T can be 1 or -1, depending on the type of the setcc and
14431 // getBooleanContents().
14432 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14433
14434 // To determine the "true" side of the select, we need to know the high bit
14435 // of the value returned by the setcc if it evaluates to true.
14436 // If the type of the setcc is i1, then the true case of the select is just
14437 // sext(i1 1), that is, -1.
14438 // If the type of the setcc is larger (say, i8) then the value of the high
14439 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14440 // of the appropriate width.
14441 SDValue ExtTrueVal = (SetCCWidth == 1)
14442 ? DAG.getAllOnesConstant(DL, VT)
14443 : DAG.getBoolConstant(true, DL, VT, N00VT);
14444 SDValue Zero = DAG.getConstant(0, DL, VT);
14445 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14446 return SCC;
14447
14448 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14449 EVT SetCCVT = getSetCCResultType(N00VT);
14450 // Don't do this transform for i1 because there's a select transform
14451 // that would reverse it.
14452 // TODO: We should not do this transform at all without a target hook
14453 // because a sext is likely cheaper than a select?
14454 if (SetCCVT.getScalarSizeInBits() != 1 &&
14455 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14456 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14457 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14458 }
14459 }
14460
14461 return SDValue();
14462}
14463
14464SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14465 SDValue N0 = N->getOperand(0);
14466 EVT VT = N->getValueType(0);
14467 SDLoc DL(N);
14468
14469 if (VT.isVector())
14470 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14471 return FoldedVOp;
14472
14473 // sext(undef) = 0 because the top bit will all be the same.
14474 if (N0.isUndef())
14475 return DAG.getConstant(0, DL, VT);
14476
14477 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14478 return Res;
14479
14480 // fold (sext (sext x)) -> (sext x)
14481 // fold (sext (aext x)) -> (sext x)
14482 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14483 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14484
14485 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14486 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14487 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
14488 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14489 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
14490 N0.getOperand(0));
14491
14492 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14493 SDValue N00 = N0.getOperand(0);
14494 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14495 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14496 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14497 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14498 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14499 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14500 }
14501
14502 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14503 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14504 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14505 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14506 N0->getOperand(1));
14507 }
14508 }
14509 }
14510
14511 if (N0.getOpcode() == ISD::TRUNCATE) {
14512 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14513 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14514 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14515 SDNode *oye = N0.getOperand(0).getNode();
14516 if (NarrowLoad.getNode() != N0.getNode()) {
14517 CombineTo(N0.getNode(), NarrowLoad);
14518 // CombineTo deleted the truncate, if needed, but not what's under it.
14519 AddToWorklist(oye);
14520 }
14521 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14522 }
14523
14524 // See if the value being truncated is already sign extended. If so, just
14525 // eliminate the trunc/sext pair.
14526 SDValue Op = N0.getOperand(0);
14527 unsigned OpBits = Op.getScalarValueSizeInBits();
14528 unsigned MidBits = N0.getScalarValueSizeInBits();
14529 unsigned DestBits = VT.getScalarSizeInBits();
14530
14531 if (N0->getFlags().hasNoSignedWrap() ||
14532 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14533 if (OpBits == DestBits) {
14534 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14535 // bits, it is already ready.
14536 return Op;
14537 }
14538
14539 if (OpBits < DestBits) {
14540 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14541 // bits, just sext from i32.
14542 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14543 }
14544
14545 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14546 // bits, just truncate to i32.
14547 SDNodeFlags Flags;
14548 Flags.setNoSignedWrap(true);
14549 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14550 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14551 }
14552
14553 // fold (sext (truncate x)) -> (sextinreg x).
14554 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14555 N0.getValueType())) {
14556 if (OpBits < DestBits)
14557 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14558 else if (OpBits > DestBits)
14559 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14560 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14561 DAG.getValueType(N0.getValueType()));
14562 }
14563 }
14564
14565 // Try to simplify (sext (load x)).
14566 if (SDValue foldedExt =
14567 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14568 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14569 return foldedExt;
14570
14571 if (SDValue foldedExt =
14572 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14573 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14574 return foldedExt;
14575
14576 // fold (sext (load x)) to multiple smaller sextloads.
14577 // Only on illegal but splittable vectors.
14578 if (SDValue ExtLoad = CombineExtLoad(N))
14579 return ExtLoad;
14580
14581 // Try to simplify (sext (sextload x)).
14582 if (SDValue foldedExt = tryToFoldExtOfExtload(
14583 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14584 return foldedExt;
14585
14586 // Try to simplify (sext (atomic_load x)).
14587 if (SDValue foldedExt =
14588 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14589 return foldedExt;
14590
14591 // fold (sext (and/or/xor (load x), cst)) ->
14592 // (and/or/xor (sextload x), (sext cst))
14593 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14594 isa<LoadSDNode>(N0.getOperand(0)) &&
14595 N0.getOperand(1).getOpcode() == ISD::Constant &&
14596 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14597 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14598 EVT MemVT = LN00->getMemoryVT();
14599 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14600 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14601 SmallVector<SDNode *, 4> SetCCs;
14602 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14603 ISD::SIGN_EXTEND, SetCCs, TLI);
14604 if (DoXform) {
14605 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14606 LN00->getChain(), LN00->getBasePtr(),
14607 LN00->getMemoryVT(),
14608 LN00->getMemOperand());
14609 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14610 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14611 ExtLoad, DAG.getConstant(Mask, DL, VT));
14612 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14613 bool NoReplaceTruncAnd = !N0.hasOneUse();
14614 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14615 CombineTo(N, And);
14616 // If N0 has multiple uses, change other uses as well.
14617 if (NoReplaceTruncAnd) {
14618 SDValue TruncAnd =
14619 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14620 CombineTo(N0.getNode(), TruncAnd);
14621 }
14622 if (NoReplaceTrunc) {
14623 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14624 } else {
14625 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14626 LN00->getValueType(0), ExtLoad);
14627 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14628 }
14629 return SDValue(N,0); // Return N so it doesn't get rechecked!
14630 }
14631 }
14632 }
14633
14634 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14635 return V;
14636
14637 if (SDValue V = foldSextSetcc(N))
14638 return V;
14639
14640 // fold (sext x) -> (zext x) if the sign bit is known zero.
14641 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14642 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14643 DAG.SignBitIsZero(N0))
14644 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14645
14646 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14647 return NewVSel;
14648
14649 // Eliminate this sign extend by doing a negation in the destination type:
14650 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14651 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14652 isNullOrNullSplat(N0.getOperand(0)) &&
14653 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14654 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14655 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14656 return DAG.getNegative(Zext, DL, VT);
14657 }
14658 // Eliminate this sign extend by doing a decrement in the destination type:
14659 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14660 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14661 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14662 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14663 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14664 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14665 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14666 }
14667
14668 // fold sext (not i1 X) -> add (zext i1 X), -1
14669 // TODO: This could be extended to handle bool vectors.
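// Derivation sketch: for an i1 value, (not X) = 1 - X and sext i1 Y = -Y, so
// sext (not X) = -(1 - X) = X - 1 = (zext X) + (-1).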
14670 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14671 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14672 TLI.isOperationLegal(ISD::ADD, VT)))) {
14673 // If we can eliminate the 'not', the sext form should be better
14674 if (SDValue NewXor = visitXOR(N0.getNode())) {
14675 // Returning N0 is a form of in-visit replacement that may have
14676 // invalidated N0.
14677 if (NewXor.getNode() == N0.getNode()) {
14678 // Return SDValue here as the xor should have already been replaced in
14679 // this sext.
14680 return SDValue();
14681 }
14682
14683 // Return a new sext with the new xor.
14684 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14685 }
14686
14687 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14688 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14689 }
14690
14691 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14692 return Res;
14693
14694 return SDValue();
14695}
14696
14697/// Given an extending node with a pop-count operand, if the target does not
14698/// support a pop-count in the narrow source type but does support it in the
14699/// destination type, widen the pop-count to the destination type.
14700static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14701 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14702 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14703 "Expected extend op");
14704
14705 SDValue CtPop = Extend->getOperand(0);
14706 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14707 return SDValue();
14708
14709 EVT VT = Extend->getValueType(0);
14710 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14711 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14712 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14713 return SDValue();
14714
14715 // zext (ctpop X) --> ctpop (zext X)
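// This is sound because zero-extending X introduces no new set bits, so the
// population count is unchanged; e.g. a CTPOP that is only legal at i32 can
// absorb a zext from i16 this way.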
14716 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14717 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14718}
14719
14720// If we have (zext (abs X)) where X is a type that will be promoted by type
14721// legalization, convert to (abs (sext X)). But don't extend past a legal type.
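// For illustration (not from the original source), on a target that promotes i8
// to i32: (zext i8 (abs X) to i64) becomes (zext (abs (sext X to i32)) to i64),
// keeping the ABS in a type that will not need to be promoted again later.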
14722static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14723 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14724
14725 EVT VT = Extend->getValueType(0);
14726 if (VT.isVector())
14727 return SDValue();
14728
14729 SDValue Abs = Extend->getOperand(0);
14730 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14731 return SDValue();
14732
14733 EVT AbsVT = Abs.getValueType();
14734 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14735 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14736 TargetLowering::TypePromoteInteger)
14737 return SDValue();
14738
14739 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14740
14741 SDValue SExt =
14742 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14743 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14744 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14745}
14746
14747SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14748 SDValue N0 = N->getOperand(0);
14749 EVT VT = N->getValueType(0);
14750 SDLoc DL(N);
14751
14752 if (VT.isVector())
14753 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14754 return FoldedVOp;
14755
14756 // zext(undef) = 0
14757 if (N0.isUndef())
14758 return DAG.getConstant(0, DL, VT);
14759
14760 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14761 return Res;
14762
14763 // fold (zext (zext x)) -> (zext x)
14764 // fold (zext (aext x)) -> (zext x)
14765 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14766 SDNodeFlags Flags;
14767 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14768 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14769 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14770 }
14771
14772 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14773 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14774 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14775 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14776 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14777
14778 // fold (zext (truncate x)) -> (zext x) or
14779 // (zext (truncate x)) -> (truncate x)
14780 // This is valid when the truncated bits of x are already zero.
14781 SDValue Op;
14782 KnownBits Known;
14783 if (isTruncateOf(DAG, N0, Op, Known)) {
14784 APInt TruncatedBits =
14785 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14786 APInt(Op.getScalarValueSizeInBits(), 0) :
14787 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14788 N0.getScalarValueSizeInBits(),
14789 std::min(Op.getScalarValueSizeInBits(),
14790 VT.getScalarSizeInBits()));
14791 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14792 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14793 DAG.salvageDebugInfo(*N0.getNode());
14794
14795 return ZExtOrTrunc;
14796 }
14797 }
14798
14799 // fold (zext (truncate x)) -> (and x, mask)
14800 if (N0.getOpcode() == ISD::TRUNCATE) {
14801 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14802 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14803 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14804 SDNode *oye = N0.getOperand(0).getNode();
14805 if (NarrowLoad.getNode() != N0.getNode()) {
14806 CombineTo(N0.getNode(), NarrowLoad);
14807 // CombineTo deleted the truncate, if needed, but not what's under it.
14808 AddToWorklist(oye);
14809 }
14810 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14811 }
14812
14813 EVT SrcVT = N0.getOperand(0).getValueType();
14814 EVT MinVT = N0.getValueType();
14815
14816 if (N->getFlags().hasNonNeg()) {
14817 SDValue Op = N0.getOperand(0);
14818 unsigned OpBits = SrcVT.getScalarSizeInBits();
14819 unsigned MidBits = MinVT.getScalarSizeInBits();
14820 unsigned DestBits = VT.getScalarSizeInBits();
14821
14822 if (N0->getFlags().hasNoSignedWrap() ||
14823 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14824 if (OpBits == DestBits) {
14825 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14826 // bits, it is already ready.
14827 return Op;
14828 }
14829
14830 if (OpBits < DestBits) {
14831 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14832 // bits, just sext from i32.
14833 // FIXME: This can probably be ZERO_EXTEND nneg?
14834 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14835 }
14836
14837 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14838 // bits, just truncate to i32.
14839 SDNodeFlags Flags;
14840 Flags.setNoSignedWrap(true);
14841 Flags.setNoUnsignedWrap(true);
14842 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14843 }
14844 }
14845
14846 // Try to mask before the extension to avoid having to generate a larger mask,
14847 // possibly over several sub-vectors.
14848 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14849 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14850 TLI.isTruncateFree(VT, SrcVT))) {
14851 SDValue Op = N0.getOperand(0);
14852 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14853 AddToWorklist(Op.getNode());
14854 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14855 // Transfer the debug info; the new node is equivalent to N0.
14856 DAG.transferDbgValues(N0, ZExtOrTrunc);
14857 return ZExtOrTrunc;
14858 }
14859 }
14860
14861 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14862 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14863 AddToWorklist(Op.getNode());
14864 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14865 // We may safely transfer the debug info describing the truncate node over
14866 // to the equivalent and operation.
14867 DAG.transferDbgValues(N0, And);
14868 return And;
14869 }
14870 }
14871
14872 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14873 // if either of the casts is not free.
14874 if (N0.getOpcode() == ISD::AND &&
14875 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14876 N0.getOperand(1).getOpcode() == ISD::Constant &&
14877 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14878 !TLI.isZExtFree(N0.getValueType(), VT))) {
14879 SDValue X = N0.getOperand(0).getOperand(0);
14880 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14881 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14882 return DAG.getNode(ISD::AND, DL, VT,
14883 X, DAG.getConstant(Mask, DL, VT));
14884 }
14885
14886 // Try to simplify (zext (load x)).
14887 if (SDValue foldedExt = tryToFoldExtOfLoad(
14888 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14889 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14890 return foldedExt;
14891
14892 if (SDValue foldedExt =
14893 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14894 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14895 return foldedExt;
14896
14897 // fold (zext (load x)) to multiple smaller zextloads.
14898 // Only on illegal but splittable vectors.
14899 if (SDValue ExtLoad = CombineExtLoad(N))
14900 return ExtLoad;
14901
14902 // Try to simplify (zext (atomic_load x)).
14903 if (SDValue foldedExt =
14904 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14905 return foldedExt;
14906
14907 // fold (zext (and/or/xor (load x), cst)) ->
14908 // (and/or/xor (zextload x), (zext cst))
14909 // Unless (and (load x) cst) will match as a zextload already and has
14910 // additional users, or the zext is already free.
14911 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14912 isa<LoadSDNode>(N0.getOperand(0)) &&
14913 N0.getOperand(1).getOpcode() == ISD::Constant &&
14914 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14915 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14916 EVT MemVT = LN00->getMemoryVT();
14917 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14918 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14919 bool DoXform = true;
14920 SmallVector<SDNode *, 4> SetCCs;
14921 if (!N0.hasOneUse()) {
14922 if (N0.getOpcode() == ISD::AND) {
14923 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14924 EVT LoadResultTy = AndC->getValueType(0);
14925 EVT ExtVT;
14926 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14927 DoXform = false;
14928 }
14929 }
14930 if (DoXform)
14931 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14932 ISD::ZERO_EXTEND, SetCCs, TLI);
14933 if (DoXform) {
14934 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14935 LN00->getChain(), LN00->getBasePtr(),
14936 LN00->getMemoryVT(),
14937 LN00->getMemOperand());
14938 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14939 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14940 ExtLoad, DAG.getConstant(Mask, DL, VT));
14941 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14942 bool NoReplaceTruncAnd = !N0.hasOneUse();
14943 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14944 CombineTo(N, And);
14945 // If N0 has multiple uses, change other uses as well.
14946 if (NoReplaceTruncAnd) {
14947 SDValue TruncAnd =
14948 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14949 CombineTo(N0.getNode(), TruncAnd);
14950 }
14951 if (NoReplaceTrunc) {
14952 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14953 } else {
14954 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14955 LN00->getValueType(0), ExtLoad);
14956 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14957 }
14958 return SDValue(N,0); // Return N so it doesn't get rechecked!
14959 }
14960 }
14961 }
14962
14963 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14964 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14965 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14966 return ZExtLoad;
14967
14968 // Try to simplify (zext (zextload x)).
14969 if (SDValue foldedExt = tryToFoldExtOfExtload(
14970 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14971 return foldedExt;
14972
14973 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14974 return V;
14975
14976 if (N0.getOpcode() == ISD::SETCC) {
14977 // Propagate fast-math-flags.
14978 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14979
14980 // Only do this before legalize for now.
14981 if (!LegalOperations && VT.isVector() &&
14982 N0.getValueType().getVectorElementType() == MVT::i1) {
14983 EVT N00VT = N0.getOperand(0).getValueType();
14984 if (getSetCCResultType(N00VT) == N0.getValueType())
14985 return SDValue();
14986
14987 // We know that the # elements of the results is the same as the #
14988 // elements of the compare (and the # elements of the compare result for
14989 // that matter). Check to see that they are the same size. If so, we know
14990 // that the element size of the sext'd result matches the element size of
14991 // the compare operands.
14992 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14993 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14994 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14995 N0.getOperand(1), N0.getOperand(2));
14996 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14997 }
14998
14999 // If the desired elements are smaller or larger than the source
15000 // elements we can use a matching integer vector type and then
15001 // truncate/any extend followed by zext_in_reg.
15002 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15003 SDValue VsetCC =
15004 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15005 N0.getOperand(1), N0.getOperand(2));
15006 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15007 N0.getValueType());
15008 }
15009
15010 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15011 EVT N0VT = N0.getValueType();
15012 EVT N00VT = N0.getOperand(0).getValueType();
15013 if (SDValue SCC = SimplifySelectCC(
15014 DL, N0.getOperand(0), N0.getOperand(1),
15015 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15016 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15017 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15018 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15019 }
15020
15021 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15022 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15023 !TLI.isZExtFree(N0, VT)) {
15024 SDValue ShVal = N0.getOperand(0);
15025 SDValue ShAmt = N0.getOperand(1);
15026 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15027 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15028 if (N0.getOpcode() == ISD::SHL) {
15029 // If the original shl may be shifting out bits, do not perform this
15030 // transformation.
15031 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15032 ShVal.getOperand(0).getValueSizeInBits();
15033 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15034 // If the shift is too large, then see if we can deduce that the
15035 // shift is safe anyway.
15036
15037 // Check if the bits being shifted out are known to be zero.
15038 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15039 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15040 return SDValue();
15041 }
15042 }
15043
15044 // Ensure that the shift amount is wide enough for the shifted value.
15045 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15046 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15047
15048 return DAG.getNode(N0.getOpcode(), DL, VT,
15049 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15050 }
15051 }
15052 }
15053
15054 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15055 return NewVSel;
15056
15057 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15058 return NewCtPop;
15059
15060 if (SDValue V = widenAbs(N, DAG))
15061 return V;
15062
15063 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15064 return Res;
15065
15066 // CSE zext nneg with sext if the zext is not free.
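// A zext with the nneg flag and a sext of the same operand produce identical
// results, so if an equivalent sign_extend node already exists, reuse it rather
// than keeping both forms alive.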
15067 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15068 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15069 if (CSENode)
15070 return SDValue(CSENode, 0);
15071 }
15072
15073 return SDValue();
15074}
15075
15076SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15077 SDValue N0 = N->getOperand(0);
15078 EVT VT = N->getValueType(0);
15079 SDLoc DL(N);
15080
15081 // aext(undef) = undef
15082 if (N0.isUndef())
15083 return DAG.getUNDEF(VT);
15084
15085 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15086 return Res;
15087
15088 // fold (aext (aext x)) -> (aext x)
15089 // fold (aext (zext x)) -> (zext x)
15090 // fold (aext (sext x)) -> (sext x)
15091 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15092 N0.getOpcode() == ISD::SIGN_EXTEND) {
15093 SDNodeFlags Flags;
15094 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15095 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15096 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15097 }
15098
15099 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15100 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15101 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15102 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15103 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15104 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
15105 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15106
15107 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15108 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15109 if (N0.getOpcode() == ISD::TRUNCATE) {
15110 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15111 SDNode *oye = N0.getOperand(0).getNode();
15112 if (NarrowLoad.getNode() != N0.getNode()) {
15113 CombineTo(N0.getNode(), NarrowLoad);
15114 // CombineTo deleted the truncate, if needed, but not what's under it.
15115 AddToWorklist(oye);
15116 }
15117 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15118 }
15119 }
15120
15121 // fold (aext (truncate x))
15122 if (N0.getOpcode() == ISD::TRUNCATE)
15123 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15124
15125 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15126 // if the trunc is not free.
15127 if (N0.getOpcode() == ISD::AND &&
15128 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15129 N0.getOperand(1).getOpcode() == ISD::Constant &&
15130 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15131 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15132 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15133 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15134 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15135 }
15136
15137 // fold (aext (load x)) -> (aext (truncate (extload x)))
15138 // None of the supported targets knows how to perform load and any_ext
15139 // on vectors in one instruction, so attempt to fold to zext instead.
15140 if (VT.isVector()) {
15141 // Try to simplify (zext (load x)).
15142 if (SDValue foldedExt =
15143 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15144 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15145 return foldedExt;
15146 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15147 ISD::isUNINDEXEDLoad(N0.getNode()) &&
15148 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
15149 bool DoXform = true;
15150 SmallVector<SDNode *, 4> SetCCs;
15151 if (!N0.hasOneUse())
15152 DoXform =
15153 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15154 if (DoXform) {
15155 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15156 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15157 LN0->getBasePtr(), N0.getValueType(),
15158 LN0->getMemOperand());
15159 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15160 // If the load value is used only by N, replace it via CombineTo N.
15161 bool NoReplaceTrunc = N0.hasOneUse();
15162 CombineTo(N, ExtLoad);
15163 if (NoReplaceTrunc) {
15164 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15165 recursivelyDeleteUnusedNodes(LN0);
15166 } else {
15167 SDValue Trunc =
15168 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15169 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15170 }
15171 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15172 }
15173 }
15174
15175 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15176 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15177 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15178 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15179 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15180 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15181 ISD::LoadExtType ExtType = LN0->getExtensionType();
15182 EVT MemVT = LN0->getMemoryVT();
15183 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15184 SDValue ExtLoad =
15185 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15186 MemVT, LN0->getMemOperand());
15187 CombineTo(N, ExtLoad);
15188 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15189 recursivelyDeleteUnusedNodes(LN0);
15190 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15191 }
15192 }
15193
15194 if (N0.getOpcode() == ISD::SETCC) {
15195 // Propagate fast-math-flags.
15196 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15197
15198 // For vectors:
15199 // aext(setcc) -> vsetcc
15200 // aext(setcc) -> truncate(vsetcc)
15201 // aext(setcc) -> aext(vsetcc)
15202 // Only do this before legalize for now.
15203 if (VT.isVector() && !LegalOperations) {
15204 EVT N00VT = N0.getOperand(0).getValueType();
15205 if (getSetCCResultType(N00VT) == N0.getValueType())
15206 return SDValue();
15207
15208 // We know that the # elements of the result is the same as the
15209 // # elements of the compare (and the # elements of the compare result
15210 // for that matter). Check to see that they are the same size. If so,
15211 // we know that the element size of the sext'd result matches the
15212 // element size of the compare operands.
15213 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15214 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15215 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15216
15217 // If the desired elements are smaller or larger than the source
15218 // elements we can use a matching integer vector type and then
15219 // truncate/any extend
15220 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15221 SDValue VsetCC = DAG.getSetCC(
15222 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15223 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15224 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15225 }
15226
15227 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15228 if (SDValue SCC = SimplifySelectCC(
15229 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15230 DAG.getConstant(0, DL, VT),
15231 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15232 return SCC;
15233 }
15234
15235 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15236 return NewCtPop;
15237
15238 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15239 return Res;
15240
15241 return SDValue();
15242}
15243
15244SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15245 unsigned Opcode = N->getOpcode();
15246 SDValue N0 = N->getOperand(0);
15247 SDValue N1 = N->getOperand(1);
15248 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15249
15250 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15251 if (N0.getOpcode() == Opcode &&
15252 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15253 return N0;
15254
15255 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15256 N0.getOperand(0).getOpcode() == Opcode) {
15257 // We have an assert, truncate, assert sandwich. Make one stronger assert
15258 // by asserting the smallest asserted type on the larger source value.
15259 // This eliminates the later assert:
15260 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15261 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15262 SDLoc DL(N);
15263 SDValue BigA = N0.getOperand(0);
15264 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15265 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15266 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15267 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15268 BigA.getOperand(0), MinAssertVTVal);
15269 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15270 }
15271
15272 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15273 // than X, just move the AssertZext in front of the truncate and drop the
15274 // AssertSext.
15275 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15276 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
15277 Opcode == ISD::AssertZext) {
15278 SDValue BigA = N0.getOperand(0);
15279 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15280 if (AssertVT.bitsLT(BigA_AssertVT)) {
15281 SDLoc DL(N);
15282 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15283 BigA.getOperand(0), N1);
15284 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15285 }
15286 }
15287
15288 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15289 isa<ConstantSDNode>(N0.getOperand(1))) {
15290 const APInt &Mask = N0.getConstantOperandAPInt(1);
15291
15292 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15293 // than X, and the And doesn't change the lower iX bits, we can move the
15294 // AssertZext in front of the And and drop the AssertSext.
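// e.g. (AssertZext (and (AssertSext X, i32), 0xffffffff), i8)
//        -> (and (AssertZext X, i8), 0xffffffff)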
15295 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15296 SDValue BigA = N0.getOperand(0);
15297 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15298 if (AssertVT.bitsLT(BigA_AssertVT) &&
15299 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15300 SDLoc DL(N);
15301 SDValue NewAssert =
15302 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15303 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15304 N0.getOperand(1));
15305 }
15306 }
15307
15308 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15309 // fail.
15310 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
15311 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15312 return N0;
15313 }
15314
15315 return SDValue();
15316}
15317
15318SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15319 SDLoc DL(N);
15320
15321 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15322 SDValue N0 = N->getOperand(0);
15323
15324 // Fold (assertalign (assertalign x, AL0), AL1) ->
15325 // (assertalign x, max(AL0, AL1))
15326 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15327 return DAG.getAssertAlign(DL, N0.getOperand(0),
15328 std::max(AL, AAN->getAlign()));
15329
15330 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15331 // this assert down to source operands so that those arithmetic ops could be
15332 // exposed to the DAG combining.
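// e.g. (assertalign (add x, 32), 16) -> (add (assertalign x, 16), 32),
// when x's alignment is not already known and the constant 32 already
// provides the required alignment for the other operand.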
15333 switch (N0.getOpcode()) {
15334 default:
15335 break;
15336 case ISD::ADD:
15337 case ISD::PTRADD:
15338 case ISD::SUB: {
15339 unsigned AlignShift = Log2(AL);
15340 SDValue LHS = N0.getOperand(0);
15341 SDValue RHS = N0.getOperand(1);
15342 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15343 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15344 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15345 if (LHSAlignShift < AlignShift)
15346 LHS = DAG.getAssertAlign(DL, LHS, AL);
15347 if (RHSAlignShift < AlignShift)
15348 RHS = DAG.getAssertAlign(DL, RHS, AL);
15349 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15350 }
15351 break;
15352 }
15353 }
15354
15355 return SDValue();
15356}
15357
15358/// If the result of a load is shifted/masked/truncated to an effectively
15359/// narrower type, try to transform the load to a narrower type and/or
15360/// use an extending load.
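/// For example, on a little-endian target (when the narrower load is legal):
///   (i32 (truncate (srl (i64 (load x)), 32))) -> (i32 (load x+4))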
15361SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15362 unsigned Opc = N->getOpcode();
15363
15364 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15365 SDValue N0 = N->getOperand(0);
15366 EVT VT = N->getValueType(0);
15367 EVT ExtVT = VT;
15368
15369 // This transformation isn't valid for vector loads.
15370 if (VT.isVector())
15371 return SDValue();
15372
15373 // The ShAmt variable is used to indicate that we've consumed a right
15374 // shift. I.e. we want to narrow the width of the load by skipping the
15375 // ShAmt least significant bits (they are not loaded).
15376 unsigned ShAmt = 0;
15377 // A special case is when the least significant bits from the load are masked
15378 // away, but using an AND rather than a right shift. ShiftedOffset is used to
15379 // indicate that the narrowed load should be left-shifted ShiftedOffset bits
15380 // to get the result.
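// e.g. on a little-endian target (when the extending load is legal):
//   (and (i32 (load x)), 0xff00) -> (shl (i32 (zextload (x+1) from i8)), 8)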
15381 unsigned ShiftedOffset = 0;
15382 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15383 // extended to VT.
15384 if (Opc == ISD::SIGN_EXTEND_INREG) {
15385 ExtType = ISD::SEXTLOAD;
15386 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15387 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15388 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15389 // value, or it may be shifting a higher subword, half or byte into the
15390 // lowest bits.
15391
15392 // Only handle shift with constant shift amount, and the shiftee must be a
15393 // load.
15394 auto *LN = dyn_cast<LoadSDNode>(N0);
15395 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15396 if (!N1C || !LN)
15397 return SDValue();
15398 // If the shift amount is larger than the memory type then we're not
15399 // accessing any of the loaded bytes.
15400 ShAmt = N1C->getZExtValue();
15401 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15402 if (MemoryWidth <= ShAmt)
15403 return SDValue();
15404 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15405 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15406 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15407 // If original load is a SEXTLOAD then we can't simply replace it by a
15408 // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD
15409 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15410 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15411 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15412 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15413 LN->getExtensionType() != ExtType)
15414 return SDValue();
15415 } else if (Opc == ISD::AND) {
15416 // An AND with a constant mask is the same as a truncate + zero-extend.
15417 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15418 if (!AndC)
15419 return SDValue();
15420
15421 const APInt &Mask = AndC->getAPIntValue();
15422 unsigned ActiveBits = 0;
15423 if (Mask.isMask()) {
15424 ActiveBits = Mask.countr_one();
15425 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15426 ShiftedOffset = ShAmt;
15427 } else {
15428 return SDValue();
15429 }
15430
15431 ExtType = ISD::ZEXTLOAD;
15432 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15433 }
15434
15435 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15436 // a right shift. Here we redo some of those checks, to possibly adjust the
15437 // ExtVT even further based on "a masking AND". We could also end up here for
15438 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15439 // need to be done here as well.
15440 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15441 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15442 // Bail out when the SRL has more than one use. This is done for historical
15443 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15444 // check below? And maybe it could be non-profitable to do the transform in
15445 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15446 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
15447 if (!SRL.hasOneUse())
15448 return SDValue();
15449
15450 // Only handle shift with constant shift amount, and the shiftee must be a
15451 // load.
15452 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15453 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15454 if (!SRL1C || !LN)
15455 return SDValue();
15456
15457 // If the shift amount is larger than the input type then we're not
15458 // accessing any of the loaded bytes. If the load was a zextload/extload
15459 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15460 ShAmt = SRL1C->getZExtValue();
15461 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15462 if (ShAmt >= MemoryWidth)
15463 return SDValue();
15464
15465 // Because a SRL must be assumed to *need* to zero-extend the high bits
15466 // (as opposed to anyext the high bits), we can't combine the zextload
15467 // lowering of SRL and an sextload.
15468 if (LN->getExtensionType() == ISD::SEXTLOAD)
15469 return SDValue();
15470
15471 // Avoid reading outside the memory accessed by the original load (could
15472 // happen if we only adjust the load base pointer by ShAmt). Instead we
15473 // try to narrow the load even further. The typical scenario here is:
15474 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15475 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15476 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15477 // Don't replace sextload by zextload.
15478 if (ExtType == ISD::SEXTLOAD)
15479 return SDValue();
15480 // Narrow the load.
15481 ExtType = ISD::ZEXTLOAD;
15482 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15483 }
15484
15485 // If the SRL is only used by a masking AND, we may be able to adjust
15486 // the ExtVT to make the AND redundant.
15487 SDNode *Mask = *(SRL->user_begin());
15488 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15489 isa<ConstantSDNode>(Mask->getOperand(1))) {
15490 unsigned Offset, ActiveBits;
15491 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15492 if (ShiftMask.isMask()) {
15493 EVT MaskedVT =
15494 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15495 // If the mask is smaller, recompute the type.
15496 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15497 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15498 ExtVT = MaskedVT;
15499 } else if (ExtType == ISD::ZEXTLOAD &&
15500 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15501 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15502 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15503 // If the mask is shifted we can use a narrower load and a shl to insert
15504 // the trailing zeros.
15505 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15506 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15507 ExtVT = MaskedVT;
15508 ShAmt = Offset + ShAmt;
15509 ShiftedOffset = Offset;
15510 }
15511 }
15512 }
15513
15514 N0 = SRL.getOperand(0);
15515 }
15516
15517 // If the load is shifted left (and the result isn't shifted back right), we
15518 // can fold a truncate through the shift. The typical scenario is that N
15519 // points at a TRUNCATE here so the attempted fold is:
15520 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
15521 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15522 unsigned ShLeftAmt = 0;
15523 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15524 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15525 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15526 ShLeftAmt = N01->getZExtValue();
15527 N0 = N0.getOperand(0);
15528 }
15529 }
15530
15531 // If we haven't found a load, we can't narrow it.
15532 if (!isa<LoadSDNode>(N0))
15533 return SDValue();
15534
15535 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15536 // Reducing the width of a volatile load is illegal. For atomics, we may be
15537 // able to reduce the width provided we never widen again. (see D66309)
15538 if (!LN0->isSimple() ||
15539 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15540 return SDValue();
15541
15542 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15543 unsigned LVTStoreBits =
15544 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15545 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15546 return LVTStoreBits - EVTStoreBits - ShAmt;
15547 };
15548
15549 // We need to adjust the pointer to the load by ShAmt bits in order to load
15550 // the correct bytes.
15551 unsigned PtrAdjustmentInBits =
15552 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15553
15554 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15555 SDLoc DL(LN0);
15556 // The original load itself didn't wrap, so an offset within it doesn't.
15557 SDValue NewPtr =
15558 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
15559 DL, SDNodeFlags::NoUnsignedWrap);
15560 AddToWorklist(NewPtr.getNode());
15561
15562 SDValue Load;
15563 if (ExtType == ISD::NON_EXTLOAD) {
15564 const MDNode *OldRanges = LN0->getRanges();
15565 const MDNode *NewRanges = nullptr;
15566 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15567 // metadata is not the full set for the new width, then create NewRanges
15568 // metadata for the truncated load.
15569 if (ShAmt == 0 && OldRanges) {
15570 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15571 unsigned BitSize = VT.getScalarSizeInBits();
15572
15573 // It is possible for an 8-bit extending load with 8-bit range
15574 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15575 // ensure that truncation is strictly smaller.
15576 if (CR.getBitWidth() > BitSize) {
15577 ConstantRange TruncatedCR = CR.truncate(BitSize);
15578 if (!TruncatedCR.isFullSet()) {
15579 Metadata *Bounds[2] = {
15580 ConstantAsMetadata::get(
15581 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15582 ConstantAsMetadata::get(
15583 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15584 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15585 }
15586 } else if (CR.getBitWidth() == BitSize)
15587 NewRanges = OldRanges;
15588 }
15589 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15590 LN0->getPointerInfo().getWithOffset(PtrOff),
15591 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15592 LN0->getAAInfo(), NewRanges);
15593 } else
15594 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15595 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15596 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15597 LN0->getAAInfo());
15598
15599 // Replace the old load's chain with the new load's chain.
15600 WorklistRemover DeadNodes(*this);
15601 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15602
15603 // Shift the result left, if we've swallowed a left shift.
15604 SDValue Result = Load;
15605 if (ShLeftAmt != 0) {
15606 // If the shift amount is as large as the result size (but, presumably,
15607 // no larger than the source) then the useful bits of the result are
15608 // zero; we can't simply return the shortened shift, because the result
15609 // of that operation is undefined.
15610 if (ShLeftAmt >= VT.getScalarSizeInBits())
15611 Result = DAG.getConstant(0, DL, VT);
15612 else
15613 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15614 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15615 }
15616
15617 if (ShiftedOffset != 0) {
15618 // We're using a shifted mask, so the load now has an offset. This means
15619 // that data has been loaded into lower bytes than it would have been
15620 // before, so we need to shl the loaded data into the correct position in the
15621 // register.
15622 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15623 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15624 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15625 }
15626
15627 // Return the new loaded value.
15628 return Result;
15629}
15630
15631SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15632 SDValue N0 = N->getOperand(0);
15633 SDValue N1 = N->getOperand(1);
15634 EVT VT = N->getValueType(0);
15635 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15636 unsigned VTBits = VT.getScalarSizeInBits();
15637 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15638 SDLoc DL(N);
15639
15640 // sext_in_reg(undef) = 0 because the top bits will all be the same.
15641 if (N0.isUndef())
15642 return DAG.getConstant(0, DL, VT);
15643
15644 // fold (sext_in_reg c1) -> c1
15645 if (SDValue C =
15646 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15647 return C;
15648
15649 // If the input is already sign extended, just drop the extension.
15650 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15651 return N0;
15652
15653 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
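// e.g. (sext_in_reg (sext_in_reg x, i16), i8) -> (sext_in_reg x, i8)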
15654 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15655 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15656 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15657
15658 // fold (sext_in_reg (sext x)) -> (sext x)
15659 // fold (sext_in_reg (aext x)) -> (sext x)
15660 // if x is small enough or if we know that x has more than 1 sign bit and the
15661 // sign_extend_inreg is extending from one of them.
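// e.g. (sext_in_reg (aext i8:x to i32), i8) -> (sext i8:x to i32)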
15662 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15663 SDValue N00 = N0.getOperand(0);
15664 unsigned N00Bits = N00.getScalarValueSizeInBits();
15665 if ((N00Bits <= ExtVTBits ||
15666 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15667 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15668 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15669 }
15670
15671 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15672 // if x is small enough or if we know that x has more than 1 sign bit and the
15673 // sign_extend_inreg is extending from one of them.
15674 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15675 SDValue N00 = N0.getOperand(0);
15676 unsigned N00Bits = N00.getScalarValueSizeInBits();
15677 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15678 if ((N00Bits == ExtVTBits ||
15679 (!IsZext && (N00Bits < ExtVTBits ||
15680 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15681 (!LegalOperations ||
15682 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15683 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15684 }
15685
15686 // fold (sext_in_reg (zext x)) -> (sext x)
15687 // iff we are extending the source sign bit.
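// e.g. (sext_in_reg (zext i8:x to i32), i8) -> (sext i8:x to i32)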
15688 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15689 SDValue N00 = N0.getOperand(0);
15690 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15691 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15692 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15693 }
15694
15695 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15696 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15697 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15698
15699 // fold operands of sext_in_reg based on knowledge that the top bits are not
15700 // demanded.
15701 if (SimplifyDemandedBits(SDValue(N, 0)))
15702 return SDValue(N, 0);
15703
15704 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15705 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15706 if (SDValue NarrowLoad = reduceLoadWidth(N))
15707 return NarrowLoad;
15708
15709 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15710 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15711 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15712 if (N0.getOpcode() == ISD::SRL) {
15713 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15714 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15715 // We can turn this into an SRA iff the input to the SRL is already sign
15716 // extended enough.
15717 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15718 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15719 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15720 N0.getOperand(1));
15721 }
15722 }
15723
15724 // fold (sext_inreg (extload x)) -> (sextload x)
15725 // If sextload is not supported by the target, we can only do the combine
15726 // when the load has one use. Doing otherwise can block folding the extload
15727 // with other extends that the target does support.
15728 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15729 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15730 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15731 N0.hasOneUse()) ||
15732 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15733 auto *LN0 = cast<LoadSDNode>(N0);
15734 SDValue ExtLoad =
15735 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15736 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15737 CombineTo(N, ExtLoad);
15738 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15739 AddToWorklist(ExtLoad.getNode());
15740 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15741 }
15742
15743 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15744 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15745 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15746 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15747 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15748 auto *LN0 = cast<LoadSDNode>(N0);
15749 SDValue ExtLoad =
15750 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15751 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15752 CombineTo(N, ExtLoad);
15753 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15754 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15755 }
15756
15757 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15758 // ignore it if the masked load is already sign extended
15759 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15760 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15761 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15762 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15763 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15764 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15765 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15766 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15767 CombineTo(N, ExtMaskedLoad);
15768 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15769 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15770 }
15771 }
15772
15773 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15774 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15775 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15777 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15778 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15779
15780 SDValue ExtLoad = DAG.getMaskedGather(
15781 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15782 GN0->getIndexType(), ISD::SEXTLOAD);
15783
15784 CombineTo(N, ExtLoad);
15785 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15786 AddToWorklist(ExtLoad.getNode());
15787 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15788 }
15789 }
15790
15791 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15792 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15793 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15794 N0.getOperand(1), false))
15795 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15796 }
15797
15798 // Fold (iM_signext_inreg
15799 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15800 // from iN)
15801 // -> (extract_subvector (signext iN_v to iM))
15802 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15803 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15804 SDValue InnerExt = N0.getOperand(0);
15805 EVT InnerExtVT = InnerExt->getValueType(0);
15806 SDValue Extendee = InnerExt->getOperand(0);
15807
15808 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15809 (!LegalOperations ||
15810 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15811 SDValue SignExtExtendee =
15812 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15813 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15814 N0.getOperand(1));
15815 }
15816 }
15817
15818 return SDValue();
15819}
15820
15821static SDValue foldExtendVectorInregToExtendOfSubvector(
15822 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15823 bool LegalOperations) {
15824 unsigned InregOpcode = N->getOpcode();
15825 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15826
15827 SDValue Src = N->getOperand(0);
15828 EVT VT = N->getValueType(0);
15829 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15830 Src.getValueType().getVectorElementType(),
15831 VT.getVectorElementCount());
15832
15833 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15834 "Expected EXTEND_VECTOR_INREG dag node in input!");
15835
15836 // Profitability check: our operand must be an one-use CONCAT_VECTORS.
15837 // FIXME: one-use check may be overly restrictive
15838 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15839 return SDValue();
15840
15841 // Profitability check: we must be extending exactly one of it's operands.
15842 // FIXME: this is probably overly restrictive.
15843 Src = Src.getOperand(0);
15844 if (Src.getValueType() != SrcVT)
15845 return SDValue();
15846
15847 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15848 return SDValue();
15849
15850 return DAG.getNode(Opcode, DL, VT, Src);
15851}
15852
15853SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15854 SDValue N0 = N->getOperand(0);
15855 EVT VT = N->getValueType(0);
15856 SDLoc DL(N);
15857
15858 if (N0.isUndef()) {
15859 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15860 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15861 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15862 ? DAG.getUNDEF(VT)
15863 : DAG.getConstant(0, DL, VT);
15864 }
15865
15866 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15867 return Res;
15868
15869 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15870 return SDValue(N, 0);
15871
15872 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15873 LegalOperations))
15874 return R;
15875
15876 return SDValue();
15877}
15878
15879SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15880 EVT VT = N->getValueType(0);
15881 SDValue N0 = N->getOperand(0);
15882
15883 SDValue FPVal;
15884 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15886 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15887 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15888 DAG.getValueType(VT.getScalarType()));
15889
15890 return SDValue();
15891}
15892
15893/// Detect patterns of truncation with unsigned saturation:
15894///
15895/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15896/// Return the source value x to be truncated or SDValue() if the pattern was
15897/// not matched.
15898///
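/// e.g. for an i16 destination, (umin i32:x, 65535) matches and returns i32:x.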
15899static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15900 unsigned NumDstBits = VT.getScalarSizeInBits();
15901 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15902 // Saturation with truncation. We truncate from InVT to VT.
15903 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15904
15905 SDValue Min;
15906 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15907 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15908 return Min;
15909
15910 return SDValue();
15911}
15912
15913/// Detect patterns of truncation with signed saturation:
15914/// (truncate (smin (smax (x, signed_min_of_dest_type),
15915/// signed_max_of_dest_type)) to dest_type)
15916/// or:
15917/// (truncate (smax (smin (x, signed_max_of_dest_type),
15918/// signed_min_of_dest_type)) to dest_type).
15919///
15920/// Return the source value to be truncated or SDValue() if the pattern was not
15921/// matched.
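/// e.g. for an i8 destination, (smax (smin i32:x, 127), -128) matches and
/// returns i32:x.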
15922static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15923 unsigned NumDstBits = VT.getScalarSizeInBits();
15924 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15925 // Saturation with truncation. We truncate from InVT to VT.
15926 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15927
15928 SDValue Val;
15929 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15930 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15931
15932 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15933 m_SpecificInt(SignedMax))))
15934 return Val;
15935
15936 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15937 m_SpecificInt(SignedMin))))
15938 return Val;
15939
15940 return SDValue();
15941}
15942
15943/// Detect patterns of truncation with unsigned saturation:
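/// e.g. for an i8 destination, (smin (smax i32:x, 0), 255) matches and
/// returns i32:x.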
15944static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15945 const SDLoc &DL) {
15946 unsigned NumDstBits = VT.getScalarSizeInBits();
15947 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15948 // Saturation with truncation. We truncate from InVT to VT.
15949 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15950
15951 SDValue Val;
15952 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15953 // Min == 0, Max is unsigned max of destination type.
15954 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15955 m_Zero())))
15956 return Val;
15957
15958 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15959 m_SpecificInt(UnsignedMax))))
15960 return Val;
15961
15962 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15963 m_SpecificInt(UnsignedMax))))
15964 return Val;
15965
15966 return SDValue();
15967}
15968
15969static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
15970 SDLoc &DL, const TargetLowering &TLI,
15971 SelectionDAG &DAG) {
15972 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15973 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
15974 TLI.isTypeDesirableForOp(Opc, VT));
15975 };
15976
15977 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
15978 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
15979 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
15980 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
15981 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15982 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15983 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15984 } else if (Src.getOpcode() == ISD::UMIN) {
15985 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15986 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15987 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15988 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
15989 if (SDValue USatVal = detectUSatUPattern(Src, VT))
15990 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
15991 }
15992
15993 return SDValue();
15994}
15995
15996SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
15997 SDValue N0 = N->getOperand(0);
15998 EVT VT = N->getValueType(0);
15999 EVT SrcVT = N0.getValueType();
16000 bool isLE = DAG.getDataLayout().isLittleEndian();
16001 SDLoc DL(N);
16002
16003 // trunc(undef) = undef
16004 if (N0.isUndef())
16005 return DAG.getUNDEF(VT);
16006
16007 // fold (truncate (truncate x)) -> (truncate x)
16008 if (N0.getOpcode() == ISD::TRUNCATE)
16009 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16010
16011 // fold saturated truncate
16012 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16013 return SaturatedTR;
16014
16015 // fold (truncate c1) -> c1
16016 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16017 return C;
16018
16019 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16020 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16021 N0.getOpcode() == ISD::SIGN_EXTEND ||
16022 N0.getOpcode() == ISD::ANY_EXTEND) {
16023 // if the source is smaller than the dest, we still need an extend.
16024 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16025 SDNodeFlags Flags;
16026 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16027 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16028 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16029 }
16030 // if the source is larger than the dest, then we just need the truncate.
16031 if (N0.getOperand(0).getValueType().bitsGT(VT))
16032 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16033 // if the source and dest are the same type, we can drop both the extend
16034 // and the truncate.
16035 return N0.getOperand(0);
16036 }
16037
16038 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16039 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16040 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16041 N0.hasOneUse()) {
16042 SDValue X = N0.getOperand(0);
16043 SDValue ExtVal = N0.getOperand(1);
16044 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16045 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16046 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16047 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16048 }
16049 }
16050
16051 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16052 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16053 return SDValue();
16054
16055 // Fold extract-and-trunc into a narrow extract. For example:
16056 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16057 // i32 y = TRUNCATE(i64 x)
16058 // -- becomes --
16059 // v16i8 b = BITCAST (v2i64 val)
16060 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16061 //
16062 // Note: We only run this optimization after type legalization (which often
16063 // creates this pattern) and before operation legalization after which
16064 // we need to be more careful about the vector instructions that we generate.
16065 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16066 N0->hasOneUse()) {
16067 EVT TrTy = N->getValueType(0);
16068 SDValue Src = N0;
16069
16070 // Check for cases where we shift down an upper element before truncation.
16071 int EltOffset = 0;
16072 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16073 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16074 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16075 Src = Src.getOperand(0);
16076 EltOffset = *ShAmt / TrTy.getSizeInBits();
16077 }
16078 }
16079 }
16080
16081 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16082 EVT VecTy = Src.getOperand(0).getValueType();
16083 EVT ExTy = Src.getValueType();
16084
16085 auto EltCnt = VecTy.getVectorElementCount();
16086 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16087 auto NewEltCnt = EltCnt * SizeRatio;
16088
16089 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16090 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16091
16092 SDValue EltNo = Src->getOperand(1);
16093 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16094 int Elt = EltNo->getAsZExtVal();
16095 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16096 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16097 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16098 DAG.getBitcast(NVT, Src.getOperand(0)),
16099 DAG.getVectorIdxConstant(Index, DL));
16100 }
16101 }
16102 }
16103
16104 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16105 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16106 TLI.isTruncateFree(SrcVT, VT)) {
16107 if (!LegalOperations ||
16108 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16109 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16110 SDLoc SL(N0);
16111 SDValue Cond = N0.getOperand(0);
16112 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16113 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16114 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16115 }
16116 }
16117
16118 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16119 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16120 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16121 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16122 SDValue Amt = N0.getOperand(1);
16123 KnownBits Known = DAG.computeKnownBits(Amt);
16124 unsigned Size = VT.getScalarSizeInBits();
16125 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16126 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16127 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16128 if (AmtVT != Amt.getValueType()) {
16129 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16130 AddToWorklist(Amt.getNode());
16131 }
16132 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16133 }
16134 }
16135
16136 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16137 return V;
16138
16139 if (SDValue ABD = foldABSToABD(N, DL))
16140 return ABD;
16141
16142 // Attempt to pre-truncate BUILD_VECTOR sources.
16143 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16144 N0.hasOneUse() &&
16145 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16146 // Avoid creating illegal types if running after type legalizer.
16147 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16148 EVT SVT = VT.getScalarType();
16149 SmallVector<SDValue, 8> TruncOps;
16150 for (const SDValue &Op : N0->op_values()) {
16151 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16152 TruncOps.push_back(TruncOp);
16153 }
16154 return DAG.getBuildVector(VT, DL, TruncOps);
16155 }
16156
16157 // trunc (splat_vector x) -> splat_vector (trunc x)
16158 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16159 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16160 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16161 EVT SVT = VT.getScalarType();
16162 return DAG.getSplatVector(
16163 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16164 }
16165
16166 // Fold a series of buildvector, bitcast, and truncate if possible.
16167 // For example fold
16168 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16169 // (2xi32 (buildvector x, y)).
16170 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16171 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16172 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
16173 N0.getOperand(0).hasOneUse()) {
16174 SDValue BuildVect = N0.getOperand(0);
16175 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16176 EVT TruncVecEltTy = VT.getVectorElementType();
16177
16178 // Check that the element types match.
16179 if (BuildVectEltTy == TruncVecEltTy) {
16180 // Now we only need to compute the offset of the truncated elements.
16181 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16182 unsigned TruncVecNumElts = VT.getVectorNumElements();
16183 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16184 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16185
16186 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16187 "Invalid number of elements");
16188
16189 SmallVector<SDValue, 8> Opnds;
16190 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16191 i += TruncEltOffset)
16192 Opnds.push_back(BuildVect.getOperand(i));
16193
16194 return DAG.getBuildVector(VT, DL, Opnds);
16195 }
16196 }
16197
16198 // fold (truncate (load x)) -> (smaller load x)
16199 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16200 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16201 if (SDValue Reduced = reduceLoadWidth(N))
16202 return Reduced;
16203
16204 // Handle the case where the truncated result is at least as wide as the
16205 // loaded type.
16206 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16207 auto *LN0 = cast<LoadSDNode>(N0);
16208 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16209 SDValue NewLoad = DAG.getExtLoad(
16210 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16211 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16212 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16213 return NewLoad;
16214 }
16215 }
16216 }
16217
16218 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16219 // where ... are all 'undef'.
16220 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16221 SmallVector<EVT, 8> VTs;
16222 SDValue V;
16223 unsigned Idx = 0;
16224 unsigned NumDefs = 0;
16225
16226 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16227 SDValue X = N0.getOperand(i);
16228 if (!X.isUndef()) {
16229 V = X;
16230 Idx = i;
16231 NumDefs++;
16232 }
16233 // Stop if more than one member is non-undef.
16234 if (NumDefs > 1)
16235 break;
16236
16237 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
16238 VT.getVectorElementType(),
16239 X.getValueType().getVectorElementCount()));
16240 }
16241
16242 if (NumDefs == 0)
16243 return DAG.getUNDEF(VT);
16244
16245 if (NumDefs == 1) {
16246 assert(V.getNode() && "The single defined operand is empty!");
16247 SmallVector<SDValue, 8> Opnds;
16248 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16249 if (i != Idx) {
16250 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16251 continue;
16252 }
16253 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16254 AddToWorklist(NV.getNode());
16255 Opnds.push_back(NV);
16256 }
16257 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16258 }
16259 }
16260
16261 // Fold truncate of a bitcast of a vector to an extract of the low vector
16262 // element.
16263 //
16264 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16265 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16266 SDValue VecSrc = N0.getOperand(0);
16267 EVT VecSrcVT = VecSrc.getValueType();
16268 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16269 (!LegalOperations ||
16270 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16271 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16272 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16273 DAG.getVectorIdxConstant(Idx, DL));
16274 }
16275 }
16276
16277 // Simplify the operands using demanded-bits information.
16278 if (SimplifyDemandedBits(SDValue(N, 0)))
16279 return SDValue(N, 0);
16280
16281 // fold (truncate (extract_subvector(ext x))) ->
16282 // (extract_subvector x)
16283 // TODO: This can be generalized to cover cases where the truncate and extract
16284 // do not fully cancel each other out.
16285 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16286 SDValue N00 = N0.getOperand(0);
16287 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16288 N00.getOpcode() == ISD::ZERO_EXTEND ||
16289 N00.getOpcode() == ISD::ANY_EXTEND) {
16290 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16291 VT.getVectorElementType())
16292 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16293 N00.getOperand(0), N0.getOperand(1));
16294 }
16295 }
16296
16297 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16298 return NewVSel;
16299
16300 // Narrow a suitable binary operation with a non-opaque constant operand by
16301 // moving it ahead of the truncate. This is limited to pre-legalization
16302 // because targets may prefer a wider type during later combines and invert
16303 // this transform.
16304 switch (N0.getOpcode()) {
16305 case ISD::ADD:
16306 case ISD::SUB:
16307 case ISD::MUL:
16308 case ISD::AND:
16309 case ISD::OR:
16310 case ISD::XOR:
16311 if (!LegalOperations && N0.hasOneUse() &&
16312 (isConstantOrConstantVector(N0.getOperand(0), true) ||
16313 isConstantOrConstantVector(N0.getOperand(1), true))) {
16314 // TODO: We already restricted this to pre-legalization, but for vectors
16315 // we are extra cautious to not create an unsupported operation.
16316 // Target-specific changes are likely needed to avoid regressions here.
16317 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16318 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16319 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16320 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
16321 }
16322 }
16323 break;
16324 case ISD::ADDE:
16325 case ISD::UADDO_CARRY:
16326 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16327 // (trunc uaddo_carry(X, Y, Carry)) ->
16328 // (uaddo_carry trunc(X), trunc(Y), Carry)
16329 // When the adde's carry is not used.
16330 // We only do this for uaddo_carry before operation legalization.
16331 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16332 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16333 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16334 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16335 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16336 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16337 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16338 }
16339 break;
16340 case ISD::USUBSAT:
16341 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16342 // enough to know that the upper bits are zero, we must ensure that we
16343 // don't introduce an extra truncate.
16344 if (!LegalOperations && N0.hasOneUse() &&
16345 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
16346 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
16347 VT.getScalarSizeInBits() &&
16348 hasOperation(N0.getOpcode(), VT)) {
16349 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16350 DAG, DL);
16351 }
16352 break;
16353 case ISD::AVGFLOORS:
16354 case ISD::AVGFLOORU:
16355 case ISD::AVGCEILS:
16356 case ISD::AVGCEILU:
16357 case ISD::ABDS:
16358 case ISD::ABDU:
16359 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16360 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16361 if (!LegalOperations && N0.hasOneUse() &&
16362 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16363 EVT TruncVT = VT;
16364 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16365 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16366
16367 SDValue A = N0.getOperand(0);
16368 SDValue B = N0.getOperand(1);
16369 bool CanFold = false;
16370
16371 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16372 N0.getOpcode() == ISD::ABDU) {
16373 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16374 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16375 DAG.MaskedValueIsZero(A, UpperBits);
16376 } else {
16377 unsigned NeededBits = SrcBits - TruncBits;
16378 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16379 DAG.ComputeNumSignBits(A) > NeededBits;
16380 }
16381
16382 if (CanFold) {
16383 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16384 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16385 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16386 }
16387 }
16388 break;
16389 }
16390
16391 return SDValue();
16392}
16393
16394static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16395 SDValue Elt = N->getOperand(i);
16396 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16397 return Elt.getNode();
16398 return Elt.getOperand(Elt.getResNo()).getNode();
16399}
16400
16401/// build_pair (load, load) -> load
16402/// if load locations are consecutive.
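/// e.g. on a little-endian target (when the wide load is legal and fast):
///   (i64 (build_pair (i32 (load [p])), (i32 (load [p+4])))) -> (i64 (load [p]))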
16403SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16404 assert(N->getOpcode() == ISD::BUILD_PAIR);
16405
16406 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16407 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16408
16409 // A BUILD_PAIR always has the least significant part in elt 0 and the
16410 // most significant part in elt 1. So when combining into one large load, we
16411 // need to consider the endianness.
16412 if (DAG.getDataLayout().isBigEndian())
16413 std::swap(LD1, LD2);
16414
16415 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16416 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16417 LD1->getAddressSpace() != LD2->getAddressSpace())
16418 return SDValue();
16419
16420 unsigned LD1Fast = 0;
16421 EVT LD1VT = LD1->getValueType(0);
16422 unsigned LD1Bytes = LD1VT.getStoreSize();
16423 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16424 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16425 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16426 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16427 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16428 LD1->getPointerInfo(), LD1->getAlign());
16429
16430 return SDValue();
16431}
16432
16433static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16434 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16435 // and Lo parts; on big-endian machines it doesn't.
16436 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16437}
16438
16439SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16440 const TargetLowering &TLI) {
16441 // If this is not a bitcast to an FP type or if the target doesn't have
16442 // IEEE754-compliant FP logic, we're done.
16443 EVT VT = N->getValueType(0);
16444 SDValue N0 = N->getOperand(0);
16445 EVT SourceVT = N0.getValueType();
16446
16447 if (!VT.isFloatingPoint())
16448 return SDValue();
16449
16450 // TODO: Handle cases where the integer constant is a different scalar
16451 // bitwidth to the FP.
16452 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16453 return SDValue();
16454
16455 unsigned FPOpcode;
16456 APInt SignMask;
16457 switch (N0.getOpcode()) {
16458 case ISD::AND:
16459 FPOpcode = ISD::FABS;
16460 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16461 break;
16462 case ISD::XOR:
16463 FPOpcode = ISD::FNEG;
16464 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16465 break;
16466 case ISD::OR:
16467 FPOpcode = ISD::FABS;
16468 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16469 break;
16470 default:
16471 return SDValue();
16472 }
16473
16474 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16475 return SDValue();
16476
16477 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16478 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16479 // removing this would require more changes.
16480 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16481 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16482 return true;
16483
16484 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16485 };
16486
16487 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16488 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16489 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16490 // fneg (fabs X)
16491 SDValue LogicOp0 = N0.getOperand(0);
16492 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16493 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16494 IsBitCastOrFree(LogicOp0, VT)) {
16495 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16496 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16497 NumFPLogicOpsConv++;
16498 if (N0.getOpcode() == ISD::OR)
16499 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16500 return FPOp;
16501 }
16502
16503 return SDValue();
16504}
16505
16506SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16507 SDValue N0 = N->getOperand(0);
16508 EVT VT = N->getValueType(0);
16509
16510 if (N0.isUndef())
16511 return DAG.getUNDEF(VT);
16512
16513 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16514 // Only do this before legalize types, unless both types are integer and the
16515 // scalar type is legal. Only do this before legalize ops, since the target
16516 // may depend on the bitcast.
16517 // First check to see if this is all constant.
16518 // TODO: Support FP bitcasts after legalize types.
16519 if (VT.isVector() &&
16520 (!LegalTypes ||
16521 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16522 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16523 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16524 cast<BuildVectorSDNode>(N0)->isConstant())
16525 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16526 VT.getVectorElementType());
16527
16528 // If the input is a constant, let getNode fold it.
16529 if (isIntOrFPConstant(N0)) {
16530 // If we can't allow illegal operations, we need to check that this is just
16531 // an fp -> int or int -> fp conversion and that the resulting operation will
16532 // be legal.
16533 if (!LegalOperations ||
16534 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16535 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16536 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16537 TLI.isOperationLegal(ISD::Constant, VT))) {
16538 SDValue C = DAG.getBitcast(VT, N0);
16539 if (C.getNode() != N)
16540 return C;
16541 }
16542 }
16543
16544 // (conv (conv x, t1), t2) -> (conv x, t2)
16545 if (N0.getOpcode() == ISD::BITCAST)
16546 return DAG.getBitcast(VT, N0.getOperand(0));
16547
16548 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16549 // iff the current bitwise logicop type isn't legal
16550 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16551 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16552 auto IsFreeBitcast = [VT](SDValue V) {
16553 return (V.getOpcode() == ISD::BITCAST &&
16554 V.getOperand(0).getValueType() == VT) ||
16555 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16556 V->hasOneUse());
16557 };
16558 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16559 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16560 DAG.getBitcast(VT, N0.getOperand(0)),
16561 DAG.getBitcast(VT, N0.getOperand(1)));
16562 }
16563
16564 // fold (conv (load x)) -> (load (conv*)x)
16565 // If the resultant load doesn't need a higher alignment than the original!
16566 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16567 // Do not remove the cast if the types differ in endian layout.
16568 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
16569 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
16570 // If the load is volatile, we only want to change the load type if the
16571 // resulting load is legal. Otherwise we might increase the number of
16572 // memory accesses. We don't care if the original type was legal or not
16573 // as we assume software couldn't rely on the number of accesses of an
16574 // illegal type.
16575 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
16576 TLI.isOperationLegal(ISD::LOAD, VT))) {
16577 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16578
16579 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16580 *LN0->getMemOperand())) {
16581 // If the range metadata type does not match the new memory
16582 // operation type, remove the range metadata.
16583 if (const MDNode *MD = LN0->getRanges()) {
16584 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16585 if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
16586 !VT.isInteger()) {
16587 LN0->getMemOperand()->clearRanges();
16588 }
16589 }
16590 SDValue Load =
16591 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16592 LN0->getMemOperand());
16593 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16594 return Load;
16595 }
16596 }
16597
16598 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16599 return V;
16600
16601 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16602 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16603 //
16604 // For ppc_fp128:
16605 // fold (bitcast (fneg x)) ->
16606 // flipbit = signbit
16607 // (xor (bitcast x) (build_pair flipbit, flipbit))
16608 //
16609 // fold (bitcast (fabs x)) ->
16610 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16611 // (xor (bitcast x) (build_pair flipbit, flipbit))
16612 // This often reduces constant pool loads.
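// For illustration, with f64 bitcast to i64:
//   (i64 (bitcast (fneg f64:x))) -> (xor (i64 (bitcast x)), 0x8000000000000000)
//   (i64 (bitcast (fabs f64:x))) -> (and (i64 (bitcast x)), 0x7FFFFFFFFFFFFFFF)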
16613 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16614 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16615 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16616 !N0.getValueType().isVector()) {
16617 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16618 AddToWorklist(NewConv.getNode());
16619
16620 SDLoc DL(N);
16621 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16622 assert(VT.getSizeInBits() == 128);
16623 SDValue SignBit = DAG.getConstant(
16624 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16625 SDValue FlipBit;
16626 if (N0.getOpcode() == ISD::FNEG) {
16627 FlipBit = SignBit;
16628 AddToWorklist(FlipBit.getNode());
16629 } else {
16630 assert(N0.getOpcode() == ISD::FABS);
16631 SDValue Hi =
16632 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16633 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16634 SDLoc(NewConv)));
16635 AddToWorklist(Hi.getNode());
16636 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16637 AddToWorklist(FlipBit.getNode());
16638 }
16639 SDValue FlipBits =
16640 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16641 AddToWorklist(FlipBits.getNode());
16642 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16643 }
16644 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16645 if (N0.getOpcode() == ISD::FNEG)
16646 return DAG.getNode(ISD::XOR, DL, VT,
16647 NewConv, DAG.getConstant(SignBit, DL, VT));
16648 assert(N0.getOpcode() == ISD::FABS);
16649 return DAG.getNode(ISD::AND, DL, VT,
16650 NewConv, DAG.getConstant(~SignBit, DL, VT));
16651 }
16652
16653 // fold (bitconvert (fcopysign cst, x)) ->
16654 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16655 // Note that we don't handle (copysign x, cst) because this can always be
16656 // folded to an fneg or fabs.
16657 //
16658 // For ppc_fp128:
16659 // fold (bitcast (fcopysign cst, x)) ->
16660 // flipbit = (and (extract_element
16661 // (xor (bitcast cst), (bitcast x)), 0),
16662 // signbit)
16663 // (xor (bitcast cst) (build_pair flipbit, flipbit))
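// For illustration (non-ppc_fp128), with f64 constant K and f64 x, bitcast to i64:
//   (i64 (bitcast (fcopysign K, x)))
//   -> (or (and (i64 (bitcast x)), 0x8000000000000000),
//          (and (i64 (bitcast K)), 0x7FFFFFFFFFFFFFFF))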
16664 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16665 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16666 !VT.isVector()) {
16667 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16668 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16669 if (isTypeLegal(IntXVT)) {
16670 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16671 AddToWorklist(X.getNode());
16672
16673 // If X has a different width than the result/lhs, sext it or truncate it.
16674 unsigned VTWidth = VT.getSizeInBits();
16675 if (OrigXWidth < VTWidth) {
16676 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16677 AddToWorklist(X.getNode());
16678 } else if (OrigXWidth > VTWidth) {
16679 // To get the sign bit in the right place, we have to shift it right
16680 // before truncating.
16681 SDLoc DL(X);
16682 X = DAG.getNode(ISD::SRL, DL,
16683 X.getValueType(), X,
16684 DAG.getConstant(OrigXWidth-VTWidth, DL,
16685 X.getValueType()));
16686 AddToWorklist(X.getNode());
16687 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16688 AddToWorklist(X.getNode());
16689 }
16690
16691 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16692 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16693 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16694 AddToWorklist(Cst.getNode());
16695 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16696 AddToWorklist(X.getNode());
16697 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16698 AddToWorklist(XorResult.getNode());
16699 SDValue XorResult64 = DAG.getNode(
16700 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16701 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16702 SDLoc(XorResult)));
16703 AddToWorklist(XorResult64.getNode());
16704 SDValue FlipBit =
16705 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16706 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16707 AddToWorklist(FlipBit.getNode());
16708 SDValue FlipBits =
16709 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16710 AddToWorklist(FlipBits.getNode());
16711 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16712 }
16713 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16714 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16715 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16716 AddToWorklist(X.getNode());
16717
16718 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16719 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16720 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16721 AddToWorklist(Cst.getNode());
16722
16723 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16724 }
16725 }
16726
16727 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
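// For illustration (little-endian): if t0 = (i32 load %p) and t1 = (i32 load %p+4)
// are consecutive, (f64 (bitcast (build_pair t0, t1))) -> (f64 load %p).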
16728 if (N0.getOpcode() == ISD::BUILD_PAIR)
16729 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16730 return CombineLD;
16731
16732 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16733 // => int_vt (any_extend elt_vt:x)
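// For illustration, on a little-endian target:
//   (i64 (bitcast (v2i32 (scalar_to_vector i32:x)))) -> (i64 (any_extend i32:x));
// the bits above x are undefined in both forms.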
16734 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16735 SDValue SrcScalar = N0.getOperand(0);
16736 if (SrcScalar.getValueType().isScalarInteger())
16737 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16738 }
16739
16740 // Remove double bitcasts from shuffles - this is often a legacy of
16741 // XformToShuffleWithZero being used to combine bitmaskings (of
16742 // float vectors bitcast to integer vectors) into shuffles.
16743 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
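// For illustration, if s0 and s1 are v4i32 but the shuffle was built as v2i64:
//   (v4i32 (bitcast (v2i64 shuffle (bitcast s0), (bitcast s1), <0, 3>)))
//   -> (v4i32 shuffle s0, s1, <0, 1, 6, 7>), scaling each mask element by 2.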
16744 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16745 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16748 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16749
16750 // If operands are a bitcast, peek through if it casts the original VT.
16751 // If operands are a constant, just bitcast back to original VT.
16752 auto PeekThroughBitcast = [&](SDValue Op) {
16753 if (Op.getOpcode() == ISD::BITCAST &&
16754 Op.getOperand(0).getValueType() == VT)
16755 return SDValue(Op.getOperand(0));
16756 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16757 return DAG.getBitcast(VT, Op);
16758 return SDValue();
16759 };
16760
16761 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16762 // the result type of this bitcast. This would eliminate at least one
16763 // bitcast. See the transform in InstCombine.
16764 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16765 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16766 if (!(SV0 && SV1))
16767 return SDValue();
16768
16769 int MaskScale =
16770 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16771 SmallVector<int, 8> NewMask;
16772 for (int M : SVN->getMask())
16773 for (int i = 0; i != MaskScale; ++i)
16774 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16775
16776 SDValue LegalShuffle =
16777 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16778 if (LegalShuffle)
16779 return LegalShuffle;
16780 }
16781
16782 return SDValue();
16783}
16784
16785SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16786 EVT VT = N->getValueType(0);
16787 return CombineConsecutiveLoads(N, VT);
16788}
16789
16790SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16791 SDValue N0 = N->getOperand(0);
16792
16793 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16794 return N0;
16795
16796 // If we have frozen and unfrozen users of N0, update so everything uses N.
16797 if (!N0.isUndef() && !N0.hasOneUse()) {
16798 SDValue FrozenN0(N, 0);
16799 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
16800 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
16801 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
16802 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
16803 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16804 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
16805 DAG.UpdateNodeOperands(N, N0);
16806 return FrozenN0;
16807 }
16808
16809 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16810 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16811 // example https://reviews.llvm.org/D136529#4120959.
16812 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16813 return SDValue();
16814
16815 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16816 // Try to push freeze through instructions that propagate but don't produce
16817 // poison as far as possible. If an operand of freeze satisfies three
16818 // conditions: 1) one-use, 2) does not produce poison, and 3) has all but one
16819 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16820 // the freeze through to the operands that are not guaranteed non-poison.
16821 // NOTE: we will strip poison-generating flags, so ignore them here.
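// For illustration: (freeze (add x, C)) becomes (add (freeze x), C), since a
// constant is never poison and ADD cannot generate poison once nuw/nsw are
// stripped, so freezing the single maybe-poison operand is sufficient.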
16822 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16823 /*ConsiderFlags*/ false) ||
16824 N0->getNumValues() != 1 || !N0->hasOneUse())
16825 return SDValue();
16826
16827 // TODO: we should always allow multiple operands. However, this increases the
16828 // likelihood of infinite loops, because the ReplaceAllUsesOfValueWith call
16829 // below can cause later nodes that share frozen operands to fold again, while
16830 // the recursion depth limit on isGuaranteedNotToBeUndefOrPoison prevents us
16831 // from confirming that the other operands are not poison.
16832 bool AllowMultipleMaybePoisonOperands =
16833 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16834 N0.getOpcode() == ISD::BUILD_VECTOR ||
16835 N0.getOpcode() == ISD::BUILD_PAIR ||
16838
16839 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16840 // ones" or "constant" into something that depends on FrozenUndef. We can
16841 // instead pick undef values to keep those properties, while at the same time
16842 // folding away the freeze.
16843 // If we implement a more general solution for folding away freeze(undef) in
16844 // the future, then this special handling can be removed.
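// For illustration: (freeze (BUILD_VECTOR 1, 1, undef, 1)) becomes
// (BUILD_VECTOR 1, 1, 0, 1) rather than a vector that depends on frozen undef.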
16845 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16846 SDLoc DL(N0);
16847 EVT VT = N0.getValueType();
16849 return DAG.getAllOnesConstant(DL, VT);
16852 for (const SDValue &Op : N0->op_values())
16853 NewVecC.push_back(
16854 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16855 return DAG.getBuildVector(VT, DL, NewVecC);
16856 }
16857 }
16858
16859 SmallSet<SDValue, 8> MaybePoisonOperands;
16860 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16861 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16862 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
16863 continue;
16864 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16865 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16866 if (IsNewMaybePoisonOperand)
16867 MaybePoisonOperandNumbers.push_back(OpNo);
16868 if (!HadMaybePoisonOperands)
16869 continue;
16870 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16871 // Multiple maybe-poison ops when not allowed - bail out.
16872 return SDValue();
16873 }
16874 }
16875 // NOTE: the whole op may still not be guaranteed to be non-undef/non-poison,
16876 // because it could create undef or poison due to its poison-generating flags.
16877 // So not finding any maybe-poison operands is fine.
16878
16879 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16880 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16881 // operands via the operand numbers. The typical scenario is that we have
16882 // something like this
16883 // t262: i32 = freeze t181
16884 // t150: i32 = ctlz_zero_undef t262
16885 // t184: i32 = ctlz_zero_undef t181
16886 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16887 // When freezing the t181 operand we get t262 back, and then the
16888 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16889 // also recursively replace t184 by t150.
16890 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16891 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16892 if (MaybePoisonOperand.isUndef())
16893 continue;
16894 // First, freeze each offending operand.
16895 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16896 // Then, change all other uses of unfrozen operand to use frozen operand.
16897 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16898 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16899 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16900 // But, that also updated the use in the freeze we just created, thus
16901 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16902 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16903 MaybePoisonOperand);
16904 }
16905
16906 // This node has been merged with another.
16907 if (N->getOpcode() == ISD::DELETED_NODE)
16908 return SDValue(N, 0);
16909 }
16910
16911 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
16912
16913 // The whole node may have been updated, so the value we were holding
16914 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16915 N0 = N->getOperand(0);
16916
16917 // Finally, recreate the node; its operands were updated to use
16918 // frozen operands, so we just need to use its "original" operands.
16919 SmallVector<SDValue> Ops(N0->ops());
16920 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
16921 // leave for a future patch.
16922 for (SDValue &Op : Ops) {
16923 if (Op.isUndef())
16924 Op = DAG.getFreeze(Op);
16925 }
16926
16927 SDLoc DL(N0);
16928
16929 // Special case handling for ShuffleVectorSDNode nodes.
16930 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
16931 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
16932 SVN->getMask());
16933
16934 // NOTE: this strips poison generating flags.
16935 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
16936 // ninf, nsz, or fast.
16937 // However, contract, reassoc, afn, and arcp should be preserved,
16938 // as these fast-math flags do not introduce poison values.
16939 SDNodeFlags SrcFlags = N0->getFlags();
16940 SDNodeFlags SafeFlags;
16941 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
16942 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
16943 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
16944 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
16945 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
16946}
16947
16948// Returns true if floating point contraction is allowed on the FMUL-SDValue
16949// `N`
16950 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
16951 assert(N.getOpcode() == ISD::FMUL);
16952
16953 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
16954 N->getFlags().hasAllowContract();
16955}
16956
16957// Returns true if `N` can assume no infinities involved in its computation.
16958 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
16959 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16960}
16961
16962/// Try to perform FMA combining on a given FADD node.
16963template <class MatchContextClass>
16964SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
16965 SDValue N0 = N->getOperand(0);
16966 SDValue N1 = N->getOperand(1);
16967 EVT VT = N->getValueType(0);
16968 SDLoc SL(N);
16969 MatchContextClass matcher(DAG, TLI, N);
16970 const TargetOptions &Options = DAG.getTarget().Options;
16971
16972 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16973
16974 // Floating-point multiply-add with intermediate rounding.
16975 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16976 // FIXME: Add VP_FMAD opcode.
16977 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16978
16979 // Floating-point multiply-add without intermediate rounding.
16980 bool HasFMA =
16981 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16982 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16983
16984 // No valid opcode, do not combine.
16985 if (!HasFMAD && !HasFMA)
16986 return SDValue();
16987
16988 bool AllowFusionGlobally =
16989 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
16990 // If the addition is not contractable, do not combine.
16991 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16992 return SDValue();
16993
16994 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
16995 // beneficial. It does not reduce latency. It increases register pressure. It
16996 // replaces an fadd with an fma which is a more complex instruction, so is
16997 // likely to have a larger encoding, use more functional units, etc.
16998 if (N0 == N1)
16999 return SDValue();
17000
17001 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17002 return SDValue();
17003
17004 // Always prefer FMAD to FMA for precision.
17005 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17006 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17007
17008 auto isFusedOp = [&](SDValue N) {
17009 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17010 };
17011
17012 // Is the node an FMUL and contractable either due to global flags or
17013 // SDNodeFlags.
17014 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17015 if (!matcher.match(N, ISD::FMUL))
17016 return false;
17017 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17018 };
17019 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17020 // prefer to fold the multiply with fewer uses.
17022 if (N0->use_size() > N1->use_size())
17023 std::swap(N0, N1);
17024 }
17025
17026 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17027 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17028 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17029 N0.getOperand(1), N1);
17030 }
17031
17032 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17033 // Note: Commutes FADD operands.
17034 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17035 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17036 N1.getOperand(1), N0);
17037 }
17038
17039 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17040 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17041 // This also works with nested fma instructions:
17042 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
17043 // fma A, B, (fma C, D, (fma E, F, G))
17044 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
17045 // fma A, B, (fma C, D, (fma E, F, G)).
17046 // This requires reassociation because it changes the order of operations.
17047 bool CanReassociate = N->getFlags().hasAllowReassociation();
17048 if (CanReassociate) {
17049 SDValue FMA, E;
17050 if (isFusedOp(N0) && N0.hasOneUse()) {
17051 FMA = N0;
17052 E = N1;
17053 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17054 FMA = N1;
17055 E = N0;
17056 }
17057
17058 SDValue TmpFMA = FMA;
17059 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17060 SDValue FMul = TmpFMA->getOperand(2);
17061 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17062 SDValue C = FMul.getOperand(0);
17063 SDValue D = FMul.getOperand(1);
17064 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17065 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17066 // Replacing the inner FMul could cause the outer FMA to be simplified
17067 // away.
17068 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17069 }
17070
17071 TmpFMA = TmpFMA->getOperand(2);
17072 }
17073 }
17074
17075 // Look through FP_EXTEND nodes to do more combining.
17076
17077 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17078 if (matcher.match(N0, ISD::FP_EXTEND)) {
17079 SDValue N00 = N0.getOperand(0);
17080 if (isContractableFMUL(N00) &&
17081 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17082 N00.getValueType())) {
17083 return matcher.getNode(
17084 PreferredFusedOpcode, SL, VT,
17085 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17086 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17087 }
17088 }
17089
17090 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17091 // Note: Commutes FADD operands.
17092 if (matcher.match(N1, ISD::FP_EXTEND)) {
17093 SDValue N10 = N1.getOperand(0);
17094 if (isContractableFMUL(N10) &&
17095 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17096 N10.getValueType())) {
17097 return matcher.getNode(
17098 PreferredFusedOpcode, SL, VT,
17099 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17100 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17101 }
17102 }
17103
17104 // More folding opportunities when target permits.
17105 if (Aggressive) {
17106 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17107 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17108 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17109 SDValue Z) {
17110 return matcher.getNode(
17111 PreferredFusedOpcode, SL, VT, X, Y,
17112 matcher.getNode(PreferredFusedOpcode, SL, VT,
17113 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17114 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17115 };
17116 if (isFusedOp(N0)) {
17117 SDValue N02 = N0.getOperand(2);
17118 if (matcher.match(N02, ISD::FP_EXTEND)) {
17119 SDValue N020 = N02.getOperand(0);
17120 if (isContractableFMUL(N020) &&
17121 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17122 N020.getValueType())) {
17123 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17124 N020.getOperand(0), N020.getOperand(1),
17125 N1);
17126 }
17127 }
17128 }
17129
17130 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17131 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17132 // FIXME: This turns two single-precision and one double-precision
17133 // operation into two double-precision operations, which might not be
17134 // interesting for all targets, especially GPUs.
17135 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17136 SDValue Z) {
17137 return matcher.getNode(
17138 PreferredFusedOpcode, SL, VT,
17139 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17140 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17141 matcher.getNode(PreferredFusedOpcode, SL, VT,
17142 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17143 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17144 };
17145 if (N0.getOpcode() == ISD::FP_EXTEND) {
17146 SDValue N00 = N0.getOperand(0);
17147 if (isFusedOp(N00)) {
17148 SDValue N002 = N00.getOperand(2);
17149 if (isContractableFMUL(N002) &&
17150 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17151 N00.getValueType())) {
17152 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17153 N002.getOperand(0), N002.getOperand(1),
17154 N1);
17155 }
17156 }
17157 }
17158
17159 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17160 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17161 if (isFusedOp(N1)) {
17162 SDValue N12 = N1.getOperand(2);
17163 if (N12.getOpcode() == ISD::FP_EXTEND) {
17164 SDValue N120 = N12.getOperand(0);
17165 if (isContractableFMUL(N120) &&
17166 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17167 N120.getValueType())) {
17168 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17169 N120.getOperand(0), N120.getOperand(1),
17170 N0);
17171 }
17172 }
17173 }
17174
17175 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17176 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17177 // FIXME: This turns two single-precision and one double-precision
17178 // operation into two double-precision operations, which might not be
17179 // interesting for all targets, especially GPUs.
17180 if (N1.getOpcode() == ISD::FP_EXTEND) {
17181 SDValue N10 = N1.getOperand(0);
17182 if (isFusedOp(N10)) {
17183 SDValue N102 = N10.getOperand(2);
17184 if (isContractableFMUL(N102) &&
17185 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17186 N10.getValueType())) {
17187 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17188 N102.getOperand(0), N102.getOperand(1),
17189 N0);
17190 }
17191 }
17192 }
17193 }
17194
17195 return SDValue();
17196}
17197
17198/// Try to perform FMA combining on a given FSUB node.
17199template <class MatchContextClass>
17200SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17201 SDValue N0 = N->getOperand(0);
17202 SDValue N1 = N->getOperand(1);
17203 EVT VT = N->getValueType(0);
17204 SDLoc SL(N);
17205 MatchContextClass matcher(DAG, TLI, N);
17206 const TargetOptions &Options = DAG.getTarget().Options;
17207
17208 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17209
17210 // Floating-point multiply-add with intermediate rounding.
17211 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17212 // FIXME: Add VP_FMAD opcode.
17213 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17214
17215 // Floating-point multiply-add without intermediate rounding.
17216 bool HasFMA =
17217 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17218 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17219
17220 // No valid opcode, do not combine.
17221 if (!HasFMAD && !HasFMA)
17222 return SDValue();
17223
17224 const SDNodeFlags Flags = N->getFlags();
17225 bool AllowFusionGlobally =
17226 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17227
17228 // If the subtraction is not contractable, do not combine.
17229 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17230 return SDValue();
17231
17232 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17233 return SDValue();
17234
17235 // Always prefer FMAD to FMA for precision.
17236 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17237 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17238 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
17239
17240 // Is the node an FMUL and contractable either due to global flags or
17241 // SDNodeFlags.
17242 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17243 if (!matcher.match(N, ISD::FMUL))
17244 return false;
17245 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17246 };
17247
17248 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17249 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17250 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17251 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17252 XY.getOperand(1),
17253 matcher.getNode(ISD::FNEG, SL, VT, Z));
17254 }
17255 return SDValue();
17256 };
17257
17258 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17259 // Note: Commutes FSUB operands.
17260 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17261 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17262 return matcher.getNode(
17263 PreferredFusedOpcode, SL, VT,
17264 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17265 YZ.getOperand(1), X);
17266 }
17267 return SDValue();
17268 };
17269
17270 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17271 // prefer to fold the multiply with fewer uses.
17272 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17273 (N0->use_size() > N1->use_size())) {
17274 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17275 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17276 return V;
17277 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17278 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17279 return V;
17280 } else {
17281 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17282 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17283 return V;
17284 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17285 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17286 return V;
17287 }
17288
17289 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
17290 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17291 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17292 SDValue N00 = N0.getOperand(0).getOperand(0);
17293 SDValue N01 = N0.getOperand(0).getOperand(1);
17294 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17295 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17296 matcher.getNode(ISD::FNEG, SL, VT, N1));
17297 }
17298
17299 // Look through FP_EXTEND nodes to do more combining.
17300
17301 // fold (fsub (fpext (fmul x, y)), z)
17302 // -> (fma (fpext x), (fpext y), (fneg z))
17303 if (matcher.match(N0, ISD::FP_EXTEND)) {
17304 SDValue N00 = N0.getOperand(0);
17305 if (isContractableFMUL(N00) &&
17306 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17307 N00.getValueType())) {
17308 return matcher.getNode(
17309 PreferredFusedOpcode, SL, VT,
17310 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17311 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17312 matcher.getNode(ISD::FNEG, SL, VT, N1));
17313 }
17314 }
17315
17316 // fold (fsub x, (fpext (fmul y, z)))
17317 // -> (fma (fneg (fpext y)), (fpext z), x)
17318 // Note: Commutes FSUB operands.
17319 if (matcher.match(N1, ISD::FP_EXTEND)) {
17320 SDValue N10 = N1.getOperand(0);
17321 if (isContractableFMUL(N10) &&
17322 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17323 N10.getValueType())) {
17324 return matcher.getNode(
17325 PreferredFusedOpcode, SL, VT,
17326 matcher.getNode(
17327 ISD::FNEG, SL, VT,
17328 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17329 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17330 }
17331 }
17332
17333 // fold (fsub (fpext (fneg (fmul x, y))), z)
17334 // -> (fneg (fma (fpext x), (fpext y), z))
17335 // Note: This could be removed with appropriate canonicalization of the
17336 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17337 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17338 // from implementing the canonicalization in visitFSUB.
17339 if (matcher.match(N0, ISD::FP_EXTEND)) {
17340 SDValue N00 = N0.getOperand(0);
17341 if (matcher.match(N00, ISD::FNEG)) {
17342 SDValue N000 = N00.getOperand(0);
17343 if (isContractableFMUL(N000) &&
17344 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17345 N00.getValueType())) {
17346 return matcher.getNode(
17347 ISD::FNEG, SL, VT,
17348 matcher.getNode(
17349 PreferredFusedOpcode, SL, VT,
17350 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17351 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17352 N1));
17353 }
17354 }
17355 }
17356
17357 // fold (fsub (fneg (fpext (fmul x, y))), z)
17358 // -> (fneg (fma (fpext x), (fpext y), z))
17359 // Note: This could be removed with appropriate canonicalization of the
17360 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17361 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17362 // from implementing the canonicalization in visitFSUB.
17363 if (matcher.match(N0, ISD::FNEG)) {
17364 SDValue N00 = N0.getOperand(0);
17365 if (matcher.match(N00, ISD::FP_EXTEND)) {
17366 SDValue N000 = N00.getOperand(0);
17367 if (isContractableFMUL(N000) &&
17368 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17369 N000.getValueType())) {
17370 return matcher.getNode(
17371 ISD::FNEG, SL, VT,
17372 matcher.getNode(
17373 PreferredFusedOpcode, SL, VT,
17374 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17375 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17376 N1));
17377 }
17378 }
17379 }
17380
17381 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17382 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17383 };
17384
17385 auto isFusedOp = [&](SDValue N) {
17386 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17387 };
17388
17389 // More folding opportunities when target permits.
17390 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17391 bool CanFuse = N->getFlags().hasAllowContract();
17392 // fold (fsub (fma x, y, (fmul u, v)), z)
17393 // -> (fma x, y (fma u, v, (fneg z)))
17394 if (CanFuse && isFusedOp(N0) &&
17395 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17396 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17397 return matcher.getNode(
17398 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17399 matcher.getNode(PreferredFusedOpcode, SL, VT,
17400 N0.getOperand(2).getOperand(0),
17401 N0.getOperand(2).getOperand(1),
17402 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17403 }
17404
17405 // fold (fsub x, (fma y, z, (fmul u, v)))
17406 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17407 if (CanFuse && isFusedOp(N1) &&
17408 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17409 N1->hasOneUse() && NoSignedZero) {
17410 SDValue N20 = N1.getOperand(2).getOperand(0);
17411 SDValue N21 = N1.getOperand(2).getOperand(1);
17412 return matcher.getNode(
17413 PreferredFusedOpcode, SL, VT,
17414 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17415 N1.getOperand(1),
17416 matcher.getNode(PreferredFusedOpcode, SL, VT,
17417 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17418 }
17419
17420 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17421 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17422 if (isFusedOp(N0) && N0->hasOneUse()) {
17423 SDValue N02 = N0.getOperand(2);
17424 if (matcher.match(N02, ISD::FP_EXTEND)) {
17425 SDValue N020 = N02.getOperand(0);
17426 if (isContractableAndReassociableFMUL(N020) &&
17427 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17428 N020.getValueType())) {
17429 return matcher.getNode(
17430 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17431 matcher.getNode(
17432 PreferredFusedOpcode, SL, VT,
17433 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17434 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17435 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17436 }
17437 }
17438 }
17439
17440 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17441 // -> (fma (fpext x), (fpext y),
17442 // (fma (fpext u), (fpext v), (fneg z)))
17443 // FIXME: This turns two single-precision and one double-precision
17444 // operation into two double-precision operations, which might not be
17445 // interesting for all targets, especially GPUs.
17446 if (matcher.match(N0, ISD::FP_EXTEND)) {
17447 SDValue N00 = N0.getOperand(0);
17448 if (isFusedOp(N00)) {
17449 SDValue N002 = N00.getOperand(2);
17450 if (isContractableAndReassociableFMUL(N002) &&
17451 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17452 N00.getValueType())) {
17453 return matcher.getNode(
17454 PreferredFusedOpcode, SL, VT,
17455 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17456 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17457 matcher.getNode(
17458 PreferredFusedOpcode, SL, VT,
17459 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17460 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17461 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17462 }
17463 }
17464 }
17465
17466 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17467 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17468 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17469 N1->hasOneUse()) {
17470 SDValue N120 = N1.getOperand(2).getOperand(0);
17471 if (isContractableAndReassociableFMUL(N120) &&
17472 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17473 N120.getValueType())) {
17474 SDValue N1200 = N120.getOperand(0);
17475 SDValue N1201 = N120.getOperand(1);
17476 return matcher.getNode(
17477 PreferredFusedOpcode, SL, VT,
17478 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17479 N1.getOperand(1),
17480 matcher.getNode(
17481 PreferredFusedOpcode, SL, VT,
17482 matcher.getNode(ISD::FNEG, SL, VT,
17483 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17484 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17485 }
17486 }
17487
17488 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17489 // -> (fma (fneg (fpext y)), (fpext z),
17490 // (fma (fneg (fpext u)), (fpext v), x))
17491 // FIXME: This turns two single-precision and one double-precision
17492 // operation into two double-precision operations, which might not be
17493 // interesting for all targets, especially GPUs.
17494 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17495 SDValue CvtSrc = N1.getOperand(0);
17496 SDValue N100 = CvtSrc.getOperand(0);
17497 SDValue N101 = CvtSrc.getOperand(1);
17498 SDValue N102 = CvtSrc.getOperand(2);
17499 if (isContractableAndReassociableFMUL(N102) &&
17500 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17501 CvtSrc.getValueType())) {
17502 SDValue N1020 = N102.getOperand(0);
17503 SDValue N1021 = N102.getOperand(1);
17504 return matcher.getNode(
17505 PreferredFusedOpcode, SL, VT,
17506 matcher.getNode(ISD::FNEG, SL, VT,
17507 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17508 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17509 matcher.getNode(
17510 PreferredFusedOpcode, SL, VT,
17511 matcher.getNode(ISD::FNEG, SL, VT,
17512 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17513 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17514 }
17515 }
17516 }
17517
17518 return SDValue();
17519}
17520
17521/// Try to perform FMA combining on a given FMUL node based on the distributive
17522/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17523/// subtraction instead of addition).
17524SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17525 SDValue N0 = N->getOperand(0);
17526 SDValue N1 = N->getOperand(1);
17527 EVT VT = N->getValueType(0);
17528 SDLoc SL(N);
17529
17530 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17531
17532 const TargetOptions &Options = DAG.getTarget().Options;
17533
17534 // The transforms below are incorrect when x == 0 and y == inf, because the
17535 // intermediate multiplication produces a nan.
17536 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17537 if (!hasNoInfs(Options, FAdd))
17538 return SDValue();
17539
17540 // Floating-point multiply-add without intermediate rounding.
17541 bool HasFMA =
17542 isContractableFMUL(Options, SDValue(N, 0)) &&
17543 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17544 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17545
17546 // Floating-point multiply-add with intermediate rounding. This can result
17547 // in a less precise result due to the changed rounding order.
17548 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17549
17550 // No valid opcode, do not combine.
17551 if (!HasFMAD && !HasFMA)
17552 return SDValue();
17553
17554 // Always prefer FMAD to FMA for precision.
17555 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17556 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17557
17558 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17559 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
17560 auto FuseFADD = [&](SDValue X, SDValue Y) {
17561 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17562 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17563 if (C->isExactlyValue(+1.0))
17564 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17565 Y);
17566 if (C->isExactlyValue(-1.0))
17567 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17568 DAG.getNode(ISD::FNEG, SL, VT, Y));
17569 }
17570 }
17571 return SDValue();
17572 };
17573
17574 if (SDValue FMA = FuseFADD(N0, N1))
17575 return FMA;
17576 if (SDValue FMA = FuseFADD(N1, N0))
17577 return FMA;
17578
17579 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17580 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17581 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17582 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17583 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17584 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17585 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17586 if (C0->isExactlyValue(+1.0))
17587 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17588 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17589 Y);
17590 if (C0->isExactlyValue(-1.0))
17591 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17592 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17593 DAG.getNode(ISD::FNEG, SL, VT, Y));
17594 }
17595 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17596 if (C1->isExactlyValue(+1.0))
17597 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17598 DAG.getNode(ISD::FNEG, SL, VT, Y));
17599 if (C1->isExactlyValue(-1.0))
17600 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17601 Y);
17602 }
17603 }
17604 return SDValue();
17605 };
17606
17607 if (SDValue FMA = FuseFSUB(N0, N1))
17608 return FMA;
17609 if (SDValue FMA = FuseFSUB(N1, N0))
17610 return FMA;
17611
17612 return SDValue();
17613}
17614
17615SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17616 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17617
17618 // FADD -> FMA combines:
17619 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17620 if (Fused.getOpcode() != ISD::DELETED_NODE)
17621 AddToWorklist(Fused.getNode());
17622 return Fused;
17623 }
17624 return SDValue();
17625}
17626
17627SDValue DAGCombiner::visitFADD(SDNode *N) {
17628 SDValue N0 = N->getOperand(0);
17629 SDValue N1 = N->getOperand(1);
17630 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17631 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17632 EVT VT = N->getValueType(0);
17633 SDLoc DL(N);
17634 const TargetOptions &Options = DAG.getTarget().Options;
17635 SDNodeFlags Flags = N->getFlags();
17636 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17637
17638 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17639 return R;
17640
17641 // fold (fadd c1, c2) -> c1 + c2
17642 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17643 return C;
17644
17645 // canonicalize constant to RHS
17646 if (N0CFP && !N1CFP)
17647 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17648
17649 // fold vector ops
17650 if (VT.isVector())
17651 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17652 return FoldedVOp;
17653
17654 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
17655 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17656 if (N1C && N1C->isZero())
17657 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17658 return N0;
17659
17660 if (SDValue NewSel = foldBinOpIntoSelect(N))
17661 return NewSel;
17662
17663 // fold (fadd A, (fneg B)) -> (fsub A, B)
17664 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17665 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17666 N1, DAG, LegalOperations, ForCodeSize))
17667 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17668
17669 // fold (fadd (fneg A), B) -> (fsub B, A)
17670 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17671 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17672 N0, DAG, LegalOperations, ForCodeSize))
17673 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17674
17675 auto isFMulNegTwo = [](SDValue FMul) {
17676 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17677 return false;
17678 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17679 return C && C->isExactlyValue(-2.0);
17680 };
17681
17682 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17683 if (isFMulNegTwo(N0)) {
17684 SDValue B = N0.getOperand(0);
17685 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17686 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17687 }
17688 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17689 if (isFMulNegTwo(N1)) {
17690 SDValue B = N1.getOperand(0);
17691 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17692 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17693 }
17694
17695 // No FP constant should be created after legalization as Instruction
17696 // Selection pass has a hard time dealing with FP constants.
17697 bool AllowNewConst = (Level < AfterLegalizeDAG);
17698
17699 // If nnan is enabled, fold lots of things.
17700 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17701 // If allowed, fold (fadd (fneg x), x) -> 0.0
17702 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17703 return DAG.getConstantFP(0.0, DL, VT);
17704
17705 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17706 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17707 return DAG.getConstantFP(0.0, DL, VT);
17708 }
17709
17710 // If 'unsafe math' or reassoc and nsz, fold lots of things.
17711 // TODO: break out portions of the transformations below for which Unsafe is
17712 // considered and which do not require both nsz and reassoc
17713 if ((Options.NoSignedZerosFPMath ||
17714 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17715 AllowNewConst) {
17716 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17717 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17718 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17719 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17720 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17721 }
17722
17723 // We can fold chains of FADD's of the same value into multiplications.
17724 // This transform is not safe in general because we are reducing the number
17725 // of rounding steps.
17726 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17727 if (N0.getOpcode() == ISD::FMUL) {
17728 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17729 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17730
17731 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17732 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17733 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17734 DAG.getConstantFP(1.0, DL, VT));
17735 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17736 }
17737
17738 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17739 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17740 N1.getOperand(0) == N1.getOperand(1) &&
17741 N0.getOperand(0) == N1.getOperand(0)) {
17742 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17743 DAG.getConstantFP(2.0, DL, VT));
17744 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17745 }
17746 }
17747
17748 if (N1.getOpcode() == ISD::FMUL) {
17749 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17750 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17751
17752 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17753 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17754 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17755 DAG.getConstantFP(1.0, DL, VT));
17756 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17757 }
17758
17759 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17760 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17761 N0.getOperand(0) == N0.getOperand(1) &&
17762 N1.getOperand(0) == N0.getOperand(0)) {
17763 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17764 DAG.getConstantFP(2.0, DL, VT));
17765 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17766 }
17767 }
17768
17769 if (N0.getOpcode() == ISD::FADD) {
17770 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17771 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17772 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17773 (N0.getOperand(0) == N1)) {
17774 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17775 DAG.getConstantFP(3.0, DL, VT));
17776 }
17777 }
17778
17779 if (N1.getOpcode() == ISD::FADD) {
17780 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17781 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17782 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17783 N1.getOperand(0) == N0) {
17784 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17785 DAG.getConstantFP(3.0, DL, VT));
17786 }
17787 }
17788
17789 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17790 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17791 N0.getOperand(0) == N0.getOperand(1) &&
17792 N1.getOperand(0) == N1.getOperand(1) &&
17793 N0.getOperand(0) == N1.getOperand(0)) {
17794 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17795 DAG.getConstantFP(4.0, DL, VT));
17796 }
17797 }
17798 } // enable-unsafe-fp-math && AllowNewConst
17799
17800 if ((Options.NoSignedZerosFPMath ||
17801 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
17802 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17803 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17804 VT, N0, N1, Flags))
17805 return SD;
17806 }
17807
17808 // FADD -> FMA combines:
17809 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17810 if (Fused.getOpcode() != ISD::DELETED_NODE)
17811 AddToWorklist(Fused.getNode());
17812 return Fused;
17813 }
17814 return SDValue();
17815}
17816
17817SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17818 SDValue Chain = N->getOperand(0);
17819 SDValue N0 = N->getOperand(1);
17820 SDValue N1 = N->getOperand(2);
17821 EVT VT = N->getValueType(0);
17822 EVT ChainVT = N->getValueType(1);
17823 SDLoc DL(N);
17824 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17825
17826 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17827 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17828 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17829 N1, DAG, LegalOperations, ForCodeSize)) {
17830 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17831 {Chain, N0, NegN1});
17832 }
17833
17834 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17835 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17836 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17837 N0, DAG, LegalOperations, ForCodeSize)) {
17838 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17839 {Chain, N1, NegN0});
17840 }
17841 return SDValue();
17842}
17843
17844SDValue DAGCombiner::visitFSUB(SDNode *N) {
17845 SDValue N0 = N->getOperand(0);
17846 SDValue N1 = N->getOperand(1);
17847 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17848 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17849 EVT VT = N->getValueType(0);
17850 SDLoc DL(N);
17851 const TargetOptions &Options = DAG.getTarget().Options;
17852 const SDNodeFlags Flags = N->getFlags();
17853 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17854
17855 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17856 return R;
17857
17858 // fold (fsub c1, c2) -> c1-c2
17859 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17860 return C;
17861
17862 // fold vector ops
17863 if (VT.isVector())
17864 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17865 return FoldedVOp;
17866
17867 if (SDValue NewSel = foldBinOpIntoSelect(N))
17868 return NewSel;
17869
17870 // (fsub A, 0) -> A
17871 if (N1CFP && N1CFP->isZero()) {
17872 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17873 Flags.hasNoSignedZeros()) {
17874 return N0;
17875 }
17876 }
17877
17878 if (N0 == N1) {
17879 // (fsub x, x) -> 0.0
17880 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17881 return DAG.getConstantFP(0.0f, DL, VT);
17882 }
17883
17884 // (fsub -0.0, N1) -> -N1
17885 if (N0CFP && N0CFP->isZero()) {
17886 if (N0CFP->isNegative() ||
17887 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17888 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17889 // flushed to zero, unless all users treat denorms as zero (DAZ).
17890 // FIXME: This transform will change the sign of a NaN and the behavior
17891 // of a signaling NaN. It is only valid when a NoNaN flag is present.
17892 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17893 if (DenormMode == DenormalMode::getIEEE()) {
17894 if (SDValue NegN1 =
17895 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17896 return NegN1;
17897 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17898 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17899 }
17900 }
17901 }
17902
17903 if ((Options.NoSignedZerosFPMath ||
17904 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17905 N1.getOpcode() == ISD::FADD) {
17906 // X - (X + Y) -> -Y
17907 if (N0 == N1->getOperand(0))
17908 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17909 // X - (Y + X) -> -Y
17910 if (N0 == N1->getOperand(1))
17911 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17912 }
17913
17914 // fold (fsub A, (fneg B)) -> (fadd A, B)
17915 if (SDValue NegN1 =
17916 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17917 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17918
17919 // FSUB -> FMA combines:
17920 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17921 AddToWorklist(Fused.getNode());
17922 return Fused;
17923 }
17924
17925 return SDValue();
17926}
17927
17928// Transform IEEE Floats:
17929// (fmul C, (uitofp Pow2))
17930// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17931// (fdiv C, (uitofp Pow2))
17932// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17933//
17934 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
17935 // so there is no need for more than an add/sub.
17936//
17937// This is valid under the following circumstances:
17938// 1) We are dealing with IEEE floats
17939// 2) C is normal
17940// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
17941 // TODO: Much of this could also be used for generating `ldexp` on targets that
17942 // prefer it.
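// For illustration with f32 (23 mantissa bits): C = 8.0f = 0x41000000, Pow2 = 4,
// Log2(Pow2) = 2, so (fmul 8.0f, (uitofp 4)) becomes
//   bitcast (add 0x41000000, 2 << 23) = bitcast 0x42000000 = 32.0f;
// only the exponent field changes, provided it stays within range.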
17943SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
17944 EVT VT = N->getValueType(0);
17946 return SDValue();
17947
17948 SDValue ConstOp, Pow2Op;
17949
17950 std::optional<int> Mantissa;
17951 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
17952 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17953 return false;
17954
17955 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17956 Pow2Op = N->getOperand(1 - ConstOpIdx);
17957 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
17958 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
17959 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
17960 return false;
17961
17962 Pow2Op = Pow2Op.getOperand(0);
17963
17964 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
17965 // TODO: We could use knownbits to make this bound more precise.
17966 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
17967
17968 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
17969 if (CFP == nullptr)
17970 return false;
17971
17972 const APFloat &APF = CFP->getValueAPF();
17973
17974 // Make sure we have a normal constant.
17975 if (!APF.isNormal())
17976 return false;
17977
17978 // Make sure the float's exponent is within the bounds for which this
17979 // transform produces a bitwise-identical value.
17980 int CurExp = ilogb(APF);
17981 // FMul by pow2 will only increase exponent.
17982 int MinExp =
17983 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17984 // FDiv by pow2 will only decrease exponent.
17985 int MaxExp =
17986 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17987 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
17988 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
17989 return false;
17990
17991 // Finally make sure we actually know the mantissa for the float type.
17992 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
17993 if (!Mantissa)
17994 Mantissa = ThisMantissa;
17995
17996 return *Mantissa == ThisMantissa && ThisMantissa > 0;
17997 };
17998
17999 // TODO: We may be able to include undefs.
18000 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18001 };
18002
18003 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18004 return SDValue();
18005
18006 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18007 return SDValue();
18008
18009 // Get log2 after all other checks have taken place. This is because
18010 // BuildLogBase2 may create a new node.
18011 SDLoc DL(N);
18012 // Get Log2 type with same bitwidth as the float type (VT).
18013 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18014 if (VT.isVector())
18015 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18017
18018 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18019 /*InexpensiveOnly*/ true, NewIntVT);
18020 if (!Log2)
18021 return SDValue();
18022
18023 // Perform actual transform.
18024 SDValue MantissaShiftCnt =
18025 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18026 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
18027 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
18028 // cast. We could implement that here by handling the casts.
18029 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18030 SDValue ResAsInt =
18031 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18032 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18033 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18034 return ResAsFP;
18035}
18036
18037SDValue DAGCombiner::visitFMUL(SDNode *N) {
18038 SDValue N0 = N->getOperand(0);
18039 SDValue N1 = N->getOperand(1);
18040 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18041 EVT VT = N->getValueType(0);
18042 SDLoc DL(N);
18043 const SDNodeFlags Flags = N->getFlags();
18044 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18045
18046 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18047 return R;
18048
18049 // fold (fmul c1, c2) -> c1*c2
18050 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18051 return C;
18052
18053 // canonicalize constant to RHS
18056 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18057
18058 // fold vector ops
18059 if (VT.isVector())
18060 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18061 return FoldedVOp;
18062
18063 if (SDValue NewSel = foldBinOpIntoSelect(N))
18064 return NewSel;
18065
18066 if (Flags.hasAllowReassociation()) {
18067 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18069 N0.getOpcode() == ISD::FMUL) {
18070 SDValue N00 = N0.getOperand(0);
18071 SDValue N01 = N0.getOperand(1);
18072 // Avoid an infinite loop by making sure that N00 is not a constant
18073 // (the inner multiply has not been constant folded yet).
18076 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18077 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18078 }
18079 }
18080
18081 // Match a special-case: we convert X * 2.0 into fadd.
18082 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18083 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18084 N0.getOperand(0) == N0.getOperand(1)) {
18085 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18086 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18087 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18088 }
18089
18090 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18091 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18092 VT, N0, N1, Flags))
18093 return SD;
18094 }
18095
18096 // fold (fmul X, 2.0) -> (fadd X, X)
18097 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18098 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18099
18100 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18101 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18102 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18103 return DAG.getNode(ISD::FSUB, DL, VT,
18104 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18105 }
18106 }
18107
18108 // -N0 * -N1 --> N0 * N1
18113 SDValue NegN0 =
18114 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18115 if (NegN0) {
18116 HandleSDNode NegN0Handle(NegN0);
18117 SDValue NegN1 =
18118 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18119 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18121 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18122 }
18123
18124 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18125 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18126 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18127 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18128 TLI.isOperationLegal(ISD::FABS, VT)) {
18129 SDValue Select = N0, X = N1;
18130 if (Select.getOpcode() != ISD::SELECT)
18131 std::swap(Select, X);
18132
18133 SDValue Cond = Select.getOperand(0);
18134 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18135 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18136
18137 if (TrueOpnd && FalseOpnd &&
18138 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18139 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18140 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18141 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18142 switch (CC) {
18143 default: break;
18144 case ISD::SETOLT:
18145 case ISD::SETULT:
18146 case ISD::SETOLE:
18147 case ISD::SETULE:
18148 case ISD::SETLT:
18149 case ISD::SETLE:
18150 std::swap(TrueOpnd, FalseOpnd);
18151 [[fallthrough]];
18152 case ISD::SETOGT:
18153 case ISD::SETUGT:
18154 case ISD::SETOGE:
18155 case ISD::SETUGE:
18156 case ISD::SETGT:
18157 case ISD::SETGE:
18158 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18159 TLI.isOperationLegal(ISD::FNEG, VT))
18160 return DAG.getNode(ISD::FNEG, DL, VT,
18161 DAG.getNode(ISD::FABS, DL, VT, X));
18162 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18163 return DAG.getNode(ISD::FABS, DL, VT, X);
18164
18165 break;
18166 }
18167 }
18168 }
18169
18170 // FMUL -> FMA combines:
18171 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18172 AddToWorklist(Fused.getNode());
18173 return Fused;
18174 }
18175
18176 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18177 // able to run.
18178 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18179 return R;
18180
18181 return SDValue();
18182}
18183
18184template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18185 SDValue N0 = N->getOperand(0);
18186 SDValue N1 = N->getOperand(1);
18187 SDValue N2 = N->getOperand(2);
18188 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18189 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18190 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18191 EVT VT = N->getValueType(0);
18192 SDLoc DL(N);
18193 const TargetOptions &Options = DAG.getTarget().Options;
18194 // FMA nodes have flags that propagate to the created nodes.
18195 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18196 MatchContextClass matcher(DAG, TLI, N);
18197
18198 // Constant fold FMA.
18199 if (SDValue C =
18200 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18201 return C;
18202
18203 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18208 SDValue NegN0 =
18209 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18210 if (NegN0) {
18211 HandleSDNode NegN0Handle(NegN0);
18212 SDValue NegN1 =
18213 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18214 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18216 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18217 }
18218
18219 // FIXME: use fast math flags instead of Options.UnsafeFPMath
18220 // TODO: Finally migrate away from global TargetOptions.
18221 if ((Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
18222 (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
18223 if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() ||
18224 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18225 if (N0CFP && N0CFP->isZero())
18226 return N2;
18227 if (N1CFP && N1CFP->isZero())
18228 return N2;
18229 }
18230 }
18231
18232 // FIXME: Support splat of constant.
18233 if (N0CFP && N0CFP->isExactlyValue(1.0))
18234 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18235 if (N1CFP && N1CFP->isExactlyValue(1.0))
18236 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18237
18238 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18241 return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18242
18243 bool CanReassociate = N->getFlags().hasAllowReassociation();
18244 if (CanReassociate) {
18245 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18246 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18249 return matcher.getNode(
18250 ISD::FMUL, DL, VT, N0,
18251 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18252 }
18253
18254 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18255 if (matcher.match(N0, ISD::FMUL) &&
18258 return matcher.getNode(
18259 ISD::FMA, DL, VT, N0.getOperand(0),
18260 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18261 }
18262 }
18263
18264 // (fma x, 1, y) -> (fadd x, y), (fma x, -1, y) -> (fadd (fneg x), y)
18265 // FIXME: Support splat of constant.
18266 if (N1CFP) {
18267 if (N1CFP->isExactlyValue(1.0))
18268 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18269
18270 if (N1CFP->isExactlyValue(-1.0) &&
18271 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18272 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18273 AddToWorklist(RHSNeg.getNode());
18274 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18275 }
18276
18277 // fma (fneg x), K, y -> fma x, -K, y
18278 if (matcher.match(N0, ISD::FNEG) &&
18280 (N1.hasOneUse() &&
18281 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18282 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18283 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18284 }
18285 }
18286
18287 // FIXME: Support splat of constant.
18288 if (CanReassociate) {
18289 // (fma x, c, x) -> (fmul x, (c+1))
18290 if (N1CFP && N0 == N2) {
18291 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18292 matcher.getNode(ISD::FADD, DL, VT, N1,
18293 DAG.getConstantFP(1.0, DL, VT)));
18294 }
18295
18296 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18297 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18298 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18299 matcher.getNode(ISD::FADD, DL, VT, N1,
18300 DAG.getConstantFP(-1.0, DL, VT)));
18301 }
18302 }
18303
18304 // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
18305 // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
18306 if (!TLI.isFNegFree(VT))
18308 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18309 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18310 return SDValue();
18311}
18312
18313SDValue DAGCombiner::visitFMAD(SDNode *N) {
18314 SDValue N0 = N->getOperand(0);
18315 SDValue N1 = N->getOperand(1);
18316 SDValue N2 = N->getOperand(2);
18317 EVT VT = N->getValueType(0);
18318 SDLoc DL(N);
18319
18320 // Constant fold FMAD.
18321 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18322 return C;
18323
18324 return SDValue();
18325}
18326
18327// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18328// reciprocal.
18329// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18330// Notice that this is not always beneficial. One reason is different targets
18331// may have different costs for FDIV and FMUL, so sometimes the cost of two
18332// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18333// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
18334SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18335 // TODO: Limit this transform based on optsize/minsize - it always creates at
18336 // least 1 extra instruction. But the perf win may be substantial enough
18337 // that only minsize should restrict this.
18338 const SDNodeFlags Flags = N->getFlags();
18339 if (LegalDAG || !Flags.hasAllowReciprocal())
18340 return SDValue();
18341
18342 // Skip if current node is a reciprocal/fneg-reciprocal.
18343 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18344 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18345 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18346 return SDValue();
18347
18348 // Exit early if the target does not want this transform or if there can't
18349 // possibly be enough uses of the divisor to make the transform worthwhile.
18350 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18351
18352 // For splat vectors, scale the number of uses by the splat factor. If we can
18353 // convert the division into a scalar op, that will likely be much faster.
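// For example, a single <4 x float> fdiv by a splatted divisor counts as 4
// uses toward the MinUses threshold checked below.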
18354 unsigned NumElts = 1;
18355 EVT VT = N->getValueType(0);
18356 if (VT.isVector() && DAG.isSplatValue(N1))
18357 NumElts = VT.getVectorMinNumElements();
18358
18359 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18360 return SDValue();
18361
18362 // Find all FDIV users of the same divisor.
18363 // Use a set because duplicates may be present in the user list.
18365 for (auto *U : N1->users()) {
18366 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18367 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18368 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18369 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18370 U->getFlags().hasAllowReassociation() &&
18371 U->getFlags().hasNoSignedZeros())
18372 continue;
18373
18374 // This division is eligible for the optimization only if it allows
18375 // reciprocal formation.
18376 if (U->getFlags().hasAllowReciprocal())
18377 Users.insert(U);
18378 }
18379 }
18380
18381 // Now that we have the actual number of divisor uses, make sure it meets
18382 // the minimum threshold specified by the target.
18383 if ((Users.size() * NumElts) < MinUses)
18384 return SDValue();
18385
18386 SDLoc DL(N);
18387 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18388 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18389
18390 // Dividend / Divisor -> Dividend * Reciprocal
18391 for (auto *U : Users) {
18392 SDValue Dividend = U->getOperand(0);
18393 if (Dividend != FPOne) {
18394 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18395 Reciprocal, Flags);
18396 CombineTo(U, NewNode);
18397 } else if (U != Reciprocal.getNode()) {
18398 // In the absence of fast-math-flags, this user node is always the
18399 // same node as Reciprocal, but with FMF they may be different nodes.
18400 CombineTo(U, Reciprocal);
18401 }
18402 }
18403 return SDValue(N, 0); // N was replaced.
18404}
18405
18406SDValue DAGCombiner::visitFDIV(SDNode *N) {
18407 SDValue N0 = N->getOperand(0);
18408 SDValue N1 = N->getOperand(1);
18409 EVT VT = N->getValueType(0);
18410 SDLoc DL(N);
18411 const TargetOptions &Options = DAG.getTarget().Options;
18412 SDNodeFlags Flags = N->getFlags();
18413 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18414
18415 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18416 return R;
18417
18418 // fold (fdiv c1, c2) -> c1/c2
18419 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18420 return C;
18421
18422 // fold vector ops
18423 if (VT.isVector())
18424 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18425 return FoldedVOp;
18426
18427 if (SDValue NewSel = foldBinOpIntoSelect(N))
18428 return NewSel;
18429
18431 return V;
18432
18433 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18434 // the loss is acceptable with AllowReciprocal.
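// For example, 1.0/4.0 == 0.25 is exact, so (fdiv X, 4.0) can become
// (fmul X, 0.25) without the 'arcp' flag, whereas 1.0/3.0 is inexact and the
// corresponding fold additionally requires AllowReciprocal.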
18435 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18436 // Compute the reciprocal 1.0 / c2.
18437 const APFloat &N1APF = N1CFP->getValueAPF();
18438 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18440 // Only do the transform if the reciprocal is a legal fp immediate that
18441 // isn't too nasty (e.g. NaN, denormal, ...).
18442 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18443 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18444 (!LegalOperations ||
18445 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18446 // backend)... we should handle this gracefully after Legalize.
18447 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18449 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18450 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18451 DAG.getConstantFP(Recip, DL, VT));
18452 }
18453
18454 if (Flags.hasAllowReciprocal()) {
18455 // If this FDIV is part of a reciprocal square root, it may be folded
18456 // into a target-specific square root estimate instruction.
18457 if (N1.getOpcode() == ISD::FSQRT) {
18458 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
18459 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18460 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18461 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18462 if (SDValue RV =
18463 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18464 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18465 AddToWorklist(RV.getNode());
18466 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18467 }
18468 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18469 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18470 if (SDValue RV =
18471 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18472 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18473 AddToWorklist(RV.getNode());
18474 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18475 }
18476 } else if (N1.getOpcode() == ISD::FMUL) {
18477 // Look through an FMUL. Even though this won't remove the FDIV directly,
18478 // it's still worthwhile to get rid of the FSQRT if possible.
18479 SDValue Sqrt, Y;
18480 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18481 Sqrt = N1.getOperand(0);
18482 Y = N1.getOperand(1);
18483 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18484 Sqrt = N1.getOperand(1);
18485 Y = N1.getOperand(0);
18486 }
18487 if (Sqrt.getNode()) {
18488 // If the other multiply operand is known positive, pull it into the
18489 // sqrt. That will eliminate the division if we convert to an estimate.
18490 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18491 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18492 SDValue A;
18493 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18494 A = Y.getOperand(0);
18495 else if (Y == Sqrt.getOperand(0))
18496 A = Y;
18497 if (A) {
18498 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18499 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18500 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18501 SDValue AAZ =
18502 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18503 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
18504 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18505
18506 // Estimate creation failed. Clean up speculatively created nodes.
18507 recursivelyDeleteUnusedNodes(AAZ.getNode());
18508 }
18509 }
18510
18511 // We found a FSQRT, so try to make this fold:
18512 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18513 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
18514 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18515 AddToWorklist(Div.getNode());
18516 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18517 }
18518 }
18519 }
18520
18521 // Fold into a reciprocal estimate and multiply instead of a real divide.
18522 if (Options.NoInfsFPMath || Flags.hasNoInfs())
18523 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18524 return RV;
18525 }
18526
18527 // Fold X/Sqrt(X) -> Sqrt(X)
18528 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
18529 Flags.hasAllowReassociation())
18530 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18531 return N1;
18532
18533 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18538 SDValue NegN0 =
18539 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18540 if (NegN0) {
18541 HandleSDNode NegN0Handle(NegN0);
18542 SDValue NegN1 =
18543 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18544 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18546 return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18547 }
18548
18549 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18550 return R;
18551
18552 return SDValue();
18553}
18554
18555SDValue DAGCombiner::visitFREM(SDNode *N) {
18556 SDValue N0 = N->getOperand(0);
18557 SDValue N1 = N->getOperand(1);
18558 EVT VT = N->getValueType(0);
18559 SDNodeFlags Flags = N->getFlags();
18560 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18561 SDLoc DL(N);
18562
18563 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18564 return R;
18565
18566 // fold (frem c1, c2) -> fmod(c1,c2)
18567 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18568 return C;
18569
18570 if (SDValue NewSel = foldBinOpIntoSelect(N))
18571 return NewSel;
18572
18573 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
18574 // power of 2.
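// For example, frem 7.5, 2.0: 7.5 / 2.0 = 3.75, ftrunc -> 3.0, and
// 7.5 - 3.0 * 2.0 = 1.5, which matches fmod(7.5, 2.0).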
18575 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18579 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18580 bool NeedsCopySign =
18581 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
18582 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18583 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18584 SDValue MLA;
18586 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18587 N1, N0);
18588 } else {
18589 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18590 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18591 }
18592 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18593 }
18594
18595 return SDValue();
18596}
18597
18598SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18599 SDNodeFlags Flags = N->getFlags();
18600 const TargetOptions &Options = DAG.getTarget().Options;
18601
18602 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18603 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18604 if (!Flags.hasApproximateFuncs() ||
18605 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
18606 return SDValue();
18607
18608 SDValue N0 = N->getOperand(0);
18609 if (TLI.isFsqrtCheap(N0, DAG))
18610 return SDValue();
18611
18612 // FSQRT nodes have flags that propagate to the created nodes.
18613 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18614 // transform the fdiv, we may produce a sub-optimal estimate sequence
18615 // because the reciprocal calculation may not have to filter out a
18616 // 0.0 input.
18617 return buildSqrtEstimate(N0, Flags);
18618}
18619
18620/// copysign(x, fp_extend(y)) -> copysign(x, y)
18621/// copysign(x, fp_round(y)) -> copysign(x, y)
18622/// Operands to the functions are the type of X and Y respectively.
18623static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18624 // Always fold no-op FP casts.
18625 if (XTy == YTy)
18626 return true;
18627
18628 // Do not optimize out type conversion of f128 type yet.
18629 // For some targets like x86_64, configuration is changed to keep one f128
18630 // value in one SSE register, but instruction selection cannot handle
18631 // FCOPYSIGN on SSE registers yet.
18632 if (YTy == MVT::f128)
18633 return false;
18634
18635 // Avoid mismatched vector operand types, for better instruction selection.
18636 return !YTy.isVector();
18637}
18638
18640 SDValue N1 = N->getOperand(1);
18641 if (N1.getOpcode() != ISD::FP_EXTEND &&
18642 N1.getOpcode() != ISD::FP_ROUND)
18643 return false;
18644 EVT N1VT = N1->getValueType(0);
18645 EVT N1Op0VT = N1->getOperand(0).getValueType();
18646 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18647}
18648
18649SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18650 SDValue N0 = N->getOperand(0);
18651 SDValue N1 = N->getOperand(1);
18652 EVT VT = N->getValueType(0);
18653 SDLoc DL(N);
18654
18655 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18656 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18657 return C;
18658
18659 // copysign(x, fp_extend(y)) -> copysign(x, y)
18660 // copysign(x, fp_round(y)) -> copysign(x, y)
18662 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18663
18665 return SDValue(N, 0);
18666
18667 return SDValue();
18668}
18669
18670SDValue DAGCombiner::visitFPOW(SDNode *N) {
18671 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18672 if (!ExponentC)
18673 return SDValue();
18674 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18675
18676 // Try to convert x ** (1/3) into cube root.
18677 // TODO: Handle the various flavors of long double.
18678 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18679 // Some range near 1/3 should be fine.
18680 EVT VT = N->getValueType(0);
18681 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18682 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18683 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18684 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18685 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
18686 // For regular numbers, rounding may cause the results to differ.
18687 // Therefore, we require { nsz ninf nnan afn } for this transform.
18688 // TODO: We could select out the special cases if we don't have nsz/ninf.
18689 SDNodeFlags Flags = N->getFlags();
18690 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18691 !Flags.hasApproximateFuncs())
18692 return SDValue();
18693
18694 // Do not create a cbrt() libcall if the target does not have it, and do not
18695 // turn a pow that has lowering support into a cbrt() libcall.
18696 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18699 return SDValue();
18700
18701 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18702 }
18703
18704 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18705 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18706 // TODO: This could be extended (using a target hook) to handle smaller
18707 // power-of-2 fractional exponents.
18708 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18709 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18710 if (ExponentIs025 || ExponentIs075) {
18711 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18712 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18713 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18714 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18715 // For regular numbers, rounding may cause the results to differ.
18716 // Therefore, we require { nsz ninf afn } for this transform.
18717 // TODO: We could select out the special cases if we don't have nsz/ninf.
18718 SDNodeFlags Flags = N->getFlags();
18719
18720 // We only need no signed zeros for the 0.25 case.
18721 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18722 !Flags.hasApproximateFuncs())
18723 return SDValue();
18724
18725 // Don't double the number of libcalls. We are trying to inline fast code.
18727 return SDValue();
18728
18729 // Assume that libcalls are the smallest code.
18730 // TODO: This restriction should probably be lifted for vectors.
18731 if (ForCodeSize)
18732 return SDValue();
18733
18734 // pow(X, 0.25) --> sqrt(sqrt(X))
18735 SDLoc DL(N);
18736 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18737 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18738 if (ExponentIs025)
18739 return SqrtSqrt;
18740 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18741 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18742 }
18743
18744 return SDValue();
18745}
18746
18748 const TargetLowering &TLI) {
18749 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18750 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18751 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18752 // conversions would return +0.0.
18753 // FIXME: We should be able to use node-level FMF here.
18754 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
18755 EVT VT = N->getValueType(0);
18756 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18758 return SDValue();
18759
18760 // fptosi/fptoui round towards zero, so converting from FP to integer and
18761 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18762 SDValue N0 = N->getOperand(0);
18763 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18764 N0.getOperand(0).getValueType() == VT)
18765 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18766
18767 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18768 N0.getOperand(0).getValueType() == VT)
18769 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18770
18771 return SDValue();
18772}
18773
18774SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18775 SDValue N0 = N->getOperand(0);
18776 EVT VT = N->getValueType(0);
18777 EVT OpVT = N0.getValueType();
18778 SDLoc DL(N);
18779
18780 // [us]itofp(undef) = 0, because the result value is bounded.
18781 if (N0.isUndef())
18782 return DAG.getConstantFP(0.0, DL, VT);
18783
18784 // fold (sint_to_fp c1) -> c1fp
18785 // ...but only if the target supports immediate floating-point values
18786 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18787 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18788 return C;
18789
18790 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18791 // but UINT_TO_FP is legal on this target, try to convert.
18792 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18793 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18794 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18795 if (DAG.SignBitIsZero(N0))
18796 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18797 }
18798
18799 // The next optimizations are desirable only if SELECT_CC can be lowered.
18800 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18801 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18802 !VT.isVector() &&
18803 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18804 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18805 DAG.getConstantFP(0.0, DL, VT));
18806
18807 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18808 // (select (setcc x, y, cc), 1.0, 0.0)
18809 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18810 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18811 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18812 return DAG.getSelect(DL, VT, N0.getOperand(0),
18813 DAG.getConstantFP(1.0, DL, VT),
18814 DAG.getConstantFP(0.0, DL, VT));
18815
18816 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18817 return FTrunc;
18818
18819 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18820 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18822 N0.getOperand(0).getValueType()))
18823 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18824
18825 return SDValue();
18826}
18827
18828SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18829 SDValue N0 = N->getOperand(0);
18830 EVT VT = N->getValueType(0);
18831 EVT OpVT = N0.getValueType();
18832 SDLoc DL(N);
18833
18834 // [us]itofp(undef) = 0, because the result value is bounded.
18835 if (N0.isUndef())
18836 return DAG.getConstantFP(0.0, DL, VT);
18837
18838 // fold (uint_to_fp c1) -> c1fp
18839 // ...but only if the target supports immediate floating-point values
18840 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18841 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18842 return C;
18843
18844 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18845 // but SINT_TO_FP is legal on this target, try to convert.
18846 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18847 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18848 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18849 if (DAG.SignBitIsZero(N0))
18850 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18851 }
18852
18853 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18854 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18855 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18856 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18857 DAG.getConstantFP(0.0, DL, VT));
18858
18859 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18860 return FTrunc;
18861
18862 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18863 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18865 N0.getOperand(0).getValueType()))
18866 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18867
18868 return SDValue();
18869}
18870
18871 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18873 SDValue N0 = N->getOperand(0);
18874 EVT VT = N->getValueType(0);
18875
18876 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18877 return SDValue();
18878
18879 SDValue Src = N0.getOperand(0);
18880 EVT SrcVT = Src.getValueType();
18881 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18882 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18883
18884 // We can safely assume the conversion won't overflow the output range,
18885 // because (for example) (uint8_t)18293.f is undefined behavior.
18886
18887 // Since we can assume the conversion won't overflow, our decision as to
18888 // whether the input will fit in the float should depend on the minimum
18889 // of the input range and output range.
18890
18891 // This means this is also safe for a signed input and unsigned output, since
18892 // a negative input would lead to undefined behavior.
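// For example, i16 -> f32 -> i32 can be folded to an extension because f32's
// 24-bit significand represents every 16-bit integer exactly, whereas
// i32 -> f32 -> i32 cannot be folded (2^24 + 1 is not representable in f32).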
18893 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18894 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18895 unsigned ActualSize = std::min(InputSize, OutputSize);
18896 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18897
18898 // We can only fold away the float conversion if the input range can be
18899 // represented exactly in the float range.
18900 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18901 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18902 unsigned ExtOp =
18903 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18904 return DAG.getNode(ExtOp, DL, VT, Src);
18905 }
18906 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18907 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18908 return DAG.getBitcast(VT, Src);
18909 }
18910 return SDValue();
18911}
18912
18913SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18914 SDValue N0 = N->getOperand(0);
18915 EVT VT = N->getValueType(0);
18916 SDLoc DL(N);
18917
18918 // fold (fp_to_sint undef) -> undef
18919 if (N0.isUndef())
18920 return DAG.getUNDEF(VT);
18921
18922 // fold (fp_to_sint c1fp) -> c1
18923 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18924 return C;
18925
18926 return FoldIntToFPToInt(N, DL, DAG);
18927}
18928
18929SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18930 SDValue N0 = N->getOperand(0);
18931 EVT VT = N->getValueType(0);
18932 SDLoc DL(N);
18933
18934 // fold (fp_to_uint undef) -> undef
18935 if (N0.isUndef())
18936 return DAG.getUNDEF(VT);
18937
18938 // fold (fp_to_uint c1fp) -> c1
18939 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
18940 return C;
18941
18942 return FoldIntToFPToInt(N, DL, DAG);
18943}
18944
18945SDValue DAGCombiner::visitXROUND(SDNode *N) {
18946 SDValue N0 = N->getOperand(0);
18947 EVT VT = N->getValueType(0);
18948
18949 // fold (lrint|llrint undef) -> undef
18950 // fold (lround|llround undef) -> undef
18951 if (N0.isUndef())
18952 return DAG.getUNDEF(VT);
18953
18954 // fold (lrint|llrint c1fp) -> c1
18955 // fold (lround|llround c1fp) -> c1
18956 if (SDValue C =
18957 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18958 return C;
18959
18960 return SDValue();
18961}
18962
18963SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
18964 SDValue N0 = N->getOperand(0);
18965 SDValue N1 = N->getOperand(1);
18966 EVT VT = N->getValueType(0);
18967 SDLoc DL(N);
18968
18969 // fold (fp_round c1fp) -> c1fp
18970 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
18971 return C;
18972
18973 // fold (fp_round (fp_extend x)) -> x
18974 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
18975 return N0.getOperand(0);
18976
18977 // fold (fp_round (fp_round x)) -> (fp_round x)
18978 if (N0.getOpcode() == ISD::FP_ROUND) {
18979 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18980 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
18981
18982 // Avoid folding legal fp_rounds into non-legal ones.
18983 if (!hasOperation(ISD::FP_ROUND, VT))
18984 return SDValue();
18985
18986 // Skip this folding if it results in an fp_round from f80 to f16.
18987 //
18988 // f80 to f16 always generates an expensive (and as yet, unimplemented)
18989 // libcall to __truncxfhf2 instead of selecting native f16 conversion
18990 // instructions from f32 or f64. Moreover, the first (value-preserving)
18991 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
18992 // x86.
18993 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
18994 return SDValue();
18995
18996 // If the first fp_round isn't a value-preserving truncation, it might
18997 // introduce a tie in the second fp_round that wouldn't occur in the
18998 // single-step fp_round we want to fold to.
18999 // In other words, double rounding isn't the same as rounding once.
19000 // Also, this is a value-preserving truncation iff both fp_rounds are.
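// For instance, when going f64 -> f32 -> f16, the first rounding can land
// exactly halfway between two adjacent f16 values even though the original
// f64 value was not, and the second round-to-nearest-even step may then pick
// a different f16 result than a single f64 -> f16 rounding would.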
19001 if ((N->getFlags().hasAllowContract() &&
19002 N0->getFlags().hasAllowContract()) ||
19003 N0IsTrunc)
19004 return DAG.getNode(
19005 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19006 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19007 }
19008
19009 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19010 // Note: From a legality perspective, this is a two step transform. First,
19011 // we duplicate the fp_round to the arguments of the copysign, then we
19012 // eliminate the fp_round on Y. The second step requires an additional
19013 // predicate to match the implementation above.
19014 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19016 N0.getValueType())) {
19017 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19018 N0.getOperand(0), N1);
19019 AddToWorklist(Tmp.getNode());
19020 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19021 }
19022
19023 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19024 return NewVSel;
19025
19026 return SDValue();
19027}
19028
19029// Eliminate a floating-point widening of a narrowed value if the fast math
19030// flags allow it.
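// For example, (fp_extend (fp_round X)) with X : f32 routed through f16 both
// rounds the value and can overflow finite values to infinity (and alter NaN
// encodings), so the pair is only removed when the nnan/ninf and contract
// flags checked below say those effects may be ignored.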
19032 SDValue N0 = N->getOperand(0);
19033 EVT VT = N->getValueType(0);
19034
19035 unsigned NarrowingOp;
19036 switch (N->getOpcode()) {
19037 case ISD::FP16_TO_FP:
19038 NarrowingOp = ISD::FP_TO_FP16;
19039 break;
19040 case ISD::BF16_TO_FP:
19041 NarrowingOp = ISD::FP_TO_BF16;
19042 break;
19043 case ISD::FP_EXTEND:
19044 NarrowingOp = ISD::FP_ROUND;
19045 break;
19046 default:
19047 llvm_unreachable("Expected widening FP cast");
19048 }
19049
19050 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19051 const SDNodeFlags NarrowFlags = N0->getFlags();
19052 const SDNodeFlags WidenFlags = N->getFlags();
19053 // Narrowing can introduce inf and change the encoding of a nan, so the
19054 // widen must have the nnan and ninf flags to indicate that we don't need to
19055 // care about that. We are also removing a rounding step, and that requires
19056 // both the narrow and widen to allow contraction.
19057 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19058 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19059 return N0.getOperand(0);
19060 }
19061 }
19062
19063 return SDValue();
19064}
19065
19066SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19067 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19068 SDValue N0 = N->getOperand(0);
19069 EVT VT = N->getValueType(0);
19070 SDLoc DL(N);
19071
19072 if (VT.isVector())
19073 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19074 return FoldedVOp;
19075
19076 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19077 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19078 return SDValue();
19079
19080 // fold (fp_extend c1fp) -> c1fp
19081 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19082 return C;
19083
19084 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19085 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19087 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19088
19089 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
19090 // value of X.
19091 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19092 SDValue In = N0.getOperand(0);
19093 if (In.getValueType() == VT) return In;
19094 if (VT.bitsLT(In.getValueType()))
19095 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19096 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19097 }
19098
19099 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
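// The extending load produces the wide value for this node directly; any
// other users of the original narrow load are redirected to an fp_round of
// the extended value, so the narrow load itself becomes dead.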
19100 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19102 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19103 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19104 LN0->getChain(),
19105 LN0->getBasePtr(), N0.getValueType(),
19106 LN0->getMemOperand());
19107 CombineTo(N, ExtLoad);
19108 CombineTo(
19109 N0.getNode(),
19110 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19111 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19112 ExtLoad.getValue(1));
19113 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19114 }
19115
19116 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19117 return NewVSel;
19118
19119 if (SDValue CastEliminated = eliminateFPCastPair(N))
19120 return CastEliminated;
19121
19122 return SDValue();
19123}
19124
19125SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19126 SDValue N0 = N->getOperand(0);
19127 EVT VT = N->getValueType(0);
19128
19129 // fold (fceil c1) -> fceil(c1)
19130 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19131 return C;
19132
19133 return SDValue();
19134}
19135
19136SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19137 SDValue N0 = N->getOperand(0);
19138 EVT VT = N->getValueType(0);
19139
19140 // fold (ftrunc c1) -> ftrunc(c1)
19141 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19142 return C;
19143
19144 // fold ftrunc (known rounded int x) -> x
19145 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19146 // likely to be generated to extract an integer from a rounded floating-point value.
19147 switch (N0.getOpcode()) {
19148 default: break;
19149 case ISD::FRINT:
19150 case ISD::FTRUNC:
19151 case ISD::FNEARBYINT:
19152 case ISD::FROUNDEVEN:
19153 case ISD::FFLOOR:
19154 case ISD::FCEIL:
19155 return N0;
19156 }
19157
19158 return SDValue();
19159}
19160
19161SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19162 SDValue N0 = N->getOperand(0);
19163
19164 // fold (ffrexp c1) -> ffrexp(c1)
19166 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19167 return SDValue();
19168}
19169
19170SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19171 SDValue N0 = N->getOperand(0);
19172 EVT VT = N->getValueType(0);
19173
19174 // fold (ffloor c1) -> ffloor(c1)
19175 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19176 return C;
19177
19178 return SDValue();
19179}
19180
19181SDValue DAGCombiner::visitFNEG(SDNode *N) {
19182 SDValue N0 = N->getOperand(0);
19183 EVT VT = N->getValueType(0);
19184 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19185
19186 // Constant fold FNEG.
19187 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19188 return C;
19189
19190 if (SDValue NegN0 =
19191 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19192 return NegN0;
19193
19194 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19195 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19196 // know it was called from a context with a nsz flag if the input fsub does
19197 // not.
19198 if (N0.getOpcode() == ISD::FSUB &&
19200 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
19201 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19202 N0.getOperand(0));
19203 }
19204
19206 return SDValue(N, 0);
19207
19208 if (SDValue Cast = foldSignChangeInBitcast(N))
19209 return Cast;
19210
19211 return SDValue();
19212}
19213
19214SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19215 SDValue N0 = N->getOperand(0);
19216 SDValue N1 = N->getOperand(1);
19217 EVT VT = N->getValueType(0);
19218 const SDNodeFlags Flags = N->getFlags();
19219 unsigned Opc = N->getOpcode();
19220 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19221 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
19222 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19223
19224 // Constant fold.
19225 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19226 return C;
19227
19228 // Canonicalize to constant on RHS.
19231 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19232
19233 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19234 const APFloat &AF = N1CFP->getValueAPF();
19235
19236 // minnum(X, nan) -> X
19237 // maxnum(X, nan) -> X
19238 // minimum(X, nan) -> nan
19239 // maximum(X, nan) -> nan
19240 if (AF.isNaN())
19241 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
19242
19243 // In the following folds, inf can be replaced with the largest finite
19244 // float, if the ninf flag is set.
19245 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19246 // minnum(X, -inf) -> -inf
19247 // maxnum(X, +inf) -> +inf
19248 // minimum(X, -inf) -> -inf if nnan
19249 // maximum(X, +inf) -> +inf if nnan
19250 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
19251 return N->getOperand(1);
19252
19253 // minnum(X, +inf) -> X if nnan
19254 // maxnum(X, -inf) -> X if nnan
19255 // minimum(X, +inf) -> X
19256 // maximum(X, -inf) -> X
19257 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
19258 return N->getOperand(0);
19259 }
19260 }
19261
19262 if (SDValue SD = reassociateReduction(
19263 PropagatesNaN
19266 Opc, SDLoc(N), VT, N0, N1, Flags))
19267 return SD;
19268
19269 return SDValue();
19270}
19271
19272SDValue DAGCombiner::visitFABS(SDNode *N) {
19273 SDValue N0 = N->getOperand(0);
19274 EVT VT = N->getValueType(0);
19275 SDLoc DL(N);
19276
19277 // fold (fabs c1) -> fabs(c1)
19278 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19279 return C;
19280
19282 return SDValue(N, 0);
19283
19284 if (SDValue Cast = foldSignChangeInBitcast(N))
19285 return Cast;
19286
19287 return SDValue();
19288}
19289
19290SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19291 SDValue Chain = N->getOperand(0);
19292 SDValue N1 = N->getOperand(1);
19293 SDValue N2 = N->getOperand(2);
19294
19295 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19296 // nondeterministic jumps).
19297 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19298 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19299 N1->getOperand(0), N2, N->getFlags());
19300 }
19301
19302 // Variant of the previous fold where there is a SETCC in between:
19303 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19304 // =>
19305 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19306 // =>
19307 // BRCOND(SETCC(X, CONST, Cond))
19308 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19309 // isn't equivalent to true or false.
19310 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19311 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19312 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19313 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19314 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
19315 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19316 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19317 bool Updated = false;
19318
19319 // Is 'X Cond C' always true or false?
19320 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19321 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19322 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19323 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19324 (Cond == ISD::SETGT && C->isMaxSignedValue());
19325 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19326 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19327 (Cond == ISD::SETUGE && C->isZero()) ||
19328 (Cond == ISD::SETGE && C->isMinSignedValue());
19329 return True || False;
19330 };
19331
19332 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19333 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19334 S0 = S0->getOperand(0);
19335 Updated = true;
19336 }
19337 }
19338 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19339 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19340 S1 = S1->getOperand(0);
19341 Updated = true;
19342 }
19343 }
19344
19345 if (Updated)
19346 return DAG.getNode(
19347 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19348 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19349 N->getFlags());
19350 }
19351
19352 // If N is a constant we could fold this into a fallthrough or unconditional
19353 // branch. However that doesn't happen very often in normal code, because
19354 // Instcombine/SimplifyCFG should have handled the available opportunities.
19355 // If we did this folding here, it would be necessary to update the
19356 // MachineBasicBlock CFG, which is awkward.
19357
19358 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19359 // on the target.
19360 if (N1.getOpcode() == ISD::SETCC &&
19362 N1.getOperand(0).getValueType())) {
19363 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19364 Chain, N1.getOperand(2),
19365 N1.getOperand(0), N1.getOperand(1), N2);
19366 }
19367
19368 if (N1.hasOneUse()) {
19369 // rebuildSetCC calls visitXor which may change the Chain when there is a
19370 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19371 HandleSDNode ChainHandle(Chain);
19372 if (SDValue NewN1 = rebuildSetCC(N1))
19373 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19374 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19375 }
19376
19377 return SDValue();
19378}
19379
19380SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19381 if (N.getOpcode() == ISD::SRL ||
19382 (N.getOpcode() == ISD::TRUNCATE &&
19383 (N.getOperand(0).hasOneUse() &&
19384 N.getOperand(0).getOpcode() == ISD::SRL))) {
19385 // Look past the truncate.
19386 if (N.getOpcode() == ISD::TRUNCATE)
19387 N = N.getOperand(0);
19388
19389 // Match this pattern so that we can generate simpler code:
19390 //
19391 // %a = ...
19392 // %b = and i32 %a, 2
19393 // %c = srl i32 %b, 1
19394 // brcond i32 %c ...
19395 //
19396 // into
19397 //
19398 // %a = ...
19399 // %b = and i32 %a, 2
19400 //   %c = setcc ne %b, 0
19401 // brcond %c ...
19402 //
19403 // This applies only when the AND constant value has one bit set and the
19404 // SRL constant is equal to the log2 of the AND constant. The back-end is
19405 // smart enough to convert the result into a TEST/JMP sequence.
19406 SDValue Op0 = N.getOperand(0);
19407 SDValue Op1 = N.getOperand(1);
19408
19409 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19410 SDValue AndOp1 = Op0.getOperand(1);
19411
19412 if (AndOp1.getOpcode() == ISD::Constant) {
19413 const APInt &AndConst = AndOp1->getAsAPIntVal();
19414
19415 if (AndConst.isPowerOf2() &&
19416 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19417 SDLoc DL(N);
19418 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19419 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19420 ISD::SETNE);
19421 }
19422 }
19423 }
19424 }
19425
19426 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19427 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19428 if (N.getOpcode() == ISD::XOR) {
19429 // Because we may call this on a speculatively constructed
19430 // SimplifiedSetCC Node, we need to simplify this node first.
19431 // Ideally this should be folded into SimplifySetCC and not
19432 // here. For now, grab a handle to N so we don't lose it from
19433 // replacements internal to the visit.
19434 HandleSDNode XORHandle(N);
19435 while (N.getOpcode() == ISD::XOR) {
19436 SDValue Tmp = visitXOR(N.getNode());
19437 // No simplification done.
19438 if (!Tmp.getNode())
19439 break;
19440 // Returning N is a form of in-visit replacement that may have invalidated
19441 // N. Grab the value from the handle.
19442 if (Tmp.getNode() == N.getNode())
19443 N = XORHandle.getValue();
19444 else // Node simplified. Try simplifying again.
19445 N = Tmp;
19446 }
19447
19448 if (N.getOpcode() != ISD::XOR)
19449 return N;
19450
19451 SDValue Op0 = N->getOperand(0);
19452 SDValue Op1 = N->getOperand(1);
19453
19454 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19455 bool Equal = false;
19456 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19457 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19458 Op0.getValueType() == MVT::i1) {
19459 N = Op0;
19460 Op0 = N->getOperand(0);
19461 Op1 = N->getOperand(1);
19462 Equal = true;
19463 }
19464
19465 EVT SetCCVT = N.getValueType();
19466 if (LegalTypes)
19467 SetCCVT = getSetCCResultType(SetCCVT);
19468 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19469 // it would introduce illegal operations post-legalization as this can
19470 // result in infinite looping between converting xor->setcc here, and
19471 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19473 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19474 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19475 }
19476 }
19477
19478 return SDValue();
19479}
19480
19481// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19482//
19483SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19484 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19485 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19486
19487 // If N is a constant we could fold this into a fallthrough or unconditional
19488 // branch. However that doesn't happen very often in normal code, because
19489 // Instcombine/SimplifyCFG should have handled the available opportunities.
19490 // If we did this folding here, it would be necessary to update the
19491 // MachineBasicBlock CFG, which is awkward.
19492
19493 // Use SimplifySetCC to simplify SETCC's.
19495 CondLHS, CondRHS, CC->get(), SDLoc(N),
19496 false);
19497 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19498
19499 // fold to a simpler setcc
19500 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19501 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19502 N->getOperand(0), Simp.getOperand(2),
19503 Simp.getOperand(0), Simp.getOperand(1),
19504 N->getOperand(4));
19505
19506 return SDValue();
19507}
19508
19509static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19510 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19511 const TargetLowering &TLI) {
19512 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19513 if (LD->isIndexed())
19514 return false;
19515 EVT VT = LD->getMemoryVT();
19516 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19517 return false;
19518 Ptr = LD->getBasePtr();
19519 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19520 if (ST->isIndexed())
19521 return false;
19522 EVT VT = ST->getMemoryVT();
19523 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19524 return false;
19525 Ptr = ST->getBasePtr();
19526 IsLoad = false;
19527 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19528 if (LD->isIndexed())
19529 return false;
19530 EVT VT = LD->getMemoryVT();
19531 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19532 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19533 return false;
19534 Ptr = LD->getBasePtr();
19535 IsMasked = true;
19536 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19537 if (ST->isIndexed())
19538 return false;
19539 EVT VT = ST->getMemoryVT();
19540 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19541 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19542 return false;
19543 Ptr = ST->getBasePtr();
19544 IsLoad = false;
19545 IsMasked = true;
19546 } else {
19547 return false;
19548 }
19549 return true;
19550}
19551
19552/// Try turning a load/store into a pre-indexed load/store when the base
19553/// pointer is an add or subtract and it has other uses besides the load/store.
19554/// After the transformation, the new indexed load/store has effectively folded
19555/// the add/subtract in and all of its other uses are redirected to the
19556/// new load/store.
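///
/// As a rough illustration (a hypothetical DAG, not tied to any particular
/// target), on a target with r+i pre-indexed loads:
///   t = add p, 16
///   v = load t
///   u = sub t, 4        ; other use of the add
/// may become
///   v, t' = pre-indexed load [p, #16]   ; t' is the written-back p + 16
///   u = sub t', 4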
19557bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19558 if (Level < AfterLegalizeDAG)
19559 return false;
19560
19561 bool IsLoad = true;
19562 bool IsMasked = false;
19563 SDValue Ptr;
19564 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19565 Ptr, TLI))
19566 return false;
19567
19568 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19569 // out. There is no reason to make this a preinc/predec.
19570 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19571 Ptr->hasOneUse())
19572 return false;
19573
19574 // Ask the target to do addressing mode selection.
19575 SDValue BasePtr;
19576 SDValue Offset;
19577 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19578 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19579 return false;
19580
19581 // Backends without true r+i pre-indexed forms may need to pass a
19582 // constant base with a variable offset so that constant coercion
19583 // will work with the patterns in canonical form.
19584 bool Swapped = false;
19585 if (isa<ConstantSDNode>(BasePtr)) {
19586 std::swap(BasePtr, Offset);
19587 Swapped = true;
19588 }
19589
19590 // Don't create an indexed load / store with zero offset.
19591 if (isNullConstant(Offset))
19592 return false;
19593
19594 // Try turning it into a pre-indexed load / store except when:
19595 // 1) The new base ptr is a frame index.
19596 // 2) If N is a store and the new base ptr is either the same as or is a
19597 // predecessor of the value being stored.
19598 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19599 // that would create a cycle.
19600 // 4) All uses are load / store ops that use it as old base ptr.
19601
19602 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19603 // (plus the implicit offset) to a register to preinc anyway.
19604 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19605 return false;
19606
19607 // Check #2.
19608 if (!IsLoad) {
19609 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19610 : cast<StoreSDNode>(N)->getValue();
19611
19612 // Would require a copy.
19613 if (Val == BasePtr)
19614 return false;
19615
19616 // Would create a cycle.
19617 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19618 return false;
19619 }
19620
19621 // Caches for hasPredecessorHelper.
19622 SmallPtrSet<const SDNode *, 32> Visited;
19623 SmallVector<const SDNode *, 16> Worklist;
19624 Worklist.push_back(N);
19625
19626 // If the offset is a constant, there may be other adds of constants that
19627 // can be folded with this one. We should do this to avoid having to keep
19628 // a copy of the original base pointer.
19629 SmallVector<SDNode *, 16> OtherUses;
19631 if (isa<ConstantSDNode>(Offset))
19632 for (SDUse &Use : BasePtr->uses()) {
19633 // Skip the use that is Ptr and uses of other results from BasePtr's
19634 // node (important for nodes that return multiple results).
19635 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19636 continue;
19637
19638 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19639 MaxSteps))
19640 continue;
19641
19642 if (Use.getUser()->getOpcode() != ISD::ADD &&
19643 Use.getUser()->getOpcode() != ISD::SUB) {
19644 OtherUses.clear();
19645 break;
19646 }
19647
19648 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19649 if (!isa<ConstantSDNode>(Op1)) {
19650 OtherUses.clear();
19651 break;
19652 }
19653
19654 // FIXME: In some cases, we can be smarter about this.
19655 if (Op1.getValueType() != Offset.getValueType()) {
19656 OtherUses.clear();
19657 break;
19658 }
19659
19660 OtherUses.push_back(Use.getUser());
19661 }
19662
19663 if (Swapped)
19664 std::swap(BasePtr, Offset);
19665
19666 // Now check for #3 and #4.
19667 bool RealUse = false;
19668
19669 for (SDNode *User : Ptr->users()) {
19670 if (User == N)
19671 continue;
19672 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
19673 return false;
19674
19675 // If Ptr may be folded into the addressing mode of the other use, then that
19676 // use alone does not make this transformation profitable.
19677 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
19678 RealUse = true;
19679 }
19680
19681 if (!RealUse)
19682 return false;
19683
19684 SDValue Result;
19685 if (!IsMasked) {
19686 if (IsLoad)
19687 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19688 else
19689 Result =
19690 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19691 } else {
19692 if (IsLoad)
19693 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19694 Offset, AM);
19695 else
19696 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19697 Offset, AM);
19698 }
19699 ++PreIndexedNodes;
19700 ++NodesCombined;
19701 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19702 Result.dump(&DAG); dbgs() << '\n');
19703 WorklistRemover DeadNodes(*this);
19704 if (IsLoad) {
19705 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19706 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19707 } else {
19708 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19709 }
19710
19711 // Finally, since the node is now dead, remove it from the graph.
19712 deleteAndRecombine(N);
19713
19714 if (Swapped)
19715 std::swap(BasePtr, Offset);
19716
19717 // Replace other uses of BasePtr that can be updated to use Ptr
19718 for (SDNode *OtherUse : OtherUses) {
19719 unsigned OffsetIdx = 1;
19720 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19721 OffsetIdx = 0;
19722 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
19723 "Expected BasePtr operand");
19724
19725 // We need to replace ptr0 in the following expression:
19726 // x0 * offset0 + y0 * ptr0 = t0
19727 // knowing that
19728 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19729 //
19730 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19731 // indexed load/store and the expression that needs to be re-written.
19732 //
19733 // Therefore, we have:
19734 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
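// As a worked example with hypothetical values: for a PRE_INC combine with
// Offset1 = 4 that was not swapped (x1 = y1 = 1), and an OtherUse of the form
// (add BasePtr, 16) (so x0 = y0 = 1), we get Opcode = ADD and
// CNV = 16 - 4 = 12, i.e. t0 is rebuilt as t1 + 12, where t1 is the
// write-back pointer BasePtr + 4 produced by the indexed load/store.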
19735
19736 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
19737 const APInt &Offset0 = CN->getAPIntValue();
19738 const APInt &Offset1 = Offset->getAsAPIntVal();
19739 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19740 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19741 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19742 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19743
19744 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19745
19746 APInt CNV = Offset0;
19747 if (X0 < 0) CNV = -CNV;
19748 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19749 else CNV = CNV - Offset1;
19750
19751 SDLoc DL(OtherUse);
19752
19753 // We can now generate the new expression.
19754 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19755 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19756
19757 SDValue NewUse =
19758 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
19759 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
19760 deleteAndRecombine(OtherUse);
19761 }
19762
19763 // Replace the uses of Ptr with uses of the updated base value.
19764 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19765 deleteAndRecombine(Ptr.getNode());
19766 AddToWorklist(Result.getNode());
19767
19768 return true;
19769}
19770
19771 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19772 SDValue &BasePtr, SDValue &Offset,
19773 ISD::MemIndexedMode &AM,
19774 SelectionDAG &DAG,
19775 const TargetLowering &TLI) {
19776 if (PtrUse == N ||
19777 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19778 return false;
19779
19780 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19781 return false;
19782
19783 // Don't create an indexed load / store with zero offset.
19784 if (isNullConstant(Offset))
19785 return false;
19786
19787 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19788 return false;
19789
19792 for (SDNode *User : BasePtr->users()) {
19793 if (User == Ptr.getNode())
19794 continue;
19795
19796 // Say no if there's a later user which could perform the indexing instead.
19797 if (isa<MemSDNode>(User)) {
19798 bool IsLoad = true;
19799 bool IsMasked = false;
19800 SDValue OtherPtr;
19801 if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19802 IsMasked, OtherPtr, TLI)) {
19804 Worklist.push_back(User);
19805 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19806 return false;
19807 }
19808 }
19809
19810 // If all the uses are load / store addresses, then don't do the
19811 // transformation.
19812 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19813 for (SDNode *UserUser : User->users())
19814 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19815 return false;
19816 }
19817 }
19818 return true;
19819}
19820
19821 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19822 bool &IsMasked, SDValue &Ptr,
19823 SDValue &BasePtr, SDValue &Offset,
19824 ISD::MemIndexedMode &AM,
19825 SelectionDAG &DAG,
19826 const TargetLowering &TLI) {
19827 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19828 IsMasked, Ptr, TLI) ||
19829 Ptr->hasOneUse())
19830 return nullptr;
19831
19832 // Try turning it into a post-indexed load / store except when
19833 // 1) All uses are load / store ops that use it as base ptr (and
19834 // it may be folded into the addressing mode).
19835 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19836 // nor a successor of N. Otherwise, if Op is folded that would
19837 // create a cycle.
19839 for (SDNode *Op : Ptr->users()) {
19840 // Check for #1.
19841 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19842 continue;
19843
19844 // Check for #2.
19847 // Ptr is predecessor to both N and Op.
19848 Visited.insert(Ptr.getNode());
19849 Worklist.push_back(N);
19850 Worklist.push_back(Op);
19851 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19852 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19853 return Op;
19854 }
19855 return nullptr;
19856}
19857
19858 /// Try to combine a load/store with an add/sub of the base pointer node into
19859 /// a post-indexed load/store. The transformation effectively folds the
19860 /// add/subtract into the new indexed load/store, and all uses of the add/sub
19861 /// are redirected to the new load/store.
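///
/// As a rough illustration (a hypothetical DAG, target-dependent syntax):
///   v = load p
///   q = add p, 8        ; independent of the loaded value
/// may become, on a target with post-indexed addressing:
///   v, q = post-indexed load [p], #8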
19862bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19863 if (Level < AfterLegalizeDAG)
19864 return false;
19865
19866 bool IsLoad = true;
19867 bool IsMasked = false;
19868 SDValue Ptr;
19869 SDValue BasePtr;
19870 SDValue Offset;
19871 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19872 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19873 Offset, AM, DAG, TLI);
19874 if (!Op)
19875 return false;
19876
19877 SDValue Result;
19878 if (!IsMasked)
19879 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19880 Offset, AM)
19881 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19882 BasePtr, Offset, AM);
19883 else
19884 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19885 BasePtr, Offset, AM)
19886 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19887 BasePtr, Offset, AM);
19888 ++PostIndexedNodes;
19889 ++NodesCombined;
19890 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19891 Result.dump(&DAG); dbgs() << '\n');
19892 WorklistRemover DeadNodes(*this);
19893 if (IsLoad) {
19894 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19895 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19896 } else {
19897 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19898 }
19899
19900 // Finally, since the node is now dead, remove it from the graph.
19901 deleteAndRecombine(N);
19902
19903 // Replace the uses of Op with uses of the updated base value.
19904 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
19905 Result.getValue(IsLoad ? 1 : 0));
19906 deleteAndRecombine(Op);
19907 return true;
19908}
19909
19910/// Return the base-pointer arithmetic from an indexed \p LD.
19911SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19912 ISD::MemIndexedMode AM = LD->getAddressingMode();
19913 assert(AM != ISD::UNINDEXED);
19914 SDValue BP = LD->getOperand(1);
19915 SDValue Inc = LD->getOperand(2);
19916
19917 // Some backends use TargetConstants for load offsets, but don't expect
19918 // TargetConstants in general ADD nodes. We can convert these constants into
19919 // regular Constants (if the constant is not opaque).
19920 assert((Inc.getOpcode() != ISD::TargetConstant ||
19921 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19922 "Cannot split out indexing using opaque target constants");
19923 if (Inc.getOpcode() == ISD::TargetConstant) {
19924 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19925 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19926 ConstInc->getValueType(0));
19927 }
19928
19929 unsigned Opc =
19930 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19931 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19932}
19933
19934 static ElementCount numVectorEltsOrZero(EVT T) {
19935 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19936}
19937
19938bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19939 EVT STType = Val.getValueType();
19940 EVT STMemType = ST->getMemoryVT();
19941 if (STType == STMemType)
19942 return true;
19943 if (isTypeLegal(STMemType))
19944 return false; // fail.
19945 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
19946 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
19947 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
19948 return true;
19949 }
19950 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
19951 STType.isInteger() && STMemType.isInteger()) {
19952 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
19953 return true;
19954 }
19955 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
19956 Val = DAG.getBitcast(STMemType, Val);
19957 return true;
19958 }
19959 return false; // fail.
19960}
19961
19962bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
19963 EVT LDMemType = LD->getMemoryVT();
19964 EVT LDType = LD->getValueType(0);
19965 assert(Val.getValueType() == LDMemType &&
19966 "Attempting to extend value of non-matching type");
19967 if (LDType == LDMemType)
19968 return true;
19969 if (LDMemType.isInteger() && LDType.isInteger()) {
19970 switch (LD->getExtensionType()) {
19971 case ISD::NON_EXTLOAD:
19972 Val = DAG.getBitcast(LDType, Val);
19973 return true;
19974 case ISD::EXTLOAD:
19975 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
19976 return true;
19977 case ISD::SEXTLOAD:
19978 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
19979 return true;
19980 case ISD::ZEXTLOAD:
19981 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
19982 return true;
19983 }
19984 }
19985 return false;
19986}
19987
19988StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
19989 int64_t &Offset) {
19990 SDValue Chain = LD->getOperand(0);
19991
19992 // Look through CALLSEQ_START.
19993 if (Chain.getOpcode() == ISD::CALLSEQ_START)
19994 Chain = Chain->getOperand(0);
19995
19996 StoreSDNode *ST = nullptr;
19997 SmallVector<SDValue, 8> Aliases;
19998 if (Chain.getOpcode() == ISD::TokenFactor) {
19999 // Look for unique store within the TokenFactor.
20000 for (SDValue Op : Chain->ops()) {
20001 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20002 if (!Store)
20003 continue;
20004 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20005 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20006 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20007 continue;
20008 // Make sure the store is not aliased with any nodes in TokenFactor.
20009 GatherAllAliases(Store, Chain, Aliases);
20010 if (Aliases.empty() ||
20011 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20012 ST = Store;
20013 break;
20014 }
20015 } else {
20016 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20017 if (Store) {
20018 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20019 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20020 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20021 ST = Store;
20022 }
20023 }
20024
20025 return ST;
20026}
20027
20028SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20029 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20030 return SDValue();
20031 SDValue Chain = LD->getOperand(0);
20032 int64_t Offset;
20033
20034 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20035 // TODO: Relax this restriction for unordered atomics (see D66309)
20036 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20037 return SDValue();
20038
20039 EVT LDType = LD->getValueType(0);
20040 EVT LDMemType = LD->getMemoryVT();
20041 EVT STMemType = ST->getMemoryVT();
20042 EVT STType = ST->getValue().getValueType();
20043
20044 // There are two cases to consider here:
20045 // 1. The store is fixed width and the load is scalable. In this case we
20046 // don't know at compile time if the store completely envelops the load
20047 // so we abandon the optimisation.
20048 // 2. The store is scalable and the load is fixed width. We could
20049 // potentially support a limited number of cases here, but there has been
20050 // no cost-benefit analysis to prove it's worth it.
20051 bool LdStScalable = LDMemType.isScalableVT();
20052 if (LdStScalable != STMemType.isScalableVT())
20053 return SDValue();
20054
20055 // If we are dealing with scalable vectors on a big endian platform the
20056 // calculation of offsets below becomes trickier, since we do not know at
20057 // compile time the absolute size of the vector. Until we've done more
20058 // analysis on big-endian platforms it seems better to bail out for now.
20059 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20060 return SDValue();
20061
20062 // Normalize for endianness. After this, Offset=0 will denote that the least
20063 // significant bit in the loaded value maps to the least significant bit in
20064 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20065 // n:th least significant byte of the stored value.
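// For example (hypothetical types): an i64 store feeding an i32 load of the
// same address has Offset = 0 before normalization; on a big-endian target the
// normalized Offset becomes (8 - 4) - 0 = 4, i.e. the load reads the 4th..7th
// least significant bytes of the stored value.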
20066 int64_t OrigOffset = Offset;
20067 if (DAG.getDataLayout().isBigEndian())
20068 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20069 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20070 8 -
20071 Offset;
20072
20073 // Check that the stored value covers all bits that are loaded.
20074 bool STCoversLD;
20075
20076 TypeSize LdMemSize = LDMemType.getSizeInBits();
20077 TypeSize StMemSize = STMemType.getSizeInBits();
20078 if (LdStScalable)
20079 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20080 else
20081 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20082 StMemSize.getFixedValue());
20083
20084 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20085 if (LD->isIndexed()) {
20086 // Cannot handle opaque target constants and we must respect the user's
20087 // request not to split indexes from loads.
20088 if (!canSplitIdx(LD))
20089 return SDValue();
20090 SDValue Idx = SplitIndexingFromLoad(LD);
20091 SDValue Ops[] = {Val, Idx, Chain};
20092 return CombineTo(LD, Ops, 3);
20093 }
20094 return CombineTo(LD, Val, Chain);
20095 };
20096
20097 if (!STCoversLD)
20098 return SDValue();
20099
20100 // Memory as copy space (potentially masked).
20101 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20102 // Simple case: Direct non-truncating forwarding
20103 if (LDType.getSizeInBits() == LdMemSize)
20104 return ReplaceLd(LD, ST->getValue(), Chain);
20105 // Can we model the truncate and extension with an and mask?
20106 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20107 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20108 // Mask to size of LDMemType
20109 auto Mask =
20110 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20111 StMemSize.getFixedValue()),
20112 SDLoc(ST), STType);
20113 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20114 return ReplaceLd(LD, Val, Chain);
20115 }
20116 }
20117
20118 // Handle some cases for big-endian that would be Offset 0 and handled for
20119 // little-endian.
20120 SDValue Val = ST->getValue();
20121 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20122 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20123 !LDType.isVector() && isTypeLegal(STType) &&
20124 TLI.isOperationLegal(ISD::SRL, STType)) {
20125 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20126 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20127 Offset = 0;
20128 }
20129 }
20130
20131 // TODO: Deal with nonzero offset.
20132 if (LD->getBasePtr().isUndef() || Offset != 0)
20133 return SDValue();
20134 // Model necessary truncations / extensions.
20135 // Truncate the value to the stored memory size.
20136 do {
20137 if (!getTruncatedStoreValue(ST, Val))
20138 break;
20139 if (!isTypeLegal(LDMemType))
20140 break;
20141 if (STMemType != LDMemType) {
20142 // TODO: Support vectors? This requires extract_subvector/bitcast.
20143 if (!STMemType.isVector() && !LDMemType.isVector() &&
20144 STMemType.isInteger() && LDMemType.isInteger())
20145 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20146 else
20147 break;
20148 }
20149 if (!extendLoadedValueToExtension(LD, Val))
20150 break;
20151 return ReplaceLd(LD, Val, Chain);
20152 } while (false);
20153
20154 // On failure, cleanup dead nodes we may have created.
20155 if (Val->use_empty())
20156 deleteAndRecombine(Val.getNode());
20157 return SDValue();
20158}
20159
20160SDValue DAGCombiner::visitLOAD(SDNode *N) {
20161 LoadSDNode *LD = cast<LoadSDNode>(N);
20162 SDValue Chain = LD->getChain();
20163 SDValue Ptr = LD->getBasePtr();
20164
20165 // If load is not volatile and there are no uses of the loaded value (and
20166 // the updated indexed value in case of indexed loads), change uses of the
20167 // chain value into uses of the chain input (i.e. delete the dead load).
20168 // TODO: Allow this for unordered atomics (see D66309)
20169 if (LD->isSimple()) {
20170 if (N->getValueType(1) == MVT::Other) {
20171 // Unindexed loads.
20172 if (!N->hasAnyUseOfValue(0)) {
20173 // It's not safe to use the two value CombineTo variant here. e.g.
20174 // v1, chain2 = load chain1, loc
20175 // v2, chain3 = load chain2, loc
20176 // v3 = add v2, c
20177 // Now we replace use of chain2 with chain1. This makes the second load
20178 // isomorphic to the one we are deleting, and thus makes this load live.
20179 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20180 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20181 dbgs() << "\n");
20182 WorklistRemover DeadNodes(*this);
20183 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20184 AddUsersToWorklist(Chain.getNode());
20185 if (N->use_empty())
20186 deleteAndRecombine(N);
20187
20188 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20189 }
20190 } else {
20191 // Indexed loads.
20192 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20193
20194 // If this load has an opaque TargetConstant offset, then we cannot split
20195 // the indexing into an add/sub directly (that TargetConstant may not be
20196 // valid for a different type of node, and we cannot convert an opaque
20197 // target constant into a regular constant).
20198 bool CanSplitIdx = canSplitIdx(LD);
20199
20200 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20201 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20202 SDValue Index;
20203 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20204 Index = SplitIndexingFromLoad(LD);
20205 // Try to fold the base pointer arithmetic into subsequent loads and
20206 // stores.
20207 AddUsersToWorklist(N);
20208 } else
20209 Index = DAG.getUNDEF(N->getValueType(1));
20210 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20211 dbgs() << "\nWith: "; Undef.dump(&DAG);
20212 dbgs() << " and 2 other values\n");
20213 WorklistRemover DeadNodes(*this);
20214 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20215 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20216 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20217 deleteAndRecombine(N);
20218 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20219 }
20220 }
20221 }
20222
20223 // If this load is directly stored, replace the load value with the stored
20224 // value.
20225 if (auto V = ForwardStoreValueToDirectLoad(LD))
20226 return V;
20227
20228 // Try to infer better alignment information than the load already has.
20229 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20230 !LD->isAtomic()) {
20231 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20232 if (*Alignment > LD->getAlign() &&
20233 isAligned(*Alignment, LD->getSrcValueOffset())) {
20234 SDValue NewLoad = DAG.getExtLoad(
20235 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20236 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20237 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20238 // NewLoad will always be N as we are only refining the alignment
20239 assert(NewLoad.getNode() == N);
20240 (void)NewLoad;
20241 }
20242 }
20243 }
20244
20245 if (LD->isUnindexed()) {
20246 // Walk up chain skipping non-aliasing memory nodes.
20247 SDValue BetterChain = FindBetterChain(LD, Chain);
20248
20249 // If there is a better chain.
20250 if (Chain != BetterChain) {
20251 SDValue ReplLoad;
20252
20253 // Replace the chain to avoid the dependency.
20254 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20255 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20256 BetterChain, Ptr, LD->getMemOperand());
20257 } else {
20258 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20259 LD->getValueType(0),
20260 BetterChain, Ptr, LD->getMemoryVT(),
20261 LD->getMemOperand());
20262 }
20263
20264 // Create token factor to keep old chain connected.
20265 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20266 MVT::Other, Chain, ReplLoad.getValue(1));
20267
20268 // Replace uses with load result and token factor
20269 return CombineTo(N, ReplLoad.getValue(0), Token);
20270 }
20271 }
20272
20273 // Try transforming N to an indexed load.
20274 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20275 return SDValue(N, 0);
20276
20277 // Try to slice up N to more direct loads if the slices are mapped to
20278 // different register banks or pairing can take place.
20279 if (SliceUpLoad(N))
20280 return SDValue(N, 0);
20281
20282 return SDValue();
20283}
20284
20285namespace {
20286
20287/// Helper structure used to slice a load in smaller loads.
20288/// Basically a slice is obtained from the following sequence:
20289/// Origin = load Ty1, Base
20290/// Shift = srl Ty1 Origin, CstTy Amount
20291/// Inst = trunc Shift to Ty2
20292///
20293/// Then, it will be rewritten into:
20294/// Slice = load SliceTy, Base + SliceOffset
20295/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20296///
20297/// SliceTy is deduced from the number of bits that are actually used to
20298/// build Inst.
20299struct LoadedSlice {
20300 /// Helper structure used to compute the cost of a slice.
20301 struct Cost {
20302 /// Are we optimizing for code size.
20303 bool ForCodeSize = false;
20304
20305 /// Various cost.
20306 unsigned Loads = 0;
20307 unsigned Truncates = 0;
20308 unsigned CrossRegisterBanksCopies = 0;
20309 unsigned ZExts = 0;
20310 unsigned Shift = 0;
20311
20312 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20313
20314 /// Get the cost of one isolated slice.
20315 Cost(const LoadedSlice &LS, bool ForCodeSize)
20316 : ForCodeSize(ForCodeSize), Loads(1) {
20317 EVT TruncType = LS.Inst->getValueType(0);
20318 EVT LoadedType = LS.getLoadedType();
20319 if (TruncType != LoadedType &&
20320 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20321 ZExts = 1;
20322 }
20323
20324 /// Account for slicing gain in the current cost.
20325 /// Slicing provides a few gains, like removing a shift or a
20326 /// truncate. This method allows growing the cost of the original
20327 /// load with the gain from this slice.
20328 void addSliceGain(const LoadedSlice &LS) {
20329 // Each slice saves a truncate.
20330 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20331 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20332 ++Truncates;
20333 // If there is a shift amount, this slice gets rid of it.
20334 if (LS.Shift)
20335 ++Shift;
20336 // If this slice can merge a cross register bank copy, account for it.
20337 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20338 ++CrossRegisterBanksCopies;
20339 }
20340
20341 Cost &operator+=(const Cost &RHS) {
20342 Loads += RHS.Loads;
20343 Truncates += RHS.Truncates;
20344 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20345 ZExts += RHS.ZExts;
20346 Shift += RHS.Shift;
20347 return *this;
20348 }
20349
20350 bool operator==(const Cost &RHS) const {
20351 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20352 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20353 ZExts == RHS.ZExts && Shift == RHS.Shift;
20354 }
20355
20356 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20357
20358 bool operator<(const Cost &RHS) const {
20359 // Assume cross register banks copies are as expensive as loads.
20360 // FIXME: Do we want some more target hooks?
20361 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20362 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20363 // Unless we are optimizing for code size, consider the
20364 // expensive operation first.
20365 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20366 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20367 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20368 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20369 }
20370
20371 bool operator>(const Cost &RHS) const { return RHS < *this; }
20372
20373 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20374
20375 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20376 };
20377
20378 // The last instruction that represents the slice. This should be a
20379 // truncate instruction.
20380 SDNode *Inst;
20381
20382 // The original load instruction.
20383 LoadSDNode *Origin;
20384
20385 // The right shift amount in bits from the original load.
20386 unsigned Shift;
20387
20388 // The DAG that Origin came from.
20389 // This is used to get some contextual information about legal types, etc.
20390 SelectionDAG *DAG;
20391
20392 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20393 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20394 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20395
20396 /// Get the bits used in the original loaded value.
20397 /// \return Result is as wide as the original loaded value and has used
20398 /// bits set to 1 and not used bits set to 0.
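///
/// For example (a hypothetical slice): an i16 truncate of an i32 load with
/// Shift = 16 yields a used-bits mask of 0xFFFF0000, i.e. the upper half of
/// the 32-bit loaded value.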
20399 APInt getUsedBits() const {
20400 // Reproduce the trunc(lshr) sequence:
20401 // - Start from the truncated value.
20402 // - Zero extend to the desired bit width.
20403 // - Shift left.
20404 assert(Origin && "No original load to compare against.");
20405 unsigned BitWidth = Origin->getValueSizeInBits(0);
20406 assert(Inst && "This slice is not bound to an instruction");
20407 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20408 "Extracted slice is bigger than the whole type!");
20409 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20410 UsedBits.setAllBits();
20411 UsedBits = UsedBits.zext(BitWidth);
20412 UsedBits <<= Shift;
20413 return UsedBits;
20414 }
20415
20416 /// Get the size of the slice to be loaded in bytes.
20417 unsigned getLoadedSize() const {
20418 unsigned SliceSize = getUsedBits().popcount();
20419 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20420 return SliceSize / 8;
20421 }
20422
20423 /// Get the type that will be loaded for this slice.
20424 /// Note: This may not be the final type for the slice.
20425 EVT getLoadedType() const {
20426 assert(DAG && "Missing context");
20427 LLVMContext &Ctxt = *DAG->getContext();
20428 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20429 }
20430
20431 /// Get the alignment of the load used for this slice.
20432 Align getAlign() const {
20433 Align Alignment = Origin->getAlign();
20434 uint64_t Offset = getOffsetFromBase();
20435 if (Offset != 0)
20436 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20437 return Alignment;
20438 }
20439
20440 /// Check if this slice can be rewritten with legal operations.
20441 bool isLegal() const {
20442 // An invalid slice is not legal.
20443 if (!Origin || !Inst || !DAG)
20444 return false;
20445
20446 // Offsets are for indexed loads only; we do not handle that.
20447 if (!Origin->getOffset().isUndef())
20448 return false;
20449
20450 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20451
20452 // Check that the type is legal.
20453 EVT SliceType = getLoadedType();
20454 if (!TLI.isTypeLegal(SliceType))
20455 return false;
20456
20457 // Check that the load is legal for this type.
20458 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20459 return false;
20460
20461 // Check that the offset can be computed.
20462 // 1. Check its type.
20463 EVT PtrType = Origin->getBasePtr().getValueType();
20464 if (PtrType == MVT::Untyped || PtrType.isExtended())
20465 return false;
20466
20467 // 2. Check that it fits in the immediate.
20468 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20469 return false;
20470
20471 // 3. Check that the computation is legal.
20472 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20473 return false;
20474
20475 // Check that the zext is legal if it needs one.
20476 EVT TruncateType = Inst->getValueType(0);
20477 if (TruncateType != SliceType &&
20478 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20479 return false;
20480
20481 return true;
20482 }
20483
20484 /// Get the offset in bytes of this slice in the original chunk of
20485 /// bits.
20486 /// \pre DAG != nullptr.
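///
/// For example (a hypothetical slice): with a 32-bit original load, Shift = 16
/// and a 16-bit slice, the offset is 2 on a little-endian target and
/// 4 - 2 - 2 = 0 on a big-endian target.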
20487 uint64_t getOffsetFromBase() const {
20488 assert(DAG && "Missing context.");
20489 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20490 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20491 uint64_t Offset = Shift / 8;
20492 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20493 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20494 "The size of the original loaded type is not a multiple of a"
20495 " byte.");
20496 // If Offset is bigger than TySizeInBytes, it means we are loading all
20497 // zeros. This should have been optimized before in the process.
20498 assert(TySizeInBytes > Offset &&
20499 "Invalid shift amount for given loaded size");
20500 if (IsBigEndian)
20501 Offset = TySizeInBytes - Offset - getLoadedSize();
20502 return Offset;
20503 }
20504
20505 /// Generate the sequence of instructions to load the slice
20506 /// represented by this object and redirect the uses of this slice to
20507 /// this new sequence of instructions.
20508 /// \pre this->Inst && this->Origin are valid Instructions and this
20509 /// object passed the legal check: LoadedSlice::isLegal returned true.
20510 /// \return The last instruction of the sequence used to load the slice.
20511 SDValue loadSlice() const {
20512 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20513 const SDValue &OldBaseAddr = Origin->getBasePtr();
20514 SDValue BaseAddr = OldBaseAddr;
20515 // Get the offset in that chunk of bytes w.r.t. the endianness.
20516 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20517 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20518 if (Offset) {
20519 // BaseAddr = BaseAddr + Offset.
20520 EVT ArithType = BaseAddr.getValueType();
20521 SDLoc DL(Origin);
20522 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20523 DAG->getConstant(Offset, DL, ArithType));
20524 }
20525
20526 // Create the type of the loaded slice according to its size.
20527 EVT SliceType = getLoadedType();
20528
20529 // Create the load for the slice.
20530 SDValue LastInst =
20531 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20532 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
20533 Origin->getMemOperand()->getFlags());
20534 // If the final type is not the same as the loaded type, this means that
20535 // we have to pad with zero. Create a zero extend for that.
20536 EVT FinalType = Inst->getValueType(0);
20537 if (SliceType != FinalType)
20538 LastInst =
20539 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20540 return LastInst;
20541 }
20542
20543 /// Check if this slice can be merged with an expensive cross register
20544 /// bank copy. E.g.,
20545 /// i = load i32
20546 /// f = bitcast i32 i to float
20547 bool canMergeExpensiveCrossRegisterBankCopy() const {
20548 if (!Inst || !Inst->hasOneUse())
20549 return false;
20550 SDNode *User = *Inst->user_begin();
20551 if (User->getOpcode() != ISD::BITCAST)
20552 return false;
20553 assert(DAG && "Missing context");
20554 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20555 EVT ResVT = User->getValueType(0);
20556 const TargetRegisterClass *ResRC =
20557 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20558 const TargetRegisterClass *ArgRC =
20559 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20560 User->getOperand(0)->isDivergent());
20561 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20562 return false;
20563
20564 // At this point, we know that we perform a cross-register-bank copy.
20565 // Check if it is expensive.
20566 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20567 // Assume bitcasts are cheap, unless both register classes do not
20568 // explicitly share a common sub class.
20569 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20570 return false;
20571
20572 // Check if it will be merged with the load.
20573 // 1. Check the alignment / fast memory access constraint.
20574 unsigned IsFast = 0;
20575 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20576 Origin->getAddressSpace(), getAlign(),
20577 Origin->getMemOperand()->getFlags(), &IsFast) ||
20578 !IsFast)
20579 return false;
20580
20581 // 2. Check that the load is a legal operation for that type.
20582 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20583 return false;
20584
20585 // 3. Check that we do not have a zext in the way.
20586 if (Inst->getValueType(0) != getLoadedType())
20587 return false;
20588
20589 return true;
20590 }
20591};
20592
20593} // end anonymous namespace
20594
20595/// Check that all bits set in \p UsedBits form a dense region, i.e.,
20596/// \p UsedBits looks like 0..0 1..1 0..0.
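/// For example (illustrative values): 0x00FF00 is dense (a single contiguous
/// run of ones), while 0x00F0F0 is not.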
20597static bool areUsedBitsDense(const APInt &UsedBits) {
20598 // If all the bits are one, this is dense!
20599 if (UsedBits.isAllOnes())
20600 return true;
20601
20602 // Get rid of the unused bits on the right.
20603 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
20604 // Get rid of the unused bits on the left.
20605 if (NarrowedUsedBits.countl_zero())
20606 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
20607 // Check that the chunk of bits is completely used.
20608 return NarrowedUsedBits.isAllOnes();
20609}
20610
20611/// Check whether or not \p First and \p Second are next to each other
20612/// in memory. This means that there is no hole between the bits loaded
20613/// by \p First and the bits loaded by \p Second.
20614static bool areSlicesNextToEachOther(const LoadedSlice &First,
20615 const LoadedSlice &Second) {
20616 assert(First.Origin == Second.Origin && First.Origin &&
20617 "Unable to match different memory origins.");
20618 APInt UsedBits = First.getUsedBits();
20619 assert((UsedBits & Second.getUsedBits()) == 0 &&
20620 "Slices are not supposed to overlap.");
20621 UsedBits |= Second.getUsedBits();
20622 return areUsedBitsDense(UsedBits);
20623}
20624
20625 /// Adjust the \p GlobalLSCost according to the target
20626 /// pairing capabilities and the layout of the slices.
20627 /// \pre \p GlobalLSCost should account for at least as many loads as
20628 /// there are in the slices in \p LoadedSlices.
20629 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20630 LoadedSlice::Cost &GlobalLSCost) {
20631 unsigned NumberOfSlices = LoadedSlices.size();
20632 // If there are fewer than 2 elements, no pairing is possible.
20633 if (NumberOfSlices < 2)
20634 return;
20635
20636 // Sort the slices so that elements that are likely to be next to each
20637 // other in memory are next to each other in the list.
20638 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
20639 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
20640 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
20641 });
20642 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
20643 // First (resp. Second) is the first (resp. second) potential candidate
20644 // to be placed in a paired load.
20645 const LoadedSlice *First = nullptr;
20646 const LoadedSlice *Second = nullptr;
20647 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
20648 // Set the beginning of the pair.
20649 First = Second) {
20650 Second = &LoadedSlices[CurrSlice];
20651
20652 // If First is NULL, it means we start a new pair.
20653 // Get to the next slice.
20654 if (!First)
20655 continue;
20656
20657 EVT LoadedType = First->getLoadedType();
20658
20659 // If the types of the slices are different, we cannot pair them.
20660 if (LoadedType != Second->getLoadedType())
20661 continue;
20662
20663 // Check if the target supplies paired loads for this type.
20664 Align RequiredAlignment;
20665 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
20666 // Move to the next pair; this type is hopeless.
20667 Second = nullptr;
20668 continue;
20669 }
20670 // Check if we meet the alignment requirement.
20671 if (First->getAlign() < RequiredAlignment)
20672 continue;
20673
20674 // Check that both loads are next to each other in memory.
20675 if (!areSlicesNextToEachOther(*First, *Second))
20676 continue;
20677
20678 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
20679 --GlobalLSCost.Loads;
20680 // Move to the next pair.
20681 Second = nullptr;
20682 }
20683}
20684
20685/// Check the profitability of all involved LoadedSlice.
20686 /// Currently, it is considered profitable if there are exactly two
20687/// involved slices (1) which are (2) next to each other in memory, and
20688/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20689///
20690/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20691/// the elements themselves.
20692///
20693/// FIXME: When the cost model will be mature enough, we can relax
20694/// constraints (1) and (2).
20695 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20696 const APInt &UsedBits, bool ForCodeSize) {
20697 unsigned NumberOfSlices = LoadedSlices.size();
20698 if (StressLoadSlicing)
20699 return NumberOfSlices > 1;
20700
20701 // Check (1).
20702 if (NumberOfSlices != 2)
20703 return false;
20704
20705 // Check (2).
20706 if (!areUsedBitsDense(UsedBits))
20707 return false;
20708
20709 // Check (3).
20710 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20711 // The original code has one big load.
20712 OrigCost.Loads = 1;
20713 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20714 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20715 // Accumulate the cost of all the slices.
20716 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20717 GlobalSlicingCost += SliceCost;
20718
20719 // Account, as cost in the original configuration, for the gain obtained
20720 // with the current slices.
20721 OrigCost.addSliceGain(LS);
20722 }
20723
20724 // If the target supports paired load, adjust the cost accordingly.
20725 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20726 return OrigCost > GlobalSlicingCost;
20727}
20728
20729/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20730/// operations, split it in the various pieces being extracted.
20731///
20732/// This sort of thing is introduced by SROA.
20733/// This slicing takes care not to insert overlapping loads.
20734/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20735bool DAGCombiner::SliceUpLoad(SDNode *N) {
20736 if (Level < AfterLegalizeDAG)
20737 return false;
20738
20739 LoadSDNode *LD = cast<LoadSDNode>(N);
20740 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20741 !LD->getValueType(0).isInteger())
20742 return false;
20743
20744 // The algorithm to split up a load of a scalable vector into individual
20745 // elements currently requires knowing the length of the loaded type,
20746 // so will need adjusting to work on scalable vectors.
20747 if (LD->getValueType(0).isScalableVector())
20748 return false;
20749
20750 // Keep track of already used bits to detect overlapping values.
20751 // In that case, we will just abort the transformation.
20752 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20753
20754 SmallVector<LoadedSlice, 4> LoadedSlices;
20755
20756 // Check if this load is used as several smaller chunks of bits.
20757 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20758 // of computation for each trunc.
20759 for (SDUse &U : LD->uses()) {
20760 // Skip the uses of the chain.
20761 if (U.getResNo() != 0)
20762 continue;
20763
20764 SDNode *User = U.getUser();
20765 unsigned Shift = 0;
20766
20767 // Check if this is a trunc(lshr).
20768 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20769 isa<ConstantSDNode>(User->getOperand(1))) {
20770 Shift = User->getConstantOperandVal(1);
20771 User = *User->user_begin();
20772 }
20773
20774 // At this point, User is a TRUNCATE iff we encountered trunc or
20775 // trunc(lshr).
20776 if (User->getOpcode() != ISD::TRUNCATE)
20777 return false;
20778
20779 // The width of the type must be a power of 2 and at least 8 bits.
20780 // Otherwise the load cannot be represented in LLVM IR.
20781 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
20782 // the slice will be across several bytes. We do not support that.
20783 unsigned Width = User->getValueSizeInBits(0);
20784 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20785 return false;
20786
20787 // Build the slice for this chain of computations.
20788 LoadedSlice LS(User, LD, Shift, &DAG);
20789 APInt CurrentUsedBits = LS.getUsedBits();
20790
20791 // Check if this slice overlaps with another.
20792 if ((CurrentUsedBits & UsedBits) != 0)
20793 return false;
20794 // Update the bits used globally.
20795 UsedBits |= CurrentUsedBits;
20796
20797 // Check if the new slice would be legal.
20798 if (!LS.isLegal())
20799 return false;
20800
20801 // Record the slice.
20802 LoadedSlices.push_back(LS);
20803 }
20804
20805 // Abort slicing if it does not seem to be profitable.
20806 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20807 return false;
20808
20809 ++SlicedLoads;
20810
20811 // Rewrite each chain to use an independent load.
20812 // By construction, each chain can be represented by a unique load.
20813
20814 // Prepare the argument for the new token factor for all the slices.
20815 SmallVector<SDValue, 8> ArgChains;
20816 for (const LoadedSlice &LS : LoadedSlices) {
20817 SDValue SliceInst = LS.loadSlice();
20818 CombineTo(LS.Inst, SliceInst, true);
20819 if (SliceInst.getOpcode() != ISD::LOAD)
20820 SliceInst = SliceInst.getOperand(0);
20821 assert(SliceInst->getOpcode() == ISD::LOAD &&
20822 "It takes more than a zext to get to the loaded slice!!");
20823 ArgChains.push_back(SliceInst.getValue(1));
20824 }
20825
20826 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20827 ArgChains);
20828 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20829 AddToWorklist(Chain.getNode());
20830 return true;
20831}
20832
20833 /// Check to see if V is (and (load ptr), imm), where the load has specific
20834 /// bytes cleared out. If so, return the number of bytes being masked out and
20835 /// the shift amount (in bytes).
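///
/// For example (hypothetical i32 values): for
///   V = (and (load P), 0xFFFF00FF)
/// only byte 1 is cleared, so the result would be (1, 1): one byte masked out,
/// starting one byte above the base.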
20836static std::pair<unsigned, unsigned>
20837 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
20838 std::pair<unsigned, unsigned> Result(0, 0);
20839
20840 // Check for the structure we're looking for.
20841 if (V->getOpcode() != ISD::AND ||
20842 !isa<ConstantSDNode>(V->getOperand(1)) ||
20843 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20844 return Result;
20845
20846 // Check the chain and pointer.
20847 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20848 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20849
20850 // This only handles simple types.
20851 if (V.getValueType() != MVT::i16 &&
20852 V.getValueType() != MVT::i32 &&
20853 V.getValueType() != MVT::i64)
20854 return Result;
20855
20856 // Check the constant mask. Invert it so that the bits being cleared by the
20857 // 'and' become 1 and the bits being kept become 0. Use getSExtValue so that
20858 // leading bits follow the sign bit for uniformity.
20859 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20860 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20861 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20862 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20863 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20864 if (NotMaskLZ == 64) return Result; // All zero mask.
20865
20866 // See if we have a continuous run of bits. If so, we have 0*1+0*
20867 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20868 return Result;
20869
20870 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20871 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20872 NotMaskLZ -= 64-V.getValueSizeInBits();
20873
20874 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20875 switch (MaskedBytes) {
20876 case 1:
20877 case 2:
20878 case 4: break;
20879 default: return Result; // All one mask, or 5-byte mask.
20880 }
20881
20882 // Verify that the masked region starts at a byte offset that is a multiple
20883 // of its size, so that the access is aligned the same as the access width.
20884 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20885
20886 // For narrowing to be valid, it must be the case that the load is the
20887 // immediately preceding memory operation before the store.
20888 if (LD == Chain.getNode())
20889 ; // ok.
20890 else if (Chain->getOpcode() == ISD::TokenFactor &&
20891 SDValue(LD, 1).hasOneUse()) {
20892 // LD has only 1 chain use so there are no indirect dependencies.
20893 if (!LD->isOperandOf(Chain.getNode()))
20894 return Result;
20895 } else
20896 return Result; // Fail.
20897
20898 Result.first = MaskedBytes;
20899 Result.second = NotMaskTZ/8;
20900 return Result;
20901}
20902
20903/// Check to see if IVal is something that provides a value as specified by
20904/// MaskInfo. If so, replace the specified store with a narrower store of
20905/// truncated IVal.
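///
/// For example (hypothetical values): with MaskInfo = (1, 1) for a store of
/// (or X, Y) where X = (and (load P), 0xFFFF00FF), and Y known to be zero
/// outside bits [8, 16), the store can be replaced by a one-byte store of
/// (Y >> 8) at P + 1 on a little-endian target.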
20906static SDValue
20907ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20908 SDValue IVal, StoreSDNode *St,
20909 DAGCombiner *DC) {
20910 unsigned NumBytes = MaskInfo.first;
20911 unsigned ByteShift = MaskInfo.second;
20912 SelectionDAG &DAG = DC->getDAG();
20913
20914 // Check to see if IVal is all zeros in the part being masked in by the 'or'
20915 // that uses this. If not, this is not a replacement.
20916 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20917 ByteShift*8, (ByteShift+NumBytes)*8);
20918 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20919
20920 // Check that it is legal on the target to do this. It is legal if the new
20921 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20922 // legalization. If the source type is legal, but the store type isn't, see
20923 // if we can use a truncating store.
20924 MVT VT = MVT::getIntegerVT(NumBytes * 8);
20925 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20926 bool UseTruncStore;
20927 if (DC->isTypeLegal(VT))
20928 UseTruncStore = false;
20929 else if (TLI.isTypeLegal(IVal.getValueType()) &&
20930 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20931 UseTruncStore = true;
20932 else
20933 return SDValue();
20934
20935 // Can't do this for indexed stores.
20936 if (St->isIndexed())
20937 return SDValue();
20938
20939 // Check that the target doesn't think this is a bad idea.
20940 if (St->getMemOperand() &&
20941 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
20942 *St->getMemOperand()))
20943 return SDValue();
20944
20945 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
20946 // shifted by ByteShift and truncated down to NumBytes.
20947 if (ByteShift) {
20948 SDLoc DL(IVal);
20949 IVal = DAG.getNode(
20950 ISD::SRL, DL, IVal.getValueType(), IVal,
20951 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
20952 }
20953
20954 // Figure out the offset for the store and the alignment of the access.
20955 unsigned StOffset;
20956 if (DAG.getDataLayout().isLittleEndian())
20957 StOffset = ByteShift;
20958 else
20959 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20960
20961 SDValue Ptr = St->getBasePtr();
20962 if (StOffset) {
20963 SDLoc DL(IVal);
20964 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
20965 }
20966
20967 ++OpsNarrowed;
20968 if (UseTruncStore)
20969 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20970 St->getPointerInfo().getWithOffset(StOffset), VT,
20971 St->getBaseAlign());
20972
20973 // Truncate down to the new size.
20974 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
20975
20976 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20977 St->getPointerInfo().getWithOffset(StOffset),
20978 St->getBaseAlign());
20979}
20980
20981/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
20982/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
20983/// narrowing the load and store if it would end up being a win for performance
20984/// or code size.
20985SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
20986 StoreSDNode *ST = cast<StoreSDNode>(N);
20987 if (!ST->isSimple())
20988 return SDValue();
20989
20990 SDValue Chain = ST->getChain();
20991 SDValue Value = ST->getValue();
20992 SDValue Ptr = ST->getBasePtr();
20993 EVT VT = Value.getValueType();
20994
20995 if (ST->isTruncatingStore() || VT.isVector())
20996 return SDValue();
20997
20998 unsigned Opc = Value.getOpcode();
20999
21000 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21001 !Value.hasOneUse())
21002 return SDValue();
21003
21004 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21005 // is a byte mask indicating a consecutive number of bytes, check to see if
21006 // Y is known to provide just those bytes. If so, we try to replace the
21007 // load + replace + store sequence with a single (narrower) store, which makes
21008 // the load dead.
21010 std::pair<unsigned, unsigned> MaskedLoad;
21011 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21012 if (MaskedLoad.first)
21013 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21014 Value.getOperand(1), ST,this))
21015 return NewST;
21016
21017 // Or is commutative, so try swapping X and Y.
21018 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21019 if (MaskedLoad.first)
21020 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21021 Value.getOperand(0), ST,this))
21022 return NewST;
21023 }
21024
21026 return SDValue();
21027
21028 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21029 return SDValue();
21030
21031 SDValue N0 = Value.getOperand(0);
21032 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21033 Chain == SDValue(N0.getNode(), 1)) {
21034 LoadSDNode *LD = cast<LoadSDNode>(N0);
21035 if (LD->getBasePtr() != Ptr ||
21036 LD->getPointerInfo().getAddrSpace() !=
21037 ST->getPointerInfo().getAddrSpace())
21038 return SDValue();
21039
21040 // Find the type NewVT to narrow the load / op / store to.
21041 SDValue N1 = Value.getOperand(1);
21042 unsigned BitWidth = N1.getValueSizeInBits();
21043 APInt Imm = N1->getAsAPIntVal();
21044 if (Opc == ISD::AND)
21045 Imm.flipAllBits();
21046 if (Imm == 0 || Imm.isAllOnes())
21047 return SDValue();
21048 // Find the least/most significant bits that need to be part of the narrowed
21049 // operation. We assume the target will need to address/access full bytes, so
21050 // we make sure to align LSB and MSB at byte boundaries.
21051 unsigned BitsPerByteMask = 7u;
21052 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21053 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21054 unsigned NewBW = NextPowerOf2(MSB - LSB);
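// For example, with Imm = 0x00FFFF00: countr_zero() = 8 so LSB = 8,
// getActiveBits() = 24 so MSB = 23, and NewBW = NextPowerOf2(15) = 16,
// making i16 the first candidate type for the narrowed operation.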
21055 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21056 // The narrowing should be profitable, the load/store operation should be
21057 // legal (or custom) and the store size should be equal to the NewVT width.
21058 while (NewBW < BitWidth &&
21059 (NewVT.getStoreSizeInBits() != NewBW ||
21060 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
21062 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21063 NewBW = NextPowerOf2(NewBW);
21064 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21065 }
21066 if (NewBW >= BitWidth)
21067 return SDValue();
21068
21069 // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
21070 // large enough to cover all bits that should be modified. This type might
21071 // however be larger than really needed (such as i32 while we actually only
21072 // need to modify one byte). Now we need to find out how to align the memory
21073 // accesses so that we satisfy preferred alignments and avoid accessing
21074 // memory outside the store size of the original access.
21075
21076 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21077
21078 // Let ShAmt denote the number of bits to skip, counted from the least
21079 // significant bits of Imm, and let PtrOff denote how many bytes the pointer
21080 // needs to be offset for the new access.
21081 unsigned ShAmt = 0;
21082 uint64_t PtrOff = 0;
21083 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
21084 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21085 if (ShAmt > LSB)
21086 return SDValue();
21087 if (ShAmt + NewBW < MSB)
21088 continue;
21089
21090 // Calculate PtrOff.
21091 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21092 ? VTStoreSize - NewBW - ShAmt
21093 : ShAmt;
21094 PtrOff = PtrAdjustmentInBits / 8;
21095
21096 // Now check if narrow access is allowed and fast, considering alignments.
21097 unsigned IsFast = 0;
21098 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21099 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21100 LD->getAddressSpace(), NewAlign,
21101 LD->getMemOperand()->getFlags(), &IsFast) &&
21102 IsFast)
21103 break;
21104 }
21105 // If the loop above did not find an acceptable ShAmt, we need to exit here.
21106 if (ShAmt + NewBW > VTStoreSize)
21107 return SDValue();
21108
21109 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21110 if (Opc == ISD::AND)
21111 NewImm.flipAllBits();
21112 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21113 SDValue NewPtr =
21114 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21115 SDValue NewLD =
21116 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21117 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21118 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21119 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21120 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21121 SDValue NewST =
21122 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21123 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21124
21125 AddToWorklist(NewPtr.getNode());
21126 AddToWorklist(NewLD.getNode());
21127 AddToWorklist(NewVal.getNode());
21128 WorklistRemover DeadNodes(*this);
21129 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21130 ++OpsNarrowed;
21131 return NewST;
21132 }
21133
21134 return SDValue();
21135}
21136
21137/// For a given floating point load / store pair, if the load value isn't used
21138/// by any other operations, then consider transforming the pair to integer
21139/// load / store operations if the target deems the transformation profitable.
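/// For example, a value copied with
///   t0 = load f64 p
///   store t0, q
/// can instead be copied as
///   t0 = load i64 p
///   store t0, q
/// when the target considers the integer load/store preferable, avoiding a
/// trip through the floating-point register file.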
21140SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21141 StoreSDNode *ST = cast<StoreSDNode>(N);
21142 SDValue Value = ST->getValue();
21143 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21144 Value.hasOneUse()) {
21145 LoadSDNode *LD = cast<LoadSDNode>(Value);
21146 EVT VT = LD->getMemoryVT();
21147 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21148 LD->isNonTemporal() || ST->isNonTemporal() ||
21149 LD->getPointerInfo().getAddrSpace() != 0 ||
21150 ST->getPointerInfo().getAddrSpace() != 0)
21151 return SDValue();
21152
21153 TypeSize VTSize = VT.getSizeInBits();
21154
21155 // We don't know the size of scalable types at compile time so we cannot
21156 // create an integer of the equivalent size.
21157 if (VTSize.isScalable())
21158 return SDValue();
21159
21160 unsigned FastLD = 0, FastST = 0;
21161 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21162 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21163 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21164 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21165 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21166 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21167 *LD->getMemOperand(), &FastLD) ||
21168 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21169 *ST->getMemOperand(), &FastST) ||
21170 !FastLD || !FastST)
21171 return SDValue();
21172
21173 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21174 LD->getBasePtr(), LD->getMemOperand());
21175
21176 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21177 ST->getBasePtr(), ST->getMemOperand());
21178
21179 AddToWorklist(NewLD.getNode());
21180 AddToWorklist(NewST.getNode());
21181 WorklistRemover DeadNodes(*this);
21182 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21183 ++LdStFP2Int;
21184 return NewST;
21185 }
21186
21187 return SDValue();
21188}
21189
21190// This is a helper function for visitMUL to check the profitability
21191// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21192// MulNode is the original multiply, AddNode is (add x, c1),
21193// and ConstNode is c2.
21194//
21195// If the (add x, c1) has multiple uses, we could increase
21196// the number of adds if we make this transformation.
21197// It would only be worth doing this if we can remove a
21198// multiply in the process. Check for that here.
21199// To illustrate:
21200// (A + c1) * c3
21201// (A + c2) * c3
21202// We're checking for cases where we have common "c3 * A" expressions.
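// For example, with c1 = 3, c2 = 5 and c3 = 10:
//   (A + 3) * 10 -> (A * 10) + 30
//   (A + 5) * 10 -> (A * 10) + 50
// and the common "A * 10" is computed only once, so the extra adds are paid
// for by the multiply that is saved.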
21203bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21204 SDValue ConstNode) {
21205 // If the add only has one use, and the target thinks the folding is
21206 // profitable or does not lead to worse code, this would be OK to do.
21207 if (AddNode->hasOneUse() &&
21208 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21209 return true;
21210
21211 // Walk all the users of the constant with which we're multiplying.
21212 for (SDNode *User : ConstNode->users()) {
21213 if (User == MulNode) // This use is the one we're on right now. Skip it.
21214 continue;
21215
21216 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21217 SDNode *OtherOp;
21218 SDNode *MulVar = AddNode.getOperand(0).getNode();
21219
21220 // OtherOp is what we're multiplying against the constant.
21221 if (User->getOperand(0) == ConstNode)
21222 OtherOp = User->getOperand(1).getNode();
21223 else
21224 OtherOp = User->getOperand(0).getNode();
21225
21226 // Check to see if multiply is with the same operand of our "add".
21227 //
21228 // ConstNode = CONST
21229 // User = ConstNode * A <-- visiting User. OtherOp is A.
21230 // ...
21231 // AddNode = (A + c1) <-- MulVar is A.
21232 // = AddNode * ConstNode <-- current visiting instruction.
21233 //
21234 // If we make this transformation, we will have a common
21235 // multiply (ConstNode * A) that we can save.
21236 if (OtherOp == MulVar)
21237 return true;
21238
21239 // Now check to see if a future expansion will give us a common
21240 // multiply.
21241 //
21242 // ConstNode = CONST
21243 // AddNode = (A + c1)
21244 // ... = AddNode * ConstNode <-- current visiting instruction.
21245 // ...
21246 // OtherOp = (A + c2)
21247 // User = OtherOp * ConstNode <-- visiting User.
21248 //
21249 // If we make this transformation, we will have a common
21250 // multiply (CONST * A) after we also do the same transformation
21251 // to the "t2" instruction.
21252 if (OtherOp->getOpcode() == ISD::ADD &&
21253 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
21254 OtherOp->getOperand(0).getNode() == MulVar)
21255 return true;
21256 }
21257 }
21258
21259 // Didn't find a case where this would be profitable.
21260 return false;
21261}
21262
21263SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21264 unsigned NumStores) {
21265 SmallVector<SDValue, 8> Chains;
21266 SmallPtrSet<const SDNode *, 8> Visited;
21267 SDLoc StoreDL(StoreNodes[0].MemNode);
21268
21269 for (unsigned i = 0; i < NumStores; ++i) {
21270 Visited.insert(StoreNodes[i].MemNode);
21271 }
21272
21273 // Don't include nodes that are children of the merged stores, or repeated nodes.
21274 for (unsigned i = 0; i < NumStores; ++i) {
21275 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21276 Chains.push_back(StoreNodes[i].MemNode->getChain());
21277 }
21278
21279 assert(!Chains.empty() && "Chain should have generated a chain");
21280 return DAG.getTokenFactor(StoreDL, Chains);
21281}
21282
21283bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21284 const Value *UnderlyingObj = nullptr;
21285 for (const auto &MemOp : StoreNodes) {
21286 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
21287 // A pseudo value like a stack frame has its own frame index and size; we
21288 // should not use the first store's frame index for other frames.
21289 if (MMO->getPseudoValue())
21290 return false;
21291
21292 if (!MMO->getValue())
21293 return false;
21294
21295 const Value *Obj = getUnderlyingObject(MMO->getValue());
21296
21297 if (UnderlyingObj && UnderlyingObj != Obj)
21298 return false;
21299
21300 if (!UnderlyingObj)
21301 UnderlyingObj = Obj;
21302 }
21303
21304 return true;
21305}
21306
21307bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21308 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21309 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21310 // Make sure we have something to merge.
21311 if (NumStores < 2)
21312 return false;
21313
21314 assert((!UseTrunc || !UseVector) &&
21315 "This optimization cannot emit a vector truncating store");
21316
21317 // The latest Node in the DAG.
21318 SDLoc DL(StoreNodes[0].MemNode);
21319
21320 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21321 unsigned SizeInBits = NumStores * ElementSizeBits;
21322 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21323
21324 std::optional<MachineMemOperand::Flags> Flags;
21325 AAMDNodes AAInfo;
21326 for (unsigned I = 0; I != NumStores; ++I) {
21327 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21328 if (!Flags) {
21329 Flags = St->getMemOperand()->getFlags();
21330 AAInfo = St->getAAInfo();
21331 continue;
21332 }
21333 // Skip merging if there's an inconsistent flag.
21334 if (Flags != St->getMemOperand()->getFlags())
21335 return false;
21336 // Concatenate AA metadata.
21337 AAInfo = AAInfo.concat(St->getAAInfo());
21338 }
21339
21340 EVT StoreTy;
21341 if (UseVector) {
21342 unsigned Elts = NumStores * NumMemElts;
21343 // Get the type for the merged vector store.
21344 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21345 } else
21346 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21347
21348 SDValue StoredVal;
21349 if (UseVector) {
21350 if (IsConstantSrc) {
21351 SmallVector<SDValue, 8> BuildVector;
21352 for (unsigned I = 0; I != NumStores; ++I) {
21353 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21354 SDValue Val = St->getValue();
21355 // If constant is of the wrong type, convert it now. This comes up
21356 // when one of our stores was truncating.
21357 if (MemVT != Val.getValueType()) {
21358 Val = peekThroughBitcasts(Val);
21359 // Deal with constants of wrong size.
21360 if (ElementSizeBits != Val.getValueSizeInBits()) {
21361 auto *C = dyn_cast<ConstantSDNode>(Val);
21362 if (!C)
21363 // Not clear how to truncate FP values.
21364 // TODO: Handle truncation of build_vector constants
21365 return false;
21366
21367 EVT IntMemVT =
21368 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
21369 Val = DAG.getConstant(C->getAPIntValue()
21370 .zextOrTrunc(Val.getValueSizeInBits())
21371 .zextOrTrunc(ElementSizeBits),
21372 SDLoc(C), IntMemVT);
21373 }
21374 // Make sure the value has the correctly sized type.
21375 Val = DAG.getBitcast(MemVT, Val);
21376 }
21377 BuildVector.push_back(Val);
21378 }
21379 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21380 : ISD::BUILD_VECTOR,
21381 DL, StoreTy, BuildVector);
21382 } else {
21383 SmallVector<SDValue, 8> Ops;
21384 for (unsigned i = 0; i < NumStores; ++i) {
21385 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21386 SDValue Val = peekThroughBitcasts(St->getValue());
21387 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21388 // type MemVT. If the underlying value is not the correct
21389 // type, but it is an extraction of an appropriate vector we
21390 // can recast Val to be of the correct type. This may require
21391 // converting between EXTRACT_VECTOR_ELT and
21392 // EXTRACT_SUBVECTOR.
21393 if ((MemVT != Val.getValueType()) &&
21394 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
21395 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
21396 EVT MemVTScalarTy = MemVT.getScalarType();
21397 // We may need to add a bitcast here to get types to line up.
21398 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21399 Val = DAG.getBitcast(MemVT, Val);
21400 } else if (MemVT.isVector() &&
21401 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
21402 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21403 } else {
21404 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
21405 : ISD::EXTRACT_VECTOR_ELT;
21406 SDValue Vec = Val.getOperand(0);
21407 SDValue Idx = Val.getOperand(1);
21408 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21409 }
21410 }
21411 Ops.push_back(Val);
21412 }
21413
21414 // Build the extracted vector elements back into a vector.
21415 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21416 : ISD::BUILD_VECTOR,
21417 DL, StoreTy, Ops);
21418 }
21419 } else {
21420 // We should always use a vector store when merging extracted vector
21421 // elements, so this path implies a store of constants.
21422 assert(IsConstantSrc && "Merged vector elements should use vector store");
21423
21424 APInt StoreInt(SizeInBits, 0);
21425
21426 // Construct a single integer constant which is made of the smaller
21427 // constant inputs.
21428 bool IsLE = DAG.getDataLayout().isLittleEndian();
21429 for (unsigned i = 0; i < NumStores; ++i) {
21430 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21431 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21432
21433 SDValue Val = St->getValue();
21434 Val = peekThroughBitcasts(Val);
21435 StoreInt <<= ElementSizeBits;
21436 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21437 StoreInt |= C->getAPIntValue()
21438 .zextOrTrunc(ElementSizeBits)
21439 .zextOrTrunc(SizeInBits);
21440 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21441 StoreInt |= C->getValueAPF()
21442 .bitcastToAPInt()
21443 .zextOrTrunc(ElementSizeBits)
21444 .zextOrTrunc(SizeInBits);
21445 // If fp truncation is necessary give up for now.
21446 if (MemVT.getSizeInBits() != ElementSizeBits)
21447 return false;
21448 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
21449 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
21450 // Not yet handled
21451 return false;
21452 } else {
21453 llvm_unreachable("Invalid constant element type");
21454 }
21455 }
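// For example, merging four i8 stores of 0x11, 0x22, 0x33 and 0x44 (in
// address order) produces StoreInt = 0x44332211 on a little-endian target,
// where the first store's value ends up in the least significant byte, and
// 0x11223344 on a big-endian target.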
21456
21457 // Create the new Load and Store operations.
21458 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21459 }
21460
21461 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21462 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21463 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21464
21465 // Make sure we use a truncating store if that is necessary for it to be legal.
21466 // When generating the new widened store, if the first store's pointer info
21467 // cannot be reused, discard the pointer info except for the address space,
21468 // because the widened store can no longer be represented by the original
21469 // pointer info, which described the narrower memory object.
21470 SDValue NewStore;
21471 if (!UseTrunc) {
21472 NewStore = DAG.getStore(
21473 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21474 CanReusePtrInfo
21475 ? FirstInChain->getPointerInfo()
21476 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21477 FirstInChain->getAlign(), *Flags, AAInfo);
21478 } else { // Must be realized as a trunc store
21479 EVT LegalizedStoredValTy =
21480 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21481 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21482 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21483 SDValue ExtendedStoreVal =
21484 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21485 LegalizedStoredValTy);
21486 NewStore = DAG.getTruncStore(
21487 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21488 CanReusePtrInfo
21489 ? FirstInChain->getPointerInfo()
21490 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21491 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21492 AAInfo);
21493 }
21494
21495 // Replace all merged stores with the new store.
21496 for (unsigned i = 0; i < NumStores; ++i)
21497 CombineTo(StoreNodes[i].MemNode, NewStore);
21498
21499 AddToWorklist(NewChain.getNode());
21500 return true;
21501}
21502
21503SDNode *
21504DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21505 SmallVectorImpl<MemOpLink> &StoreNodes) {
21506 // This holds the base pointer, index, and the offset in bytes from the base
21507 // pointer. We must have a base and an offset. Do not handle stores to undef
21508 // base pointers.
21509 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21510 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21511 return nullptr;
21512
21513 SDValue Val = peekThroughBitcasts(St->getValue());
21514 StoreSource StoreSrc = getStoreSource(Val);
21515 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21516
21517 // Match on loadbaseptr if relevant.
21518 EVT MemVT = St->getMemoryVT();
21519 BaseIndexOffset LBasePtr;
21520 EVT LoadVT;
21521 if (StoreSrc == StoreSource::Load) {
21522 auto *Ld = cast<LoadSDNode>(Val);
21523 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21524 LoadVT = Ld->getMemoryVT();
21525 // Load and store should be the same type.
21526 if (MemVT != LoadVT)
21527 return nullptr;
21528 // Loads must only have one use.
21529 if (!Ld->hasNUsesOfValue(1, 0))
21530 return nullptr;
21531 // The memory operands must not be volatile/indexed/atomic.
21532 // TODO: May be able to relax for unordered atomics (see D66309)
21533 if (!Ld->isSimple() || Ld->isIndexed())
21534 return nullptr;
21535 }
21536 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21537 int64_t &Offset) -> bool {
21538 // The memory operands must not be volatile/indexed/atomic.
21539 // TODO: May be able to relax for unordered atomics (see D66309)
21540 if (!Other->isSimple() || Other->isIndexed())
21541 return false;
21542 // Don't mix temporal stores with non-temporal stores.
21543 if (St->isNonTemporal() != Other->isNonTemporal())
21544 return false;
21545 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
21546 return false;
21547 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21548 // Allow merging constants of different types as integers.
21549 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21550 : Other->getMemoryVT() != MemVT;
21551 switch (StoreSrc) {
21552 case StoreSource::Load: {
21553 if (NoTypeMatch)
21554 return false;
21555 // The Load's Base Ptr must also match.
21556 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21557 if (!OtherLd)
21558 return false;
21559 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21560 if (LoadVT != OtherLd->getMemoryVT())
21561 return false;
21562 // Loads must only have one use.
21563 if (!OtherLd->hasNUsesOfValue(1, 0))
21564 return false;
21565 // The memory operands must not be volatile/indexed/atomic.
21566 // TODO: May be able to relax for unordered atomics (see D66309)
21567 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21568 return false;
21569 // Don't mix temporal loads with non-temporal loads.
21570 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21571 return false;
21572 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
21573 *OtherLd))
21574 return false;
21575 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21576 return false;
21577 break;
21578 }
21579 case StoreSource::Constant:
21580 if (NoTypeMatch)
21581 return false;
21582 if (getStoreSource(OtherBC) != StoreSource::Constant)
21583 return false;
21584 break;
21585 case StoreSource::Extract:
21586 // Do not merge truncated stores here.
21587 if (Other->isTruncatingStore())
21588 return false;
21589 if (!MemVT.bitsEq(OtherBC.getValueType()))
21590 return false;
21591 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
21592 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21593 return false;
21594 break;
21595 default:
21596 llvm_unreachable("Unhandled store source for merging");
21597 }
21598 Ptr = BaseIndexOffset::match(Other, DAG);
21599 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
21600 };
21601
21602 // We are looking for a root node which is an ancestor to all mergeable
21603 // stores. We search up through a load, to our root and then down
21604 // through all children. For instance we will find Store{1,2,3} if
21605 // St is Store1, Store2, or Store3 where the root is not a load,
21606 // which is always true for non-volatile ops. TODO: Expand
21607 // the search to find all valid candidates through multiple layers of loads.
21608 //
21609 // Root
21610 // |-------|-------|
21611 // Load Load Store3
21612 // | |
21613 // Store1 Store2
21614 //
21615 // FIXME: We should be able to climb and
21616 // descend TokenFactors to find candidates as well.
21617
21618 SDNode *RootNode = St->getChain().getNode();
21619 // Bail out if we already analyzed this root node and found nothing.
21620 if (ChainsWithoutMergeableStores.contains(RootNode))
21621 return nullptr;
21622
21623 // Check whether this (StoreNode, RootNode) pair has already bailed out of
21624 // the dependence check more times than the limit allows.
21625 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
21626 SDNode *RootNode) -> bool {
21627 auto RootCount = StoreRootCountMap.find(StoreNode);
21628 return RootCount != StoreRootCountMap.end() &&
21629 RootCount->second.first == RootNode &&
21630 RootCount->second.second > StoreMergeDependenceLimit;
21631 };
21632
21633 auto TryToAddCandidate = [&](SDUse &Use) {
21634 // This must be a chain use.
21635 if (Use.getOperandNo() != 0)
21636 return;
21637 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
21638 BaseIndexOffset Ptr;
21639 int64_t PtrDiff;
21640 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
21641 !OverLimitInDependenceCheck(OtherStore, RootNode))
21642 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
21643 }
21644 };
21645
21646 unsigned NumNodesExplored = 0;
21647 const unsigned MaxSearchNodes = 1024;
21648 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
21649 RootNode = Ldn->getChain().getNode();
21650 // Bail out if we already analyzed this root node and found nothing.
21651 if (ChainsWithoutMergeableStores.contains(RootNode))
21652 return nullptr;
21653 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21654 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
21655 SDNode *User = I->getUser();
21656 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
21657 for (SDUse &U2 : User->uses())
21658 TryToAddCandidate(U2);
21659 }
21660 // Check stores that depend on the root (e.g. Store 3 in the chart above).
21661 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
21662 TryToAddCandidate(*I);
21663 }
21664 }
21665 } else {
21666 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21667 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
21668 TryToAddCandidate(*I);
21669 }
21670
21671 return RootNode;
21672}
21673
21674// We need to check that merging these stores does not cause a loop in the
21675// DAG. Any store candidate may depend on another candidate indirectly through
21676// its operands. Check in parallel by searching up from operands of candidates.
21677bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21678 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21679 SDNode *RootNode) {
21680 // FIXME: We should be able to truncate a full search of
21681 // predecessors by doing a BFS and keeping tabs on the originating
21682 // stores from which worklist nodes come, in a similar way to
21683 // TokenFactor simplification.
21684
21685 SmallPtrSet<const SDNode *, 32> Visited;
21686 SmallVector<const SDNode *, 8> Worklist;
21687
21688 // RootNode is a predecessor to all candidates so we need not search
21689 // past it. Add RootNode (peeking through TokenFactors). Do not count
21690 // these towards size check.
21691
21692 Worklist.push_back(RootNode);
21693 while (!Worklist.empty()) {
21694 auto N = Worklist.pop_back_val();
21695 if (!Visited.insert(N).second)
21696 continue; // Already present in Visited.
21697 if (N->getOpcode() == ISD::TokenFactor) {
21698 for (SDValue Op : N->ops())
21699 Worklist.push_back(Op.getNode());
21700 }
21701 }
21702
21703 // Don't count pruning nodes towards max.
21704 unsigned int Max = 1024 + Visited.size();
21705 // Search Ops of store candidates.
21706 for (unsigned i = 0; i < NumStores; ++i) {
21707 SDNode *N = StoreNodes[i].MemNode;
21708 // Of the 4 Store Operands:
21709 // * Chain (Op 0) -> We have already considered these
21710 // in candidate selection, but only by following the
21711 // chain dependencies. We could still have a chain
21712 // dependency to a load, that has a non-chain dep to
21713 // another load, that depends on a store, etc. So it is
21714 // possible to have dependencies that consist of a mix
21715 // of chain and non-chain deps, and we need to include
21716 // chain operands in the analysis here.
21717 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21718 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21719 // but aren't necessarily from the same base node, so
21720 // cycles possible (e.g. via indexed store).
21721 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21722 // non-indexed stores). Not constant on all targets (e.g. ARM)
21723 // and so can participate in a cycle.
21724 for (const SDValue &Op : N->op_values())
21725 Worklist.push_back(Op.getNode());
21726 }
21727 // Search through DAG. We can stop early if we find a store node.
21728 for (unsigned i = 0; i < NumStores; ++i)
21729 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21730 Max)) {
21731 // If the search bails out, record the StoreNode and RootNode in the
21732 // StoreRootCountMap. If we have seen the pair many times over the limit,
21733 // we won't add the StoreNode into the StoreNodes set again.
21734 if (Visited.size() >= Max) {
21735 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21736 if (RootCount.first == RootNode)
21737 RootCount.second++;
21738 else
21739 RootCount = {RootNode, 1};
21740 }
21741 return false;
21742 }
21743 return true;
21744}
21745
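// Helper used when merging stores of loads: walk the chain from the store St
// back towards the load Ld, looking through TokenFactors, and return true if
// a call sequence (CALLSEQ_END) is found in between.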
21746bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
21747 SmallPtrSet<const SDNode *, 32> Visited;
21748 SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
21749 Worklist.emplace_back(St->getChain().getNode(), false);
21750
21751 while (!Worklist.empty()) {
21752 auto [Node, FoundCall] = Worklist.pop_back_val();
21753 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
21754 continue;
21755
21756 switch (Node->getOpcode()) {
21757 case ISD::CALLSEQ_END:
21758 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
21759 break;
21760 case ISD::TokenFactor:
21761 for (SDValue Op : Node->ops())
21762 Worklist.emplace_back(Op.getNode(), FoundCall);
21763 break;
21764 case ISD::LOAD:
21765 if (Node == Ld)
21766 return FoundCall;
21767 [[fallthrough]];
21768 default:
21769 assert(Node->getOperand(0).getValueType() == MVT::Other &&
21770 "Invalid chain type");
21771 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
21772 break;
21773 }
21774 }
21775 return false;
21776}
21777
21778unsigned
21779DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21780 int64_t ElementSizeBytes) const {
21781 while (true) {
21782 // Find a store past the width of the first store.
21783 size_t StartIdx = 0;
21784 while ((StartIdx + 1 < StoreNodes.size()) &&
21785 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21786 StoreNodes[StartIdx + 1].OffsetFromBase)
21787 ++StartIdx;
21788
21789 // Bail if we don't have enough candidates to merge.
21790 if (StartIdx + 1 >= StoreNodes.size())
21791 return 0;
21792
21793 // Trim stores that overlapped with the first store.
21794 if (StartIdx)
21795 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21796
21797 // Scan the memory operations on the chain and find the first
21798 // non-consecutive store memory address.
21799 unsigned NumConsecutiveStores = 1;
21800 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21801 // Check that the addresses are consecutive starting from the second
21802 // element in the list of stores.
21803 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21804 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21805 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21806 break;
21807 NumConsecutiveStores = i + 1;
21808 }
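// For example, with ElementSizeBytes = 4 and sorted offsets {0, 4, 8, 20},
// the loop above stops at offset 20 and NumConsecutiveStores becomes 3.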
21809 if (NumConsecutiveStores > 1)
21810 return NumConsecutiveStores;
21811
21812 // There are no consecutive stores at the start of the list.
21813 // Remove the first store and try again.
21814 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21815 }
21816}
21817
21818bool DAGCombiner::tryStoreMergeOfConstants(
21819 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21820 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21821 LLVMContext &Context = *DAG.getContext();
21822 const DataLayout &DL = DAG.getDataLayout();
21823 int64_t ElementSizeBytes = MemVT.getStoreSize();
21824 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21825 bool MadeChange = false;
21826
21827 // Store the constants into memory as one consecutive store.
21828 while (NumConsecutiveStores >= 2) {
21829 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21830 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21831 Align FirstStoreAlign = FirstInChain->getAlign();
21832 unsigned LastLegalType = 1;
21833 unsigned LastLegalVectorType = 1;
21834 bool LastIntegerTrunc = false;
21835 bool NonZero = false;
21836 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21837 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21838 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21839 SDValue StoredVal = ST->getValue();
21840 bool IsElementZero = false;
21841 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21842 IsElementZero = C->isZero();
21843 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21844 IsElementZero = C->getConstantFPValue()->isNullValue();
21845 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21846 IsElementZero = true;
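// FirstZeroAfterNonZero records the index of the first zero element that
// follows a nonzero element; the skip heuristic below never advances past
// it, keeping the zero run available for a later merge attempt.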
21847 if (IsElementZero) {
21848 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21849 FirstZeroAfterNonZero = i;
21850 }
21851 NonZero |= !IsElementZero;
21852
21853 // Find a legal type for the constant store.
21854 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21855 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21856 unsigned IsFast = 0;
21857
21858 // Break early when size is too large to be legal.
21859 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21860 break;
21861
21862 if (TLI.isTypeLegal(StoreTy) &&
21863 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21864 DAG.getMachineFunction()) &&
21865 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21866 *FirstInChain->getMemOperand(), &IsFast) &&
21867 IsFast) {
21868 LastIntegerTrunc = false;
21869 LastLegalType = i + 1;
21870 // Or check whether a truncstore is legal.
21871 } else if (TLI.getTypeAction(Context, StoreTy) ==
21872 TargetLowering::TypePromoteInteger) {
21873 EVT LegalizedStoredValTy =
21874 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21875 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21876 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21877 DAG.getMachineFunction()) &&
21878 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21879 *FirstInChain->getMemOperand(), &IsFast) &&
21880 IsFast) {
21881 LastIntegerTrunc = true;
21882 LastLegalType = i + 1;
21883 }
21884 }
21885
21886 // We only use vectors if the target allows it and the function is not
21887 // marked with the noimplicitfloat attribute.
21888 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21889 AllowVectors) {
21890 // Find a legal type for the vector store.
21891 unsigned Elts = (i + 1) * NumMemElts;
21892 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21893 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21894 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21895 TLI.allowsMemoryAccess(Context, DL, Ty,
21896 *FirstInChain->getMemOperand(), &IsFast) &&
21897 IsFast)
21898 LastLegalVectorType = i + 1;
21899 }
21900 }
21901
21902 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21903 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21904 bool UseTrunc = LastIntegerTrunc && !UseVector;
21905
21906 // Check if we found a legal integer type that creates a meaningful
21907 // merge.
21908 if (NumElem < 2) {
21909 // We know that candidate stores are in order and of correct
21910 // shape. While there is no mergeable sequence from the
21911 // beginning one may start later in the sequence. The only
21912 // reason a merge of size N could have failed where another of
21913 // the same size would not have, is if the alignment has
21914 // improved or we've dropped a non-zero value. Drop as many
21915 // candidates as we can here.
21916 unsigned NumSkip = 1;
21917 while ((NumSkip < NumConsecutiveStores) &&
21918 (NumSkip < FirstZeroAfterNonZero) &&
21919 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21920 NumSkip++;
21921
21922 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21923 NumConsecutiveStores -= NumSkip;
21924 continue;
21925 }
21926
21927 // Check that we can merge these candidates without causing a cycle.
21928 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21929 RootNode)) {
21930 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21931 NumConsecutiveStores -= NumElem;
21932 continue;
21933 }
21934
21935 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21936 /*IsConstantSrc*/ true,
21937 UseVector, UseTrunc);
21938
21939 // Remove merged stores for next iteration.
21940 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21941 NumConsecutiveStores -= NumElem;
21942 }
21943 return MadeChange;
21944}
21945
21946bool DAGCombiner::tryStoreMergeOfExtracts(
21947 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21948 EVT MemVT, SDNode *RootNode) {
21949 LLVMContext &Context = *DAG.getContext();
21950 const DataLayout &DL = DAG.getDataLayout();
21951 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21952 bool MadeChange = false;
21953
21954 // Loop on Consecutive Stores on success.
21955 while (NumConsecutiveStores >= 2) {
21956 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21957 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21958 Align FirstStoreAlign = FirstInChain->getAlign();
21959 unsigned NumStoresToMerge = 1;
21960 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21961 // Find a legal type for the vector store.
21962 unsigned Elts = (i + 1) * NumMemElts;
21963 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21964 unsigned IsFast = 0;
21965
21966 // Break early when size is too large to be legal.
21967 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
21968 break;
21969
21970 if (TLI.isTypeLegal(Ty) &&
21971 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21972 TLI.allowsMemoryAccess(Context, DL, Ty,
21973 *FirstInChain->getMemOperand(), &IsFast) &&
21974 IsFast)
21975 NumStoresToMerge = i + 1;
21976 }
21977
21978 // Check if we found a legal integer type creating a meaningful
21979 // merge.
21980 if (NumStoresToMerge < 2) {
21981 // We know that candidate stores are in order and of correct
21982 // shape. While there is no mergeable sequence from the
21983 // beginning one may start later in the sequence. The only
21984 // reason a merge of size N could have failed where another of
21985 // the same size would not have, is if the alignment has
21986 // improved. Drop as many candidates as we can here.
21987 unsigned NumSkip = 1;
21988 while ((NumSkip < NumConsecutiveStores) &&
21989 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21990 NumSkip++;
21991
21992 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21993 NumConsecutiveStores -= NumSkip;
21994 continue;
21995 }
21996
21997 // Check that we can merge these candidates without causing a cycle.
21998 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
21999 RootNode)) {
22000 StoreNodes.erase(StoreNodes.begin(),
22001 StoreNodes.begin() + NumStoresToMerge);
22002 NumConsecutiveStores -= NumStoresToMerge;
22003 continue;
22004 }
22005
22006 MadeChange |= mergeStoresOfConstantsOrVecElts(
22007 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22008 /*UseVector*/ true, /*UseTrunc*/ false);
22009
22010 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22011 NumConsecutiveStores -= NumStoresToMerge;
22012 }
22013 return MadeChange;
22014}
22015
22016bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22017 unsigned NumConsecutiveStores, EVT MemVT,
22018 SDNode *RootNode, bool AllowVectors,
22019 bool IsNonTemporalStore,
22020 bool IsNonTemporalLoad) {
22021 LLVMContext &Context = *DAG.getContext();
22022 const DataLayout &DL = DAG.getDataLayout();
22023 int64_t ElementSizeBytes = MemVT.getStoreSize();
22024 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22025 bool MadeChange = false;
22026
22027 // Look for load nodes which are used by the stored values.
22028 SmallVector<MemOpLink, 8> LoadNodes;
22029
22030 // Find acceptable loads. Loads need to have the same chain (token factor),
22031 // must not be zext, volatile, or indexed, and they must be consecutive.
22032 BaseIndexOffset LdBasePtr;
22033
22034 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22035 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22036 SDValue Val = peekThroughBitcasts(St->getValue());
22037 LoadSDNode *Ld = cast<LoadSDNode>(Val);
22038
22039 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22040 // If this is not the first ptr that we check.
22041 int64_t LdOffset = 0;
22042 if (LdBasePtr.getBase().getNode()) {
22043 // The base ptr must be the same.
22044 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22045 break;
22046 } else {
22047 // Check that all other base pointers are the same as this one.
22048 LdBasePtr = LdPtr;
22049 }
22050
22051 // We found a potential memory operand to merge.
22052 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22053 }
22054
22055 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22056 Align RequiredAlignment;
22057 bool NeedRotate = false;
22058 if (LoadNodes.size() == 2) {
22059 // If we have load/store pair instructions and we only have two values,
22060 // don't bother merging.
22061 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22062 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22063 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22064 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22065 break;
22066 }
22067 // If the loads are reversed, see if we can rotate the halves into place.
22068 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22069 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22070 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22071 if (Offset0 - Offset1 == ElementSizeBytes &&
22072 (hasOperation(ISD::ROTL, PairVT) ||
22073 hasOperation(ISD::ROTR, PairVT))) {
22074 std::swap(LoadNodes[0], LoadNodes[1]);
22075 NeedRotate = true;
22076 }
22077 }
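// For example, if the first store writes the value loaded from p + 4 and the
// second store writes the value loaded from p (4-byte elements), the pair
// can be loaded as a single i64 from p and rotated by 32 bits before being
// stored, provided a 64-bit ROTL or ROTR is available.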
22078 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22079 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22080 Align FirstStoreAlign = FirstInChain->getAlign();
22081 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22082
22083 // Scan the memory operations on the chain and find the first
22084 // non-consecutive load memory address. These variables hold the index in
22085 // the store node array.
22086
22087 unsigned LastConsecutiveLoad = 1;
22088
22089 // This variable refers to the size and not the index in the array.
22090 unsigned LastLegalVectorType = 1;
22091 unsigned LastLegalIntegerType = 1;
22092 bool isDereferenceable = true;
22093 bool DoIntegerTruncate = false;
22094 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22095 SDValue LoadChain = FirstLoad->getChain();
22096 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22097 // All loads must share the same chain.
22098 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22099 break;
22100
22101 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22102 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22103 break;
22104 LastConsecutiveLoad = i;
22105
22106 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22107 isDereferenceable = false;
22108
22109 // Find a legal type for the vector store.
22110 unsigned Elts = (i + 1) * NumMemElts;
22111 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22112
22113 // Break early when size is too large to be legal.
22114 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22115 break;
22116
22117 unsigned IsFastSt = 0;
22118 unsigned IsFastLd = 0;
22119 // Don't try vector types if we need a rotate. We may still fail the
22120 // legality checks for the integer type, but we can't handle the rotate
22121 // case with vectors.
22122 // FIXME: We could use a shuffle in place of the rotate.
22123 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22124 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22125 DAG.getMachineFunction()) &&
22126 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22127 *FirstInChain->getMemOperand(), &IsFastSt) &&
22128 IsFastSt &&
22129 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22130 *FirstLoad->getMemOperand(), &IsFastLd) &&
22131 IsFastLd) {
22132 LastLegalVectorType = i + 1;
22133 }
22134
22135 // Find a legal type for the integer store.
22136 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22137 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22138 if (TLI.isTypeLegal(StoreTy) &&
22139 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22140 DAG.getMachineFunction()) &&
22141 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22142 *FirstInChain->getMemOperand(), &IsFastSt) &&
22143 IsFastSt &&
22144 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22145 *FirstLoad->getMemOperand(), &IsFastLd) &&
22146 IsFastLd) {
22147 LastLegalIntegerType = i + 1;
22148 DoIntegerTruncate = false;
22149 // Or check whether a truncstore and extload is legal.
22150 } else if (TLI.getTypeAction(Context, StoreTy) ==
22151 TargetLowering::TypePromoteInteger) {
22152 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22153 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22154 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22155 DAG.getMachineFunction()) &&
22156 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22157 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22158 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22159 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22160 *FirstInChain->getMemOperand(), &IsFastSt) &&
22161 IsFastSt &&
22162 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22163 *FirstLoad->getMemOperand(), &IsFastLd) &&
22164 IsFastLd) {
22165 LastLegalIntegerType = i + 1;
22166 DoIntegerTruncate = true;
22167 }
22168 }
22169 }
22170
22171 // Only use vector types if the vector type is larger than the integer
22172 // type. If they are the same, use integers.
22173 bool UseVectorTy =
22174 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22175 unsigned LastLegalType =
22176 std::max(LastLegalVectorType, LastLegalIntegerType);
22177
22178 // We add +1 here because the LastXXX variables refer to an index (location)
22179 // while NumElem refers to an element count (array size).
22180 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22181 NumElem = std::min(LastLegalType, NumElem);
22182 Align FirstLoadAlign = FirstLoad->getAlign();
22183
22184 if (NumElem < 2) {
22185 // We know that candidate stores are in order and of correct
22186 // shape. While there is no mergeable sequence from the
22187 // beginning one may start later in the sequence. The only
22188 // reason a merge of size N could have failed where another of
22189 // the same size would not have is if the alignment or either
22190 // the load or store has improved. Drop as many candidates as we
22191 // can here.
22192 unsigned NumSkip = 1;
22193 while ((NumSkip < LoadNodes.size()) &&
22194 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22195 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22196 NumSkip++;
22197 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22198 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22199 NumConsecutiveStores -= NumSkip;
22200 continue;
22201 }
22202
22203 // Check that we can merge these candidates without causing a cycle.
22204 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22205 RootNode)) {
22206 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22207 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22208 NumConsecutiveStores -= NumElem;
22209 continue;
22210 }
22211
22212 // Find if it is better to use vectors or integers to load and store
22213 // to memory.
22214 EVT JointMemOpVT;
22215 if (UseVectorTy) {
22216 // Find a legal type for the vector store.
22217 unsigned Elts = NumElem * NumMemElts;
22218 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22219 } else {
22220 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22221 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22222 }
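// For example, merging four scalar i16 stores (NumElem = 4, ElementSizeBytes
// = 2) uses JointMemOpVT = v4i16 when vectors are preferred, or i64 otherwise.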
22223
22224 // Check if there is a call in the load/store chain.
22225 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22226 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22227 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22228 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22229 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22230 NumConsecutiveStores -= NumElem;
22231 continue;
22232 }
22233
22234 SDLoc LoadDL(LoadNodes[0].MemNode);
22235 SDLoc StoreDL(StoreNodes[0].MemNode);
22236
22237 // The merged loads are required to have the same incoming chain, so
22238 // using the first's chain is acceptable.
22239
22240 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22241 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22242 AddToWorklist(NewStoreChain.getNode());
22243
22244 MachineMemOperand::Flags LdMMOFlags =
22245 isDereferenceable ? MachineMemOperand::MODereferenceable
22246 : MachineMemOperand::MONone;
22247 if (IsNonTemporalLoad)
22248 LdMMOFlags |= MachineMemOperand::MONonTemporal;
22249
22250 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
22251
22252 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22253 ? MachineMemOperand::MONonTemporal
22254 : MachineMemOperand::MONone;
22255
22256 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22257
22258 SDValue NewLoad, NewStore;
22259 if (UseVectorTy || !DoIntegerTruncate) {
22260 NewLoad = DAG.getLoad(
22261 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22262 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22263 SDValue StoreOp = NewLoad;
22264 if (NeedRotate) {
22265 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22266 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22267 "Unexpected type for rotate-able load pair");
22268 SDValue RotAmt =
22269 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22270 // Target can convert to the identical ROTR if it does not have ROTL.
22271 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22272 }
22273 NewStore = DAG.getStore(
22274 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22275 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22276 : MachinePointerInfo(FirstStoreAS),
22277 FirstStoreAlign, StMMOFlags);
22278 } else { // This must be the truncstore/extload case
22279 EVT ExtendedTy =
22280 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22281 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22282 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22283 FirstLoad->getPointerInfo(), JointMemOpVT,
22284 FirstLoadAlign, LdMMOFlags);
22285 NewStore = DAG.getTruncStore(
22286 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22287 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22288 : MachinePointerInfo(FirstStoreAS),
22289 JointMemOpVT, FirstInChain->getAlign(),
22290 FirstInChain->getMemOperand()->getFlags());
22291 }
22292
22293 // Transfer chain users from old loads to the new load.
22294 for (unsigned i = 0; i < NumElem; ++i) {
22295 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22296 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22297 SDValue(NewLoad.getNode(), 1));
22298 }
22299
22300 // Replace all stores with the new store. Recursively remove corresponding
22301 // values if they are no longer used.
22302 for (unsigned i = 0; i < NumElem; ++i) {
22303 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22304 CombineTo(StoreNodes[i].MemNode, NewStore);
22305 if (Val->use_empty())
22306 recursivelyDeleteUnusedNodes(Val.getNode());
22307 }
22308
22309 MadeChange = true;
22310 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22311 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22312 NumConsecutiveStores -= NumElem;
22313 }
22314 return MadeChange;
22315}
22316
22317bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22318 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22319 return false;
22320
22321 // TODO: Extend this function to merge stores of scalable vectors.
22322 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22323 // store since we know <vscale x 16 x i8> is exactly twice as large as
22324 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22325 EVT MemVT = St->getMemoryVT();
22326 if (MemVT.isScalableVT())
22327 return false;
22328 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22329 return false;
22330
22331 // This function cannot currently deal with non-byte-sized memory sizes.
22332 int64_t ElementSizeBytes = MemVT.getStoreSize();
22333 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22334 return false;
22335
22336 // Do not bother looking at stored values that are not constants, loads, or
22337 // extracted vector elements.
22338 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22339 const StoreSource StoreSrc = getStoreSource(StoredVal);
22340 if (StoreSrc == StoreSource::Unknown)
22341 return false;
22342
22343 SmallVector<MemOpLink, 8> StoreNodes;
22344 // Find potential store merge candidates by searching through chain sub-DAG
22345 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22346
22347 // Check if there is anything to merge.
22348 if (StoreNodes.size() < 2)
22349 return false;
22350
22351 // Sort the memory operands according to their distance from the
22352 // base pointer.
22353 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22354 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22355 });
22356
22357 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22358 Attribute::NoImplicitFloat);
22359 bool IsNonTemporalStore = St->isNonTemporal();
22360 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22361 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22362
22363 // Store Merge attempts to merge the lowest stores. This generally
22364 // works out well if successful, as the remaining stores are checked
22365 // after the first collection of stores is merged. However, in the
22366 // case that a non-mergeable store is found first, e.g., {p[-2],
22367 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22368 // mergeable cases. To prevent this, we prune such stores from the
22369 // front of StoreNodes here.
22370 bool MadeChange = false;
22371 while (StoreNodes.size() > 1) {
22372 unsigned NumConsecutiveStores =
22373 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22374 // There are no more stores in the list to examine.
22375 if (NumConsecutiveStores == 0)
22376 return MadeChange;
22377
22378 // We have at least 2 consecutive stores. Try to merge them.
22379 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22380 switch (StoreSrc) {
22381 case StoreSource::Constant:
22382 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22383 MemVT, RootNode, AllowVectors);
22384 break;
22385
22386 case StoreSource::Extract:
22387 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22388 MemVT, RootNode);
22389 break;
22390
22391 case StoreSource::Load:
22392 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22393 MemVT, RootNode, AllowVectors,
22394 IsNonTemporalStore, IsNonTemporalLoad);
22395 break;
22396
22397 default:
22398 llvm_unreachable("Unhandled store source type");
22399 }
22400 }
22401
22402 // Remember if we failed to optimize, to save compile time.
22403 if (!MadeChange)
22404 ChainsWithoutMergeableStores.insert(RootNode);
22405
22406 return MadeChange;
22407}
22408
22409SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22410 SDLoc SL(ST);
22411 SDValue ReplStore;
22412
22413 // Replace the chain to avoid dependency.
22414 if (ST->isTruncatingStore()) {
22415 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22416 ST->getBasePtr(), ST->getMemoryVT(),
22417 ST->getMemOperand());
22418 } else {
22419 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22420 ST->getMemOperand());
22421 }
22422
22423 // Create token to keep both nodes around.
22424 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22425 MVT::Other, ST->getChain(), ReplStore);
22426
22427 // Make sure the new and old chains are cleaned up.
22428 AddToWorklist(Token.getNode());
22429
22430 // Don't add users to work list.
22431 return CombineTo(ST, Token, false);
22432}
22433
22434SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22435 SDValue Value = ST->getValue();
22436 if (Value.getOpcode() == ISD::TargetConstantFP)
22437 return SDValue();
22438
22439 if (!ISD::isNormalStore(ST))
22440 return SDValue();
22441
22442 SDLoc DL(ST);
22443
22444 SDValue Chain = ST->getChain();
22445 SDValue Ptr = ST->getBasePtr();
22446
22447 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22448
22449 // NOTE: If the original store is volatile, this transform must not increase
22450 // the number of stores. For example, on x86-32 an f64 can be stored in one
22451 // processor operation but an i64 (which is not legal) requires two. So the
22452 // transform should not be done in this case.
22453
22454 SDValue Tmp;
22455 switch (CFP->getSimpleValueType(0).SimpleTy) {
22456 default:
22457 llvm_unreachable("Unknown FP type");
22458 case MVT::f16: // We don't do this for these yet.
22459 case MVT::bf16:
22460 case MVT::f80:
22461 case MVT::f128:
22462 case MVT::ppcf128:
22463 return SDValue();
22464 case MVT::f32:
22465 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22466 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22467 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22468 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22469 MVT::i32);
22470 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22471 }
22472
22473 return SDValue();
22474 case MVT::f64:
22475 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22476 ST->isSimple()) ||
22477 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22478 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22479 getZExtValue(), SDLoc(CFP), MVT::i64);
22480 return DAG.getStore(Chain, DL, Tmp,
22481 Ptr, ST->getMemOperand());
22482 }
22483
22484 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22485 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22486 // Many FP stores are not made apparent until after legalize, e.g. for
22487 // argument passing. Since this is so common, custom legalize the
22488 // 64-bit integer store into two 32-bit stores.
22489 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22490 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22491 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22492 if (DAG.getDataLayout().isBigEndian())
22493 std::swap(Lo, Hi);
22494
22495 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22496 AAMDNodes AAInfo = ST->getAAInfo();
22497
22498 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22499 ST->getBaseAlign(), MMOFlags, AAInfo);
22500 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22501 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22502 ST->getPointerInfo().getWithOffset(4),
22503 ST->getBaseAlign(), MMOFlags, AAInfo);
22504 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22505 St0, St1);
22506 }
22507
22508 return SDValue();
22509 }
22510}
22511
22512// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22513//
22514// If a store of a load with an element inserted into it has no other
22515// uses in between the chain, then we can consider the vector store
22516// dead and replace it with just the single scalar element store.
22517SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22518 SDLoc DL(ST);
22519 SDValue Value = ST->getValue();
22520 SDValue Ptr = ST->getBasePtr();
22521 SDValue Chain = ST->getChain();
22522 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22523 return SDValue();
22524
22525 SDValue Elt = Value.getOperand(1);
22526 SDValue Idx = Value.getOperand(2);
22527
22528 // If the element isn't byte sized or is implicitly truncated then we can't
22529 // compute an offset.
22530 EVT EltVT = Elt.getValueType();
22531 if (!EltVT.isByteSized() ||
22532 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22533 return SDValue();
22534
22535 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22536 if (!Ld || Ld->getBasePtr() != Ptr ||
22537 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22538 !ISD::isNormalStore(ST) ||
22539 Ld->getAddressSpace() != ST->getAddressSpace() ||
22540 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22541 return SDValue();
22542
22543 unsigned IsFast;
22544 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22545 Elt.getValueType(), ST->getAddressSpace(),
22546 ST->getAlign(), ST->getMemOperand()->getFlags(),
22547 &IsFast) ||
22548 !IsFast)
22549 return SDValue();
22550
22551 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22552
22553 // If the offset is a known constant then try to recover the pointer
22554 // info
22555 SDValue NewPtr;
22556 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22557 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22558 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22559 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22560 } else {
22561 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
22562 }
22563
22564 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22565 ST->getMemOperand()->getFlags());
22566}
22567
22568SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22569 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22570 SDValue Val = ST->getVal();
22571 EVT VT = Val.getValueType();
22572 EVT MemVT = ST->getMemoryVT();
22573
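 // A truncating atomic store only demands the low MemVT bits of the value,
 // e.g. an i32 value stored to an i16 location only needs its low 16 bits.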
22574 if (MemVT.bitsLT(VT)) { // Is truncating store
22575 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22576 MemVT.getScalarSizeInBits());
22577 // See if we can simplify the operation with SimplifyDemandedBits, which
22578 // only works if the value has a single use.
22579 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22580 return SDValue(N, 0);
22581 }
22582
22583 return SDValue();
22584}
22585
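/// Try to fold a store of a vector select into a masked store, e.g.
/// (store (vselect Mask, X, (load Ptr)), Ptr) -> (masked_store X, Ptr, Mask)
/// (store (vselect Mask, (load Ptr), X), Ptr) -> (masked_store X, Ptr, ~Mask)
/// This requires MSTORE support for the stored type and no side effects
/// between the load and the store.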
22586 static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
22587 const SDLoc &Dl) {
22588 if (!Store->isSimple() || !ISD::isNormalStore(Store))
22589 return SDValue();
22590
22591 SDValue StoredVal = Store->getValue();
22592 SDValue StorePtr = Store->getBasePtr();
22593 SDValue StoreOffset = Store->getOffset();
22594 EVT VT = Store->getMemoryVT();
22595 unsigned AddrSpace = Store->getAddressSpace();
22596 Align Alignment = Store->getAlign();
22597 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22598
22599 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
22600 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
22601 return SDValue();
22602
22603 SDValue Mask, OtherVec, LoadCh;
22604 unsigned LoadPos;
22605 if (sd_match(StoredVal,
22606 m_VSelect(m_Value(Mask), m_Value(OtherVec),
22607 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22608 m_Specific(StoreOffset))))) {
22609 LoadPos = 2;
22610 } else if (sd_match(StoredVal,
22611 m_VSelect(m_Value(Mask),
22612 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22613 m_Specific(StoreOffset)),
22614 m_Value(OtherVec)))) {
22615 LoadPos = 1;
22616 } else {
22617 return SDValue();
22618 }
22619
22620 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
22621 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
22622 Load->getAddressSpace() != AddrSpace)
22623 return SDValue();
22624
22625 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
22626 return SDValue();
22627
22628 if (LoadPos == 1)
22629 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
22630
22631 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
22632 StoreOffset, Mask, VT, Store->getMemOperand(),
22633 Store->getAddressingMode());
22634}
22635
22636SDValue DAGCombiner::visitSTORE(SDNode *N) {
22637 StoreSDNode *ST = cast<StoreSDNode>(N);
22638 SDValue Chain = ST->getChain();
22639 SDValue Value = ST->getValue();
22640 SDValue Ptr = ST->getBasePtr();
22641
22642 // If this is a store of a bit convert, store the input value if the
22643 // resultant store does not need a higher alignment than the original.
22644 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
22645 ST->isUnindexed()) {
22646 EVT SVT = Value.getOperand(0).getValueType();
22647 // If the store is volatile, we only want to change the store type if the
22648 // resulting store is legal. Otherwise we might increase the number of
22649 // memory accesses. We don't care if the original type was legal or not
22650 // as we assume software couldn't rely on the number of accesses of an
22651 // illegal type.
22652 // TODO: May be able to relax for unordered atomics (see D66309)
22653 if (((!LegalOperations && ST->isSimple()) ||
22654 TLI.isOperationLegal(ISD::STORE, SVT)) &&
22655 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
22656 DAG, *ST->getMemOperand())) {
22657 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22658 ST->getMemOperand());
22659 }
22660 }
22661
22662 // Turn 'store undef, Ptr' -> nothing.
22663 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
22664 return Chain;
22665
22666 // Try to infer better alignment information than the store already has.
22667 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
22668 !ST->isAtomic()) {
22669 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
22670 if (*Alignment > ST->getAlign() &&
22671 isAligned(*Alignment, ST->getSrcValueOffset())) {
22672 SDValue NewStore =
22673 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
22674 ST->getMemoryVT(), *Alignment,
22675 ST->getMemOperand()->getFlags(), ST->getAAInfo());
22676 // NewStore will always be N as we are only refining the alignment
22677 assert(NewStore.getNode() == N);
22678 (void)NewStore;
22679 }
22680 }
22681 }
22682
22683 // Try transforming a pair floating point load / store ops to integer
22684 // load / store ops.
22685 if (SDValue NewST = TransformFPLoadStorePair(N))
22686 return NewST;
22687
22688 // Try transforming several stores into STORE (BSWAP).
22689 if (SDValue Store = mergeTruncStores(ST))
22690 return Store;
22691
22692 if (ST->isUnindexed()) {
22693 // Walk up chain skipping non-aliasing memory nodes, on this store and any
22694 // adjacent stores.
22695 if (findBetterNeighborChains(ST)) {
22696 // replaceStoreChain uses CombineTo, which handled all of the worklist
22697 // manipulation. Return the original node to not do anything else.
22698 return SDValue(ST, 0);
22699 }
22700 Chain = ST->getChain();
22701 }
22702
22703 // FIXME: is there such a thing as a truncating indexed store?
22704 if (ST->isTruncatingStore() && ST->isUnindexed() &&
22705 Value.getValueType().isInteger() &&
22706 (!isa<ConstantSDNode>(Value) ||
22707 !cast<ConstantSDNode>(Value)->isOpaque())) {
22708 // Convert a truncating store of an extension into a standard store.
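 // e.g. (truncstore (zext i16 X to i32), Ptr, i16) -> (store i16 X, Ptr)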
22709 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
22710 Value.getOpcode() == ISD::SIGN_EXTEND ||
22711 Value.getOpcode() == ISD::ANY_EXTEND) &&
22712 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
22713 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
22714 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22715 ST->getMemOperand());
22716
22717 APInt TruncDemandedBits =
22718 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
22719 ST->getMemoryVT().getScalarSizeInBits());
22720
22721 // See if we can simplify the operation with SimplifyDemandedBits, which
22722 // only works if the value has a single use.
22723 AddToWorklist(Value.getNode());
22724 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
22725 // Re-visit the store if anything changed and the store hasn't been merged
22726 // with another node (N is deleted); SimplifyDemandedBits will add Value's
22727 // node back to the worklist if necessary, but we also need to re-visit
22728 // the Store node itself.
22729 if (N->getOpcode() != ISD::DELETED_NODE)
22730 AddToWorklist(N);
22731 return SDValue(N, 0);
22732 }
22733
22734 // Otherwise, see if we can simplify the input to this truncstore with
22735 // knowledge that only the low bits are being used. For example:
22736 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
22737 if (SDValue Shorter =
22738 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
22739 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
22740 ST->getMemOperand());
22741
22742 // If we're storing a truncated constant, see if we can simplify it.
22743 // TODO: Move this to targetShrinkDemandedConstant?
22744 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
22745 if (!Cst->isOpaque()) {
22746 const APInt &CValue = Cst->getAPIntValue();
22747 APInt NewVal = CValue & TruncDemandedBits;
22748 if (NewVal != CValue) {
22749 SDValue Shorter =
22750 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
22751 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
22752 ST->getMemoryVT(), ST->getMemOperand());
22753 }
22754 }
22755 }
22756
22757 // If this is a load followed by a store to the same location, then the store
22758 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
22759 // TODO: Add big-endian truncate support with test coverage.
22760 // TODO: Can relax for unordered atomics (see D66309)
22761 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
22762 ? peekThroughTruncates(Value)
22763 : Value;
22764 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
22765 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
22766 ST->isUnindexed() && ST->isSimple() &&
22767 Ld->getAddressSpace() == ST->getAddressSpace() &&
22768 // There can't be any side effects between the load and store, such as
22769 // a call or store.
22770 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22771 // The store is dead, remove it.
22772 return Chain;
22773 }
22774 }
22775
22776 // Try scalarizing vector stores of loads where we only change one element
22777 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22778 return NewST;
22779
22780 // TODO: Can relax for unordered atomics (see D66309)
22781 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22782 if (ST->isUnindexed() && ST->isSimple() &&
22783 ST1->isUnindexed() && ST1->isSimple()) {
22784 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22785 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22786 ST->getAddressSpace() == ST1->getAddressSpace()) {
22787 // If this is a store followed by a store with the same value to the
22788 // same location, then the store is dead/noop.
22789 return Chain;
22790 }
22791
22792 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22793 !ST1->getBasePtr().isUndef() &&
22794 ST->getAddressSpace() == ST1->getAddressSpace()) {
22795 // We cannot use the fixed-size containment check below if either store
22796 // has a scalable memory type, since a scalable store's size is unknown at
22797 // compile time. In that case, only remove the preceding store if it uses
22798 // the same base pointer and is known to be no larger than this store.
22799 if (ST->getMemoryVT().isScalableVector() ||
22800 ST1->getMemoryVT().isScalableVector()) {
22801 if (ST1->getBasePtr() == Ptr &&
22802 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22803 ST->getMemoryVT().getStoreSize())) {
22804 CombineTo(ST1, ST1->getChain());
22805 return SDValue(N, 0);
22806 }
22807 } else {
22808 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22809 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22810 // If the preceding store writes to a subset of the current store's
22811 // location and no other node is chained to that store, we can
22812 // effectively drop it. Do not remove stores to undef as they
22813 // may be used as data sinks.
22814 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22815 ChainBase,
22816 ST1->getMemoryVT().getFixedSizeInBits())) {
22817 CombineTo(ST1, ST1->getChain());
22818 return SDValue(N, 0);
22819 }
22820 }
22821 }
22822 }
22823 }
22824
22825 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22826 // truncating store. We can do this even if this is already a truncstore.
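 // e.g. (store (f32 (fpround f64 X)), Ptr) -> (truncstore f64 X, Ptr, f32)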
22827 if ((Value.getOpcode() == ISD::FP_ROUND ||
22828 Value.getOpcode() == ISD::TRUNCATE) &&
22829 Value->hasOneUse() && ST->isUnindexed() &&
22830 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22831 ST->getMemoryVT(), LegalOperations)) {
22832 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22833 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22834 }
22835
22836 // Always perform this optimization before types are legal. If the target
22837 // prefers, also try this after legalization to catch stores that were created
22838 // by intrinsics or other nodes.
22839 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22840 while (true) {
22841 // There can be multiple store sequences on the same chain.
22842 // Keep trying to merge store sequences until we are unable to do so
22843 // or until we merge the last store on the chain.
22844 bool Changed = mergeConsecutiveStores(ST);
22845 if (!Changed) break;
22846 // Return N as merge only uses CombineTo and no worklist clean
22847 // up is necessary.
22848 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22849 return SDValue(N, 0);
22850 }
22851 }
22852
22853 // Try transforming N to an indexed store.
22854 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22855 return SDValue(N, 0);
22856
22857 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22858 //
22859 // Make sure to do this only after attempting to merge stores in order to
22860 // avoid changing the types of some subset of stores due to visit order,
22861 // preventing their merging.
22862 if (isa<ConstantFPSDNode>(ST->getValue())) {
22863 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22864 return NewSt;
22865 }
22866
22867 if (SDValue NewSt = splitMergedValStore(ST))
22868 return NewSt;
22869
22870 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
22871 return MaskedStore;
22872
22873 return ReduceLoadOpStoreWidth(N);
22874}
22875
22876SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22877 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22878 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
22879
22880 // We walk up the chains to find stores.
22881 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22882 while (!Chains.empty()) {
22883 SDValue Chain = Chains.pop_back_val();
22884 if (!Chain.hasOneUse())
22885 continue;
22886 switch (Chain.getOpcode()) {
22887 case ISD::TokenFactor:
22888 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22889 Chains.push_back(Chain.getOperand(--Nops));
22890 break;
22891 case ISD::LIFETIME_START:
22892 case ISD::LIFETIME_END:
22893 // We can forward past any lifetime start/end that can be proven not to
22894 // alias the node.
22895 if (!mayAlias(Chain.getNode(), N))
22896 Chains.push_back(Chain.getOperand(0));
22897 break;
22898 case ISD::STORE: {
22899 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22900 // TODO: Can relax for unordered atomics (see D66309)
22901 if (!ST->isSimple() || ST->isIndexed())
22902 continue;
22903 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22904 // The bounds of a scalable store are not known until runtime, so this
22905 // store cannot be elided.
22906 if (StoreSize.isScalable())
22907 continue;
22908 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22909 // If we store purely within object bounds just before its lifetime ends,
22910 // we can remove the store.
22911 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
22912 if (LifetimeEndBase.contains(
22913 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
22914 StoreBase, StoreSize.getFixedValue() * 8)) {
22915 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22916 dbgs() << "\nwithin LIFETIME_END of : ";
22917 LifetimeEndBase.dump(); dbgs() << "\n");
22918 CombineTo(ST, ST->getChain());
22919 return SDValue(N, 0);
22920 }
22921 }
22922 }
22923 }
22924 return SDValue();
22925}
22926
22927/// For the instruction sequence of store below, F and I values
22928/// are bundled together as an i64 value before being stored into memory.
22929 /// Sometimes it is more efficient to generate separate stores for F and I,
22930/// which can remove the bitwise instructions or sink them to colder places.
22931///
22932/// (store (or (zext (bitcast F to i32) to i64),
22933/// (shl (zext I to i64), 32)), addr) -->
22934/// (store F, addr) and (store I, addr+4)
22935///
22936/// Similarly, splitting for other merged store can also be beneficial, like:
22937/// For pair of {i32, i32}, i64 store --> two i32 stores.
22938/// For pair of {i32, i16}, i64 store --> two i32 stores.
22939/// For pair of {i16, i16}, i32 store --> two i16 stores.
22940/// For pair of {i16, i8}, i32 store --> two i16 stores.
22941/// For pair of {i8, i8}, i16 store --> two i8 stores.
22942///
22943/// We allow each target to determine specifically which kind of splitting is
22944/// supported.
22945///
22946/// The store patterns are commonly seen from the simple code snippet below
22947 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
22948/// void goo(const std::pair<int, float> &);
22949/// hoo() {
22950/// ...
22951/// goo(std::make_pair(tmp, ftmp));
22952/// ...
22953/// }
22954///
22955SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
22956 if (OptLevel == CodeGenOptLevel::None)
22957 return SDValue();
22958
22959 // Can't change the number of memory accesses for a volatile store or break
22960 // atomicity for an atomic one.
22961 if (!ST->isSimple())
22962 return SDValue();
22963
22964 SDValue Val = ST->getValue();
22965 SDLoc DL(ST);
22966
22967 // Match OR operand.
22968 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
22969 return SDValue();
22970
22971 // Match SHL operand and get Lower and Higher parts of Val.
22972 SDValue Op1 = Val.getOperand(0);
22973 SDValue Op2 = Val.getOperand(1);
22974 SDValue Lo, Hi;
22975 if (Op1.getOpcode() != ISD::SHL) {
22976 std::swap(Op1, Op2);
22977 if (Op1.getOpcode() != ISD::SHL)
22978 return SDValue();
22979 }
22980 Lo = Op2;
22981 Hi = Op1.getOperand(0);
22982 if (!Op1.hasOneUse())
22983 return SDValue();
22984
22985 // Match shift amount to HalfValBitSize.
22986 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
22987 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
22988 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
22989 return SDValue();
22990
22991 // Lo and Hi must be zero-extended from integer types no wider than
22992 // HalfValBitSize (e.g. from i32 or narrower for an i64 store).
22993 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
22994 !Lo.getOperand(0).getValueType().isScalarInteger() ||
22995 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
22996 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
22997 !Hi.getOperand(0).getValueType().isScalarInteger() ||
22998 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
22999 return SDValue();
23000
23001 // Use the EVT of low and high parts before bitcast as the input
23002 // of target query.
23003 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23004 ? Lo.getOperand(0).getValueType()
23005 : Lo.getValueType();
23006 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23007 ? Hi.getOperand(0).getValueType()
23008 : Hi.getValueType();
23009 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23010 return SDValue();
23011
23012 // Start to split store.
23013 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23014 AAMDNodes AAInfo = ST->getAAInfo();
23015
23016 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23017 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23018 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23019 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23020
23021 SDValue Chain = ST->getChain();
23022 SDValue Ptr = ST->getBasePtr();
23023 // Lower value store.
23024 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23025 ST->getBaseAlign(), MMOFlags, AAInfo);
23026 Ptr =
23027 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23028 // Higher value store.
23029 SDValue St1 = DAG.getStore(
23030 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23031 ST->getBaseAlign(), MMOFlags, AAInfo);
23032 return St1;
23033}
23034
23035// Merge an insertion into an existing shuffle:
23036// (insert_vector_elt (vector_shuffle X, Y, Mask),
23037 //   (extract_vector_elt X, N), InsIndex)
23038// --> (vector_shuffle X, Y, NewMask)
23039// and variations where shuffle operands may be CONCAT_VECTORS.
23040 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23041 SmallVectorImpl<int> &NewMask, SDValue Elt,
23042 unsigned InsIndex) {
23043 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23044 !isa<ConstantSDNode>(Elt.getOperand(1)))
23045 return false;
23046
23047 // Vec's operand 0 is using indices from 0 to N-1 and
23048 // operand 1 from N to 2N - 1, where N is the number of
23049 // elements in the vectors.
23050 SDValue InsertVal0 = Elt.getOperand(0);
23051 int ElementOffset = -1;
23052
23053 // We explore the inputs of the shuffle in order to see if we find the
23054 // source of the extract_vector_elt. If so, we can use it to modify the
23055 // shuffle rather than perform an insert_vector_elt.
23056 SmallVector<std::pair<int, SDValue>> ArgWorkList;
23057 ArgWorkList.emplace_back(Mask.size(), Y);
23058 ArgWorkList.emplace_back(0, X);
23059
23060 while (!ArgWorkList.empty()) {
23061 int ArgOffset;
23062 SDValue ArgVal;
23063 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23064
23065 if (ArgVal == InsertVal0) {
23066 ElementOffset = ArgOffset;
23067 break;
23068 }
23069
23070 // Peek through concat_vector.
23071 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23072 int CurrentArgOffset =
23073 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23074 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23075 for (SDValue Op : reverse(ArgVal->ops())) {
23076 CurrentArgOffset -= Step;
23077 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23078 }
23079
23080 // Make sure we went through all the elements and did not screw up index
23081 // computation.
23082 assert(CurrentArgOffset == ArgOffset);
23083 }
23084 }
23085
23086 // If we failed to find a match, see if we can replace an UNDEF shuffle
23087 // operand.
23088 if (ElementOffset == -1) {
23089 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23090 return false;
23091 ElementOffset = Mask.size();
23092 Y = InsertVal0;
23093 }
23094
23095 NewMask.assign(Mask.begin(), Mask.end());
23096 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23097 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23098 "NewMask[InsIndex] is out of bound");
23099 return true;
23100}
23101
23102// Merge an insertion into an existing shuffle:
23103// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23104// InsIndex)
23105// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23106// CONCAT_VECTORS.
23107SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23108 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23109 "Expected extract_vector_elt");
23110 SDValue InsertVal = N->getOperand(1);
23111 SDValue Vec = N->getOperand(0);
23112
23113 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23114 if (!SVN || !Vec.hasOneUse())
23115 return SDValue();
23116
23117 ArrayRef<int> Mask = SVN->getMask();
23118 SDValue X = Vec.getOperand(0);
23119 SDValue Y = Vec.getOperand(1);
23120
23121 SmallVector<int, 16> NewMask(Mask);
23122 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23123 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23124 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23125 if (LegalShuffle)
23126 return LegalShuffle;
23127 }
23128
23129 return SDValue();
23130}
23131
23132// Convert a disguised subvector insertion into a shuffle:
23133// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23134// bitcast(shuffle (bitcast V), (extended X), Mask)
23135// Note: We do not use an insert_subvector node because that requires a
23136// legal subvector type.
23137SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23138 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23139 "Expected extract_vector_elt");
23140 SDValue InsertVal = N->getOperand(1);
23141
23142 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23143 !InsertVal.getOperand(0).getValueType().isVector())
23144 return SDValue();
23145
23146 SDValue SubVec = InsertVal.getOperand(0);
23147 SDValue DestVec = N->getOperand(0);
23148 EVT SubVecVT = SubVec.getValueType();
23149 EVT VT = DestVec.getValueType();
23150 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23151 // If the source only has a single vector element, the cost of creating and
23152 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
23153 if (NumSrcElts == 1)
23154 return SDValue();
23155 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23156 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23157
23158 // Step 1: Create a shuffle mask that implements this insert operation. The
23159 // vector that we are inserting into will be operand 0 of the shuffle, so
23160 // those elements are just 'i'. The inserted subvector is in the first
23161 // positions of operand 1 of the shuffle. Example:
23162 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23163 SmallVector<int, 16> Mask(NumMaskVals);
23164 for (unsigned i = 0; i != NumMaskVals; ++i) {
23165 if (i / NumSrcElts == InsIndex)
23166 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23167 else
23168 Mask[i] = i;
23169 }
23170
23171 // Bail out if the target can not handle the shuffle we want to create.
23172 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23173 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23174 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23175 return SDValue();
23176
23177 // Step 2: Create a wide vector from the inserted source vector by appending
23178 // undefined elements. This is the same size as our destination vector.
23179 SDLoc DL(N);
23180 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23181 ConcatOps[0] = SubVec;
23182 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23183
23184 // Step 3: Shuffle in the padded subvector.
23185 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23186 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23187 AddToWorklist(PaddedSubV.getNode());
23188 AddToWorklist(DestVecBC.getNode());
23189 AddToWorklist(Shuf.getNode());
23190 return DAG.getBitcast(VT, Shuf);
23191}
23192
23193// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23194 // possible and the new load will be quick. We use more loads but fewer shuffles
23195// and inserts.
23196SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23197 EVT VT = N->getValueType(0);
23198
23199 // InsIndex is expected to be the first or last lane.
23200 if (!VT.isFixedLengthVector() ||
23201 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23202 return SDValue();
23203
23204 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23205 // depending on the InsIndex.
23206 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23207 SDValue Scalar = N->getOperand(1);
23208 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23209 return InsIndex == P.index() || P.value() < 0 ||
23210 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23211 (InsIndex == VT.getVectorNumElements() - 1 &&
23212 P.value() == (int)P.index() + 1);
23213 }))
23214 return SDValue();
23215
23216 // We optionally skip over an extend so long as both loads are extended in the
23217 // same way from the same type.
23218 unsigned Extend = 0;
23219 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23220 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23221 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23222 Extend = Scalar.getOpcode();
23223 Scalar = Scalar.getOperand(0);
23224 }
23225
23226 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23227 if (!ScalarLoad)
23228 return SDValue();
23229
23230 SDValue Vec = Shuffle->getOperand(0);
23231 if (Extend) {
23232 if (Vec.getOpcode() != Extend)
23233 return SDValue();
23234 Vec = Vec.getOperand(0);
23235 }
23236 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23237 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23238 return SDValue();
23239
23240 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23241 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23242 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23243 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23244 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23245 return SDValue();
23246
23247 // Check that the offset between the pointers is such that the loads form a
23248 // single contiguous load.
23249 if (InsIndex == 0) {
23250 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23251 -1))
23252 return SDValue();
23253 } else {
23254 if (!DAG.areNonVolatileConsecutiveLoads(
23255 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23256 return SDValue();
23257 }
23258
23259 // And that the new unaligned load will be fast.
23260 unsigned IsFast = 0;
23261 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23262 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23263 Vec.getValueType(), VecLoad->getAddressSpace(),
23264 NewAlign, VecLoad->getMemOperand()->getFlags(),
23265 &IsFast) ||
23266 !IsFast)
23267 return SDValue();
23268
23269 // Calculate the new Ptr and create the new load.
23270 SDLoc DL(N);
23271 SDValue Ptr = ScalarLoad->getBasePtr();
23272 if (InsIndex != 0)
23273 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23274 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23275 MachinePointerInfo PtrInfo =
23276 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23277 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23278
23279 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23280 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23281 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23282 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23283 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23284}
23285
23286SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23287 SDValue InVec = N->getOperand(0);
23288 SDValue InVal = N->getOperand(1);
23289 SDValue EltNo = N->getOperand(2);
23290 SDLoc DL(N);
23291
23292 EVT VT = InVec.getValueType();
23293 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23294
23295 // Insert into out-of-bounds element is undefined.
23296 if (IndexC && VT.isFixedLengthVector() &&
23297 IndexC->getZExtValue() >= VT.getVectorNumElements())
23298 return DAG.getUNDEF(VT);
23299
23300 // Remove redundant insertions:
23301 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23302 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23303 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23304 return InVec;
23305
23306 if (!IndexC) {
23307 // If this is variable insert to undef vector, it might be better to splat:
23308 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23309 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23310 return DAG.getSplat(VT, DL, InVal);
23311 return SDValue();
23312 }
23313
23314 if (VT.isScalableVector())
23315 return SDValue();
23316
23317 unsigned NumElts = VT.getVectorNumElements();
23318
23319 // We must know which element is being inserted for folds below here.
23320 unsigned Elt = IndexC->getZExtValue();
23321
23322 // Handle <1 x ???> vector insertion special cases.
23323 if (NumElts == 1) {
23324 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23325 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23326 InVal.getOperand(0).getValueType() == VT &&
23327 isNullConstant(InVal.getOperand(1)))
23328 return InVal.getOperand(0);
23329 }
23330
23331 // Canonicalize insert_vector_elt dag nodes.
23332 // Example:
23333 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23334 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23335 //
23336 // Do this only if the child insert_vector node has one use; also
23337 // do this only if indices are both constants and Idx1 < Idx0.
23338 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23339 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23340 unsigned OtherElt = InVec.getConstantOperandVal(2);
23341 if (Elt < OtherElt) {
23342 // Swap nodes.
23343 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23344 InVec.getOperand(0), InVal, EltNo);
23345 AddToWorklist(NewOp.getNode());
23346 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23347 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23348 }
23349 }
23350
23351 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23352 return Shuf;
23353
23354 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23355 return Shuf;
23356
23357 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23358 return Shuf;
23359
23360 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23361 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23362 // vXi1 vector - we don't need to recurse.
23363 if (NumElts == 1)
23364 return DAG.getBuildVector(VT, DL, {InVal});
23365
23366 // If we haven't already collected the element, insert into the op list.
23367 EVT MaxEltVT = InVal.getValueType();
23368 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23369 unsigned Idx) {
23370 if (!Ops[Idx]) {
23371 Ops[Idx] = Elt;
23372 if (VT.isInteger()) {
23373 EVT EltVT = Elt.getValueType();
23374 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23375 }
23376 }
23377 };
23378
23379 // Ensure all the operands are the same value type, fill any missing
23380 // operands with UNDEF and create the BUILD_VECTOR.
23381 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23382 bool FreezeUndef = false) {
23383 assert(Ops.size() == NumElts && "Unexpected vector size");
23384 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23385 : DAG.getUNDEF(MaxEltVT);
23386 for (SDValue &Op : Ops) {
23387 if (Op)
23388 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23389 else
23390 Op = UndefOp;
23391 }
23392 return DAG.getBuildVector(VT, DL, Ops);
23393 };
23394
23395 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
23396 Ops[Elt] = InVal;
23397
23398 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
23399 for (SDValue CurVec = InVec; CurVec;) {
23400 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23401 if (CurVec.isUndef())
23402 return CanonicalizeBuildVector(Ops);
23403
23404 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23405 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23406 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23407
23408 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23409 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23410 for (unsigned I = 0; I != NumElts; ++I)
23411 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23412 return CanonicalizeBuildVector(Ops);
23413 }
23414
23415 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23416 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23417 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23418 return CanonicalizeBuildVector(Ops);
23419 }
23420
23421 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23422 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23423 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23424 if (CurIdx->getAPIntValue().ult(NumElts)) {
23425 unsigned Idx = CurIdx->getZExtValue();
23426 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23427
23428 // Found entire BUILD_VECTOR.
23429 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23430 return CanonicalizeBuildVector(Ops);
23431
23432 CurVec = CurVec->getOperand(0);
23433 continue;
23434 }
23435
23436 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23437 // update the shuffle mask (and second operand if we started with unary
23438 // shuffle) and create a new legal shuffle.
23439 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23440 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23441 SDValue LHS = SVN->getOperand(0);
23442 SDValue RHS = SVN->getOperand(1);
23443 SmallVector<int, 16> Mask(SVN->getMask());
23444 bool Merged = true;
23445 for (auto I : enumerate(Ops)) {
23446 SDValue &Op = I.value();
23447 if (Op) {
23448 SmallVector<int, 16> NewMask;
23449 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23450 Merged = false;
23451 break;
23452 }
23453 Mask = std::move(NewMask);
23454 }
23455 }
23456 if (Merged)
23457 if (SDValue NewShuffle =
23458 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23459 return NewShuffle;
23460 }
23461
23462 if (!LegalOperations) {
23463 bool IsNull = llvm::isNullConstant(InVal);
23464 // We can convert to AND/OR mask if all insertions are zero or -1
23465 // respectively.
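 // e.g. repeatedly inserting zero into lanes 1 and 3 of v4i32 X becomes
 // (and X, {-1, 0, -1, 0}).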
23466 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23467 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23468 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23469 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23470 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23471 SmallVector<SDValue, 8> Mask(NumElts);
23472
23473 // Build the mask and return the corresponding DAG node.
23474 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23475 unsigned MaskOpcode) {
23476 for (unsigned I = 0; I != NumElts; ++I)
23477 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23478 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23479 DAG.getBuildVector(VT, DL, Mask));
23480 };
23481
23482 // If all elements are zero, we can use AND with all ones.
23483 if (IsNull)
23484 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23485
23486 // If all elements are -1, we can use OR with zero.
23487 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23488 }
23489 }
23490
23491 // Failed to find a match in the chain - bail.
23492 break;
23493 }
23494
23495 // See if we can fill in the missing constant elements as zeros.
23496 // TODO: Should we do this for any constant?
23497 APInt DemandedZeroElts = APInt::getZero(NumElts);
23498 for (unsigned I = 0; I != NumElts; ++I)
23499 if (!Ops[I])
23500 DemandedZeroElts.setBit(I);
23501
23502 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
23503 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
23504 : DAG.getConstantFP(0, DL, MaxEltVT);
23505 for (unsigned I = 0; I != NumElts; ++I)
23506 if (!Ops[I])
23507 Ops[I] = Zero;
23508
23509 return CanonicalizeBuildVector(Ops);
23510 }
23511 }
23512
23513 return SDValue();
23514}
23515
23516/// Transform a vector binary operation into a scalar binary operation by moving
23517/// the math/logic after an extract element of a vector.
23518 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
23519 const SDLoc &DL, bool LegalTypes) {
23520 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23521 SDValue Vec = ExtElt->getOperand(0);
23522 SDValue Index = ExtElt->getOperand(1);
23523 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23524 unsigned Opc = Vec.getOpcode();
23525 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
23526 Vec->getNumValues() != 1)
23527 return SDValue();
23528
23529 // Targets may want to avoid this to prevent an expensive register transfer.
23530 if (!TLI.shouldScalarizeBinop(Vec))
23531 return SDValue();
23532
23533 EVT ResVT = ExtElt->getValueType(0);
23534 if (Opc == ISD::SETCC &&
23535 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
23536 return SDValue();
23537
23538 // Extracting an element of a vector constant is constant-folded, so this
23539 // transform is just replacing a vector op with a scalar op while moving the
23540 // extract.
23541 SDValue Op0 = Vec.getOperand(0);
23542 SDValue Op1 = Vec.getOperand(1);
23543 APInt SplatVal;
23544 if (!isAnyConstantBuildVector(Op0, true) &&
23545 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
23546 !isAnyConstantBuildVector(Op1, true) &&
23547 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
23548 return SDValue();
23549
23550 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
23551 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
23552 if (Opc == ISD::SETCC) {
23553 EVT OpVT = Op0.getValueType().getVectorElementType();
23554 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
23555 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
23556 SDValue NewVal = DAG.getSetCC(
23557 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
23558 // We may need to sign- or zero-extend the result to match the same
23559 // behaviour as the vector version of SETCC.
23560 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
23561 if (ResVT != MVT::i1 &&
23562 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
23563 VecBoolContents != TLI.getBooleanContents(ResVT)) {
23564 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
23565 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
23566 DAG.getValueType(MVT::i1));
23567 else
23568 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
23569 }
23570 return NewVal;
23571 }
23572 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
23573 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
23574 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
23575}
23576
23577// Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
23578 // recursively analyse all of its users, and try to model them as
23579// bit sequence extractions. If all of them agree on the new, narrower element
23580// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
23581// new element type, do so now.
23582// This is mainly useful to recover from legalization that scalarized
23583// the vector as wide elements, but tries to rebuild it with narrower elements.
23584//
23585// Some more nodes could be modelled if that helps cover interesting patterns.
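// For example, if a v2i64 extract is only used by (trunc ... to i32) and
// (trunc (srl ..., 32) to i32), the values can instead be extracted directly
// from a v4i32 bitcast of the source vector.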
23586bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
23587 SDNode *N) {
23588 // We perform this optimization post type-legalization because
23589 // the type-legalizer often scalarizes integer-promoted vectors.
23590 // Performing this optimization before may cause legalization cycles.
23591 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23592 return false;
23593
23594 // TODO: Add support for big-endian.
23595 if (DAG.getDataLayout().isBigEndian())
23596 return false;
23597
23598 SDValue VecOp = N->getOperand(0);
23599 EVT VecVT = VecOp.getValueType();
23600 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
23601
23602 // We must start with a constant extraction index.
23603 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
23604 if (!IndexC)
23605 return false;
23606
23607 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
23608 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
23609
23610 // TODO: deal with the case of implicit anyext of the extraction.
23611 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23612 EVT ScalarVT = N->getValueType(0);
23613 if (VecVT.getScalarType() != ScalarVT)
23614 return false;
23615
23616 // TODO: deal with the cases other than everything being integer-typed.
23617 if (!ScalarVT.isScalarInteger())
23618 return false;
23619
23620 struct Entry {
23621 SDNode *Producer;
23622
23623 // Which bits of VecOp does it contain?
23624 unsigned BitPos;
23625 int NumBits;
23626 // NOTE: the actual width of \p Producer may be wider than NumBits!
23627
23628 Entry(Entry &&) = default;
23629 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
23630 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
23631
23632 Entry() = delete;
23633 Entry(const Entry &) = delete;
23634 Entry &operator=(const Entry &) = delete;
23635 Entry &operator=(Entry &&) = delete;
23636 };
23637 SmallVector<Entry, 32> Worklist;
23638 SmallVector<Entry, 32> Leafs;
23639
23640 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
23641 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
23642 /*NumBits=*/VecEltBitWidth);
23643
23644 while (!Worklist.empty()) {
23645 Entry E = Worklist.pop_back_val();
23646 // Does the node not even use any of the VecOp bits?
23647 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
23648 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
23649 return false; // Let's allow the other combines clean this up first.
23650 // Did we fail to model any of the users of the Producer?
23651 bool ProducerIsLeaf = false;
23652 // Look at each user of this Producer.
23653 for (SDNode *User : E.Producer->users()) {
23654 switch (User->getOpcode()) {
23655 // TODO: support ISD::BITCAST
23656 // TODO: support ISD::ANY_EXTEND
23657 // TODO: support ISD::ZERO_EXTEND
23658 // TODO: support ISD::SIGN_EXTEND
23659 case ISD::TRUNCATE:
23660 // Truncation simply means we keep position, but extract less bits.
23661 Worklist.emplace_back(User, E.BitPos,
23662 /*NumBits=*/User->getValueSizeInBits(0));
23663 break;
23664 // TODO: support ISD::SRA
23665 // TODO: support ISD::SHL
23666 case ISD::SRL:
23667 // We should be shifting the Producer by a constant amount.
23668 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
23669 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
23670 // Logical right-shift means that we start extraction later,
23671 // but stop it at the same position we did previously.
23672 unsigned ShAmt = ShAmtC->getZExtValue();
23673 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
23674 break;
23675 }
23676 [[fallthrough]];
23677 default:
23678 // We can not model this user of the Producer.
23679 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
23680 ProducerIsLeaf = true;
23681 // Profitability check: all users that we can not model
23682 // must be ISD::BUILD_VECTOR's.
23683 if (User->getOpcode() != ISD::BUILD_VECTOR)
23684 return false;
23685 break;
23686 }
23687 }
23688 if (ProducerIsLeaf)
23689 Leafs.emplace_back(std::move(E));
23690 }
23691
23692 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
23693
23694 // If we are still at the same element granularity, give up.
23695 if (NewVecEltBitWidth == VecEltBitWidth)
23696 return false;
23697
23698 // The vector width must be a multiple of the new element width.
23699 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
23700 return false;
23701
23702 // All leafs must agree on the new element width.
23703 // All leafs must not expect any "padding" bits on top of that width.
23704 // All leafs must start extraction from multiple of that width.
23705 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
23706 return (unsigned)E.NumBits == NewVecEltBitWidth &&
23707 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23708 E.BitPos % NewVecEltBitWidth == 0;
23709 }))
23710 return false;
23711
23712 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
23713 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
23714 VecVT.getSizeInBits() / NewVecEltBitWidth);
23715
23716 if (LegalTypes &&
23717 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23718 return false;
23719
23720 if (LegalOperations &&
23721 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23722 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23723 return false;
23724
23725 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23726 for (const Entry &E : Leafs) {
23727 SDLoc DL(E.Producer);
23728 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23729 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23730 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23731 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23732 DAG.getVectorIdxConstant(NewIndex, DL));
23733 CombineTo(E.Producer, V);
23734 }
23735
23736 return true;
23737}
23738
23739SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23740 SDValue VecOp = N->getOperand(0);
23741 SDValue Index = N->getOperand(1);
23742 EVT ScalarVT = N->getValueType(0);
23743 EVT VecVT = VecOp.getValueType();
23744 if (VecOp.isUndef())
23745 return DAG.getUNDEF(ScalarVT);
23746
23747 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23748 //
23749 // This only really matters if the index is non-constant since other combines
23750 // on the constant elements already work.
23751 SDLoc DL(N);
23752 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23753 Index == VecOp.getOperand(2)) {
23754 SDValue Elt = VecOp.getOperand(1);
23755 AddUsersToWorklist(VecOp.getNode());
23756 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23757 }
23758
23759 // (vextract (scalar_to_vector val), 0) -> val
23760 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23761 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23762 if (DAG.isKnownNeverZero(Index))
23763 return DAG.getUNDEF(ScalarVT);
23764
23765 // Check if the result type doesn't match the inserted element type.
23766 // The inserted element and extracted element may have mismatched bitwidth.
23767 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23768 SDValue InOp = VecOp.getOperand(0);
23769 if (InOp.getValueType() != ScalarVT) {
23770 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23771 if (InOp.getValueType().bitsGT(ScalarVT))
23772 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23773 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23774 }
23775 return InOp;
23776 }
23777
23778 // extract_vector_elt of out-of-bounds element -> UNDEF
23779 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23780 if (IndexC && VecVT.isFixedLengthVector() &&
23781 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23782 return DAG.getUNDEF(ScalarVT);
23783
23784 // extract_vector_elt (build_vector x, y), 1 -> y
23785 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23786 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23787 TLI.isTypeLegal(VecVT)) {
23788 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23789 VecVT.isFixedLengthVector()) &&
23790 "BUILD_VECTOR used for scalable vectors");
23791 unsigned IndexVal =
23792 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23793 SDValue Elt = VecOp.getOperand(IndexVal);
23794 EVT InEltVT = Elt.getValueType();
23795
23796 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23797 isNullConstant(Elt)) {
23798 // Sometimes build_vector's scalar input types do not match result type.
23799 if (ScalarVT == InEltVT)
23800 return Elt;
23801
23802 // TODO: It may be useful to truncate if free if the build_vector
23803 // implicitly converts.
23804 }
23805 }
23806
23807 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23808 return BO;
23809
23810 if (VecVT.isScalableVector())
23811 return SDValue();
23812
23813 // All the code from this point onwards assumes fixed width vectors, but it's
23814 // possible that some of the combinations could be made to work for scalable
23815 // vectors too.
23816 unsigned NumElts = VecVT.getVectorNumElements();
23817 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23818
23819 // See if the extracted element is constant, in which case fold it if it's
23820 // a legal fp immediate.
23821 if (IndexC && ScalarVT.isFloatingPoint()) {
23822 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23823 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23824 if (KnownElt.isConstant()) {
23825 APFloat CstFP =
23826 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23827 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23828 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23829 }
23830 }
23831
23832 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23833 // there are regressions on multiple targets without it. We can end up with a
23834 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23835 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23836 VecOp.hasOneUse()) {
23838 // The vector index of the LSBs of the source depends on the endianness.
23838 bool IsLE = DAG.getDataLayout().isLittleEndian();
23839 unsigned ExtractIndex = IndexC->getZExtValue();
23840 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23841 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23842 SDValue BCSrc = VecOp.getOperand(0);
23843 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23844 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23845
23846 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23847 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23848 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23849 BCSrc.getScalarValueSizeInBits() ==
23850 BCSrc.getOperand(0).getValueSizeInBits()) {
23851 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23852 // trunc i64 X to i32
23853 SDValue X = BCSrc.getOperand(0);
23854 EVT XVT = X.getValueType();
23855 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23856 "Extract element and scalar to vector can't change element type "
23857 "from FP to integer.");
23858 unsigned XBitWidth = X.getValueSizeInBits();
23859 unsigned Scale = XBitWidth / VecEltBitWidth;
23860 BCTruncElt = IsLE ? 0 : Scale - 1;
23861
23862 // An extract element return value type can be wider than its vector
23863 // operand element type. In that case, the high bits are undefined, so
23864 // it's possible that we may need to extend rather than truncate.
23865 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23866 assert(XBitWidth % VecEltBitWidth == 0 &&
23867 "Scalar bitwidth must be a multiple of vector element bitwidth");
23868
23869 if (ExtractIndex != BCTruncElt) {
23870 unsigned ShiftIndex =
23871 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23872 X = DAG.getNode(
23873 ISD::SRL, DL, XVT, X,
23874 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23875 }
23876
23877 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23878 }
23879 }
23880 }
23881
23882 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23883 // We only perform this optimization before the op legalization phase because
23884 // we may introduce new vector instructions which are not backed by TD
23885 // patterns. For example, on AVX this can mean extracting elements from a wide
23886 // vector without using extract_subvector. However, if we can find an underlying
23887 // scalar value, then we can always use that.
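// For instance (an illustrative DAG with placeholder values t1/t2, subject to
// the legality checks below):
//   t0: v4i32 = vector_shuffle<2,u,u,u> t1, t2
//   t3: i32 = extract_vector_elt t0, Constant:i64<0>
// becomes
//   t3: i32 = extract_vector_elt t1, Constant:i64<2>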
23888 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23889 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23890 // Find the new index to extract from.
23891 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23892
23893 // Extracting an undef index is undef.
23894 if (OrigElt == -1)
23895 return DAG.getUNDEF(ScalarVT);
23896
23897 // Select the right vector half to extract from.
23898 SDValue SVInVec;
23899 if (OrigElt < (int)NumElts) {
23900 SVInVec = VecOp.getOperand(0);
23901 } else {
23902 SVInVec = VecOp.getOperand(1);
23903 OrigElt -= NumElts;
23904 }
23905
23906 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23907 // TODO: Check if shuffle mask is legal?
23908 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
23909 !VecOp.hasOneUse())
23910 return SDValue();
23911
23912 SDValue InOp = SVInVec.getOperand(OrigElt);
23913 if (InOp.getValueType() != ScalarVT) {
23914 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23915 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23916 }
23917
23918 return InOp;
23919 }
23920
23921 // FIXME: We should handle recursing on other vector shuffles and
23922 // scalar_to_vector here as well.
23923
23924 if (!LegalOperations ||
23925 // FIXME: Should really be just isOperationLegalOrCustom.
23926 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23927 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
23928 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23929 DAG.getVectorIdxConstant(OrigElt, DL));
23930 }
23931 }
23932
23933 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23934 // simplify it based on the (valid) extraction indices.
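// Sketch of the idea: if, say, only lanes 0 and 2 of a v4i32 source are ever
// extracted (all with constant in-range indices), DemandedElts becomes 0b0101
// and SimplifyDemandedVectorElts/SimplifyDemandedBits below may rewrite the
// unused lanes (and bits) of VecOp to undef.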
23935 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23936 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23937 Use->getOperand(0) == VecOp &&
23938 isa<ConstantSDNode>(Use->getOperand(1));
23939 })) {
23940 APInt DemandedElts = APInt::getZero(NumElts);
23941 for (SDNode *User : VecOp->users()) {
23942 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23943 if (CstElt->getAPIntValue().ult(NumElts))
23944 DemandedElts.setBit(CstElt->getZExtValue());
23945 }
23946 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
23947 // We simplified the vector operand of this extract element. If this
23948 // extract is not dead, visit it again so it is folded properly.
23949 if (N->getOpcode() != ISD::DELETED_NODE)
23950 AddToWorklist(N);
23951 return SDValue(N, 0);
23952 }
23953 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
23954 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
23955 // We simplified the vector operand of this extract element. If this
23956 // extract is not dead, visit it again so it is folded properly.
23957 if (N->getOpcode() != ISD::DELETED_NODE)
23958 AddToWorklist(N);
23959 return SDValue(N, 0);
23960 }
23961 }
23962
23963 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
23964 return SDValue(N, 0);
23965
23966 // Everything under here is trying to match an extract of a loaded value.
23967 // If the result of the load has to be truncated, then it's not necessarily
23968 // profitable.
23969 bool BCNumEltsChanged = false;
23970 EVT ExtVT = VecVT.getVectorElementType();
23971 EVT LVT = ExtVT;
23972 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
23973 return SDValue();
23974
23975 if (VecOp.getOpcode() == ISD::BITCAST) {
23976 // Don't duplicate a load with other uses.
23977 if (!VecOp.hasOneUse())
23978 return SDValue();
23979
23980 EVT BCVT = VecOp.getOperand(0).getValueType();
23981 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
23982 return SDValue();
23983 if (NumElts != BCVT.getVectorNumElements())
23984 BCNumEltsChanged = true;
23985 VecOp = VecOp.getOperand(0);
23986 ExtVT = BCVT.getVectorElementType();
23987 }
23988
23989 // extract (vector load $addr), i --> load $addr + i * size
23990 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
23991 ISD::isNormalLoad(VecOp.getNode()) &&
23992 !Index->hasPredecessor(VecOp.getNode())) {
23993 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
23994 if (VecLoad && VecLoad->isSimple()) {
23995 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
23996 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
23997 ++OpsNarrowed;
23998 return Scalarized;
23999 }
24000 }
24001 }
24002
24003 // Perform only after legalization to ensure build_vector / vector_shuffle
24004 // optimizations have already been done.
24005 if (!LegalOperations || !IndexC)
24006 return SDValue();
24007
24008 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24009 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24010 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24011 int Elt = IndexC->getZExtValue();
24012 LoadSDNode *LN0 = nullptr;
24013 if (ISD::isNormalLoad(VecOp.getNode())) {
24014 LN0 = cast<LoadSDNode>(VecOp);
24015 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24016 VecOp.getOperand(0).getValueType() == ExtVT &&
24017 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24018 // Don't duplicate a load with other uses.
24019 if (!VecOp.hasOneUse())
24020 return SDValue();
24021
24022 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24023 }
24024 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24025 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24026 // =>
24027 // (load $addr+1*size)
24028
24029 // Don't duplicate a load with other uses.
24030 if (!VecOp.hasOneUse())
24031 return SDValue();
24032
24033 // If the bit convert changed the number of elements, it is unsafe
24034 // to examine the mask.
24035 if (BCNumEltsChanged)
24036 return SDValue();
24037
24038 // Select the input vector, guarding against an out-of-range extract index.
24039 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24040 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24041
24042 if (VecOp.getOpcode() == ISD::BITCAST) {
24043 // Don't duplicate a load with other uses.
24044 if (!VecOp.hasOneUse())
24045 return SDValue();
24046
24047 VecOp = VecOp.getOperand(0);
24048 }
24049 if (ISD::isNormalLoad(VecOp.getNode())) {
24050 LN0 = cast<LoadSDNode>(VecOp);
24051 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24052 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24053 }
24054 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24055 VecVT.getVectorElementType() == ScalarVT &&
24056 (!LegalTypes ||
24057 TLI.isTypeLegal(
24058 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24059 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24060 // -> extract_vector_elt a, 0
24061 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24062 // -> extract_vector_elt a, 1
24063 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24064 // -> extract_vector_elt b, 0
24065 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24066 // -> extract_vector_elt b, 1
24067 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24068 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24069 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24070 Index.getValueType());
24071
24072 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24073 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24074 ConcatVT.getVectorElementType(),
24075 ConcatOp, NewIdx);
24076 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24077 }
24078
24079 // Make sure we found a non-volatile load and the extractelement is
24080 // the only use.
24081 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24082 return SDValue();
24083
24084 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24085 if (Elt == -1)
24086 return DAG.getUNDEF(LVT);
24087
24088 if (SDValue Scalarized =
24089 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24090 ++OpsNarrowed;
24091 return Scalarized;
24092 }
24093
24094 return SDValue();
24095}
24096
24097// Simplify (build_vec (ext )) to (bitcast (build_vec ))
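// One possible little-endian instance (illustrative placeholder values):
//   v4i32 build_vector (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d)
//     --> v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0)
// With any_extend inputs the zero filler elements become undef instead.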
24098SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24099 // We perform this optimization post type-legalization because
24100 // the type-legalizer often scalarizes integer-promoted vectors.
24101 // Performing this optimization before may create bit-casts which
24102 // will be type-legalized to complex code sequences.
24103 // We perform this optimization only before the operation legalizer because we
24104 // may introduce illegal operations.
24105 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24106 return SDValue();
24107
24108 unsigned NumInScalars = N->getNumOperands();
24109 SDLoc DL(N);
24110 EVT VT = N->getValueType(0);
24111
24112 // Check to see if this is a BUILD_VECTOR of a bunch of values
24113 // which come from any_extend or zero_extend nodes. If so, we can create
24114 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24115 // optimizations. We do not handle sign-extend because we can't fill the sign
24116 // using shuffles.
24117 EVT SourceType = MVT::Other;
24118 bool AllAnyExt = true;
24119
24120 for (unsigned i = 0; i != NumInScalars; ++i) {
24121 SDValue In = N->getOperand(i);
24122 // Ignore undef inputs.
24123 if (In.isUndef()) continue;
24124
24125 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24126 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24127
24128 // Abort if the element is not an extension.
24129 if (!ZeroExt && !AnyExt) {
24130 SourceType = MVT::Other;
24131 break;
24132 }
24133
24134 // The input is a ZeroExt or AnyExt. Check the original type.
24135 EVT InTy = In.getOperand(0).getValueType();
24136
24137 // Check that all of the widened source types are the same.
24138 if (SourceType == MVT::Other)
24139 // First time.
24140 SourceType = InTy;
24141 else if (InTy != SourceType) {
24142 // Multiple incoming types. Abort.
24143 SourceType = MVT::Other;
24144 break;
24145 }
24146
24147 // Check if all of the extends are ANY_EXTENDs.
24148 AllAnyExt &= AnyExt;
24149 }
24150
24151 // In order to have valid types, all of the inputs must be extended from the
24152 // same source type and all of the inputs must be any or zero extend.
24153 // Scalar sizes must be a power of two.
24154 EVT OutScalarTy = VT.getScalarType();
24155 bool ValidTypes =
24156 SourceType != MVT::Other &&
24157 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24158 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24159
24160 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24161 // turn into a single shuffle instruction.
24162 if (!ValidTypes)
24163 return SDValue();
24164
24165 // If we already have a splat buildvector, then don't fold it if it means
24166 // introducing zeros.
24167 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24168 return SDValue();
24169
24170 bool isLE = DAG.getDataLayout().isLittleEndian();
24171 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24172 assert(ElemRatio > 1 && "Invalid element size ratio");
24173 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24174 DAG.getConstant(0, DL, SourceType);
24175
24176 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24177 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24178
24179 // Populate the new build_vector
24180 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24181 SDValue Cast = N->getOperand(i);
24182 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24183 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24184 Cast.isUndef()) && "Invalid cast opcode");
24185 SDValue In;
24186 if (Cast.isUndef())
24187 In = DAG.getUNDEF(SourceType);
24188 else
24189 In = Cast->getOperand(0);
24190 unsigned Index = isLE ? (i * ElemRatio) :
24191 (i * ElemRatio + (ElemRatio - 1));
24192
24193 assert(Index < Ops.size() && "Invalid index");
24194 Ops[Index] = In;
24195 }
24196
24197 // The type of the new BUILD_VECTOR node.
24198 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24199 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24200 "Invalid vector size");
24201 // Check if the new vector type is legal.
24202 if (!isTypeLegal(VecVT) ||
24203 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24204 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
24205 return SDValue();
24206
24207 // Make the new BUILD_VECTOR.
24208 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24209
24210 // The new BUILD_VECTOR node has the potential to be further optimized.
24211 AddToWorklist(BV.getNode());
24212 // Bitcast to the desired type.
24213 return DAG.getBitcast(VT, BV);
24214}
24215
24216// Simplify (build_vec (trunc $1)
24217// (trunc (srl $1 half-width))
24218// (trunc (srl $1 (2 * half-width))))
24219// to (bitcast $1)
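// A concrete little-endian instance (illustrative only):
//   v2i32 build_vector (trunc i64 X), (trunc (srl i64 X, 32))
//     --> v2i32 bitcast i64 X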
24220SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24221 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24222
24223 EVT VT = N->getValueType(0);
24224
24225 // Don't run this before LegalizeTypes if VT is legal.
24226 // Targets may have other preferences.
24227 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24228 return SDValue();
24229
24230 // Only for little endian
24231 if (!DAG.getDataLayout().isLittleEndian())
24232 return SDValue();
24233
24234 EVT OutScalarTy = VT.getScalarType();
24235 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24236
24237 // Only for power of two types to be sure that bitcast works well
24238 if (!isPowerOf2_64(ScalarTypeBitsize))
24239 return SDValue();
24240
24241 unsigned NumInScalars = N->getNumOperands();
24242
24243 // Look through bitcasts
24244 auto PeekThroughBitcast = [](SDValue Op) {
24245 if (Op.getOpcode() == ISD::BITCAST)
24246 return Op.getOperand(0);
24247 return Op;
24248 };
24249
24250 // The source value where all the parts are extracted.
24251 SDValue Src;
24252 for (unsigned i = 0; i != NumInScalars; ++i) {
24253 SDValue In = PeekThroughBitcast(N->getOperand(i));
24254 // Ignore undef inputs.
24255 if (In.isUndef()) continue;
24256
24257 if (In.getOpcode() != ISD::TRUNCATE)
24258 return SDValue();
24259
24260 In = PeekThroughBitcast(In.getOperand(0));
24261
24262 if (In.getOpcode() != ISD::SRL) {
24263 // For now, only handle build_vec without shuffling; handle shifts here in
24264 // the future.
24265 if (i != 0)
24266 return SDValue();
24267
24268 Src = In;
24269 } else {
24270 // In is SRL
24271 SDValue part = PeekThroughBitcast(In.getOperand(0));
24272
24273 if (!Src) {
24274 Src = part;
24275 } else if (Src != part) {
24276 // Vector parts do not stem from the same variable
24277 return SDValue();
24278 }
24279
24280 SDValue ShiftAmtVal = In.getOperand(1);
24281 if (!isa<ConstantSDNode>(ShiftAmtVal))
24282 return SDValue();
24283
24284 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24285
24286 // The extracted value is not extracted at the right position
24287 if (ShiftAmt != i * ScalarTypeBitsize)
24288 return SDValue();
24289 }
24290 }
24291
24292 // Only cast if the size is the same
24293 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24294 return SDValue();
24295
24296 return DAG.getBitcast(VT, Src);
24297}
24298
24299SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24300 ArrayRef<int> VectorMask,
24301 SDValue VecIn1, SDValue VecIn2,
24302 unsigned LeftIdx, bool DidSplitVec) {
24303 EVT VT = N->getValueType(0);
24304 EVT InVT1 = VecIn1.getValueType();
24305 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24306
24307 unsigned NumElems = VT.getVectorNumElements();
24308 unsigned ShuffleNumElems = NumElems;
24309
24310 // If we artificially split a vector in two already, then the offsets in the
24311 // operands will all be based off of VecIn1, even those in VecIn2.
24312 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24313
24314 uint64_t VTSize = VT.getFixedSizeInBits();
24315 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24316 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24317
24318 assert(InVT2Size <= InVT1Size &&
24319 "Inputs must be sorted to be in non-increasing vector size order.");
24320
24321 // We can't generate a shuffle node with mismatched input and output types.
24322 // Try to make the types match the type of the output.
24323 if (InVT1 != VT || InVT2 != VT) {
24324 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24325 // If the output vector length is a multiple of both input lengths,
24326 // we can concatenate them and pad the rest with undefs.
24327 unsigned NumConcats = VTSize / InVT1Size;
24328 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24329 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24330 ConcatOps[0] = VecIn1;
24331 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24332 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24333 VecIn2 = SDValue();
24334 } else if (InVT1Size == VTSize * 2) {
24335 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24336 return SDValue();
24337
24338 if (!VecIn2.getNode()) {
24339 // If we only have one input vector, and it's twice the size of the
24340 // output, split it in two.
24341 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24342 DAG.getVectorIdxConstant(NumElems, DL));
24343 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24344 // Since we now have shorter input vectors, adjust the offset of the
24345 // second vector's start.
24346 Vec2Offset = NumElems;
24347 } else {
24348 assert(InVT2Size <= InVT1Size &&
24349 "Second input is not going to be larger than the first one.");
24350
24351 // VecIn1 is wider than the output, and we have another, possibly
24352 // smaller input. Pad the smaller input with undefs, shuffle at the
24353 // input vector width, and extract the output.
24354 // The shuffle type is different than VT, so check legality again.
24355 if (LegalOperations &&
24356 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24357 return SDValue();
24358
24359 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24360 // lower it back into a BUILD_VECTOR. So if the inserted type is
24361 // illegal, don't even try.
24362 if (InVT1 != InVT2) {
24363 if (!TLI.isTypeLegal(InVT2))
24364 return SDValue();
24365 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24366 }
24367 ShuffleNumElems = NumElems * 2;
24368 }
24369 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24370 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24371 ConcatOps[0] = VecIn2;
24372 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24373 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24374 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24375 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24376 return SDValue();
24377 // If the dest vector has fewer than two elements, then using a shuffle and
24378 // extracting from larger regs will cost even more.
24379 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24380 return SDValue();
24381 assert(InVT2Size <= InVT1Size &&
24382 "Second input is not going to be larger than the first one.");
24383
24384 // VecIn1 is wider than the output, and we have another, possibly
24385 // smaller input. Pad the smaller input with undefs, shuffle at the
24386 // input vector width, and extract the output.
24387 // The shuffle type is different than VT, so check legality again.
24388 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24389 return SDValue();
24390
24391 if (InVT1 != InVT2) {
24392 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24393 }
24394 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24395 } else {
24396 // TODO: Support cases where the length mismatch isn't exactly by a
24397 // factor of 2.
24398 // TODO: Move this check upwards, so that if we have bad type
24399 // mismatches, we don't create any DAG nodes.
24400 return SDValue();
24401 }
24402 }
24403
24404 // Initialize mask to undef.
24405 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24406
24407 // Only need to run up to the number of elements actually used, not the
24408 // total number of elements in the shuffle - if we are shuffling a wider
24409 // vector, the high lanes should be set to undef.
24410 for (unsigned i = 0; i != NumElems; ++i) {
24411 if (VectorMask[i] <= 0)
24412 continue;
24413
24414 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24415 if (VectorMask[i] == (int)LeftIdx) {
24416 Mask[i] = ExtIndex;
24417 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24418 Mask[i] = Vec2Offset + ExtIndex;
24419 }
24420 }
24421
24422 // The type the input vectors may have changed above.
24423 InVT1 = VecIn1.getValueType();
24424
24425 // If we already have a VecIn2, it should have the same type as VecIn1.
24426 // If we don't, get an undef/zero vector of the appropriate type.
24427 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24428 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24429
24430 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24431 if (ShuffleNumElems > NumElems)
24432 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24433
24434 return Shuffle;
24435}
24436 
24437 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24438 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24439
24440 // First, determine where the build vector is not undef.
24441 // TODO: We could extend this to handle zero elements as well as undefs.
24442 int NumBVOps = BV->getNumOperands();
24443 int ZextElt = -1;
24444 for (int i = 0; i != NumBVOps; ++i) {
24445 SDValue Op = BV->getOperand(i);
24446 if (Op.isUndef())
24447 continue;
24448 if (ZextElt == -1)
24449 ZextElt = i;
24450 else
24451 return SDValue();
24452 }
24453 // Bail out if there's no non-undef element.
24454 if (ZextElt == -1)
24455 return SDValue();
24456
24457 // The build vector contains some number of undef elements and exactly
24458 // one other element. That other element must be a zero-extended scalar
24459 // extracted from a vector at a constant index to turn this into a shuffle.
24460 // Also, require that the build vector does not implicitly truncate/extend
24461 // its elements.
24462 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
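// Illustrative little-endian instance of the fold built below:
//   v2i64 build_vector undef, (zext (extract_elt v4i32:X, 2) to i64)
//     --> v2i64 bitcast (vector_shuffle<u,u,2,4> X, (v4i32 zero vector))
// where mask element 4 picks lane 0 of the all-zeros second operand.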
24463 EVT VT = BV->getValueType(0);
24464 SDValue Zext = BV->getOperand(ZextElt);
24465 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24466 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24467 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24468 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24469 return SDValue();
24470
24471 // The zero-extend must be a multiple of the source size, and we must be
24472 // building a vector of the same size as the source of the extract element.
24473 SDValue Extract = Zext.getOperand(0);
24474 unsigned DestSize = Zext.getValueSizeInBits();
24475 unsigned SrcSize = Extract.getValueSizeInBits();
24476 if (DestSize % SrcSize != 0 ||
24477 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24478 return SDValue();
24479
24480 // Create a shuffle mask that will combine the extracted element with zeros
24481 // and undefs.
24482 int ZextRatio = DestSize / SrcSize;
24483 int NumMaskElts = NumBVOps * ZextRatio;
24484 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
24485 for (int i = 0; i != NumMaskElts; ++i) {
24486 if (i / ZextRatio == ZextElt) {
24487 // The low bits of the (potentially translated) extracted element map to
24488 // the source vector. The high bits map to zero. We will use a zero vector
24489 // as the 2nd source operand of the shuffle, so use the 1st element of
24490 // that vector (mask value is number-of-elements) for the high bits.
24491 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
24492 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
24493 : NumMaskElts;
24494 }
24495
24496 // Undef elements of the build vector remain undef because we initialize
24497 // the shuffle mask with -1.
24498 }
24499
24500 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
24501 // bitcast (shuffle V, ZeroVec, VectorMask)
24502 SDLoc DL(BV);
24503 EVT VecVT = Extract.getOperand(0).getValueType();
24504 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
24505 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24506 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
24507 ZeroVec, ShufMask, DAG);
24508 if (!Shuf)
24509 return SDValue();
24510 return DAG.getBitcast(VT, Shuf);
24511}
24512
24513// FIXME: promote to STLExtras.
24514template <typename R, typename T>
24515static auto getFirstIndexOf(R &&Range, const T &Val) {
24516 auto I = find(Range, Val);
24517 if (I == Range.end())
24518 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
24519 return std::distance(Range.begin(), I);
24520}
24521
24522// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
24523// operations. If the types of the vectors we're extracting from allow it,
24524// turn this into a vector_shuffle node.
24525SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
24526 SDLoc DL(N);
24527 EVT VT = N->getValueType(0);
24528
24529 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
24530 if (!isTypeLegal(VT))
24531 return SDValue();
24532 
24533 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
24534 return V;
24535
24536 // May only combine to shuffle after legalize if shuffle is legal.
24537 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
24538 return SDValue();
24539
24540 bool UsesZeroVector = false;
24541 unsigned NumElems = N->getNumOperands();
24542
24543 // Record, for each element of the newly built vector, which input vector
24544 // that element comes from. -1 stands for undef, 0 for the zero vector,
24545 // and positive values for the input vectors.
24546 // VectorMask maps each element to its vector number, and VecIn maps vector
24547 // numbers to their initial SDValues.
24548
24549 SmallVector<int, 8> VectorMask(NumElems, -1);
24550 SmallVector<SDValue, 8> VecIn;
24551 VecIn.push_back(SDValue());
24552
24553 // If we have a single extract_element with a constant index, track the index
24554 // value.
24555 unsigned OneConstExtractIndex = ~0u;
24556
24557 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
24558 unsigned NumExtracts = 0;
24559
24560 for (unsigned i = 0; i != NumElems; ++i) {
24561 SDValue Op = N->getOperand(i);
24562
24563 if (Op.isUndef())
24564 continue;
24565
24566 // See if we can use a blend with a zero vector.
24567 // TODO: Should we generalize this to a blend with an arbitrary constant
24568 // vector?
24569 if (isNullConstant(Op) || isNullFPConstant(Op)) {
24570 UsesZeroVector = true;
24571 VectorMask[i] = 0;
24572 continue;
24573 }
24574
24575 // Not an undef or zero. If the input is something other than an
24576 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
24577 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
24578 return SDValue();
24579
24580 SDValue ExtractedFromVec = Op.getOperand(0);
24581 if (ExtractedFromVec.getValueType().isScalableVector())
24582 return SDValue();
24583 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24584 if (!ExtractIdx)
24585 return SDValue();
24586
24587 if (ExtractIdx->getAsAPIntVal().uge(
24588 ExtractedFromVec.getValueType().getVectorNumElements()))
24589 return SDValue();
24590
24591 // All inputs must have the same element type as the output.
24592 if (VT.getVectorElementType() !=
24593 ExtractedFromVec.getValueType().getVectorElementType())
24594 return SDValue();
24595
24596 OneConstExtractIndex = ExtractIdx->getZExtValue();
24597 ++NumExtracts;
24598
24599 // Have we seen this input vector before?
24600 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
24601 // a map back from SDValues to numbers isn't worth it.
24602 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
24603 if (Idx == -1) { // A new source vector?
24604 Idx = VecIn.size();
24605 VecIn.push_back(ExtractedFromVec);
24606 }
24607
24608 VectorMask[i] = Idx;
24609 }
24610
24611 // If we didn't find at least one input vector, bail out.
24612 if (VecIn.size() < 2)
24613 return SDValue();
24614
24615 // If all the Operands of BUILD_VECTOR extract from same
24616 // vector, then split the vector efficiently based on the maximum
24617 // vector access index and adjust the VectorMask and
24618 // VecIn accordingly.
24619 bool DidSplitVec = false;
24620 if (VecIn.size() == 2) {
24621 // If we only found a single constant indexed extract_vector_elt feeding the
24622 // build_vector, do not produce a more complicated shuffle if the extract is
24623 // cheap with other constant/undef elements. Skip broadcast patterns with
24624 // multiple uses in the build_vector.
24625
24626 // TODO: This should be more aggressive about skipping the shuffle
24627 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
24628 // index.
24629 if (NumExtracts == 1 &&
24630 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
24631 TLI.isTypeLegal(VT.getVectorElementType()) &&
24632 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
24633 return SDValue();
24634
24635 unsigned MaxIndex = 0;
24636 unsigned NearestPow2 = 0;
24637 SDValue Vec = VecIn.back();
24638 EVT InVT = Vec.getValueType();
24639 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
24640
24641 for (unsigned i = 0; i < NumElems; i++) {
24642 if (VectorMask[i] <= 0)
24643 continue;
24644 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
24645 IndexVec[i] = Index;
24646 MaxIndex = std::max(MaxIndex, Index);
24647 }
24648
24649 NearestPow2 = PowerOf2Ceil(MaxIndex);
24650 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
24651 NumElems * 2 < NearestPow2) {
24652 unsigned SplitSize = NearestPow2 / 2;
24653 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
24654 InVT.getVectorElementType(), SplitSize);
24655 if (TLI.isTypeLegal(SplitVT) &&
24656 SplitSize + SplitVT.getVectorNumElements() <=
24657 InVT.getVectorNumElements()) {
24658 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24659 DAG.getVectorIdxConstant(SplitSize, DL));
24660 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24661 DAG.getVectorIdxConstant(0, DL));
24662 VecIn.pop_back();
24663 VecIn.push_back(VecIn1);
24664 VecIn.push_back(VecIn2);
24665 DidSplitVec = true;
24666
24667 for (unsigned i = 0; i < NumElems; i++) {
24668 if (VectorMask[i] <= 0)
24669 continue;
24670 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
24671 }
24672 }
24673 }
24674 }
24675
24676 // Sort input vectors by decreasing vector element count,
24677 // while preserving the relative order of equally-sized vectors.
24678 // Note that we keep the first "implicit" zero vector as-is.
24679 SmallVector<SDValue, 8> SortedVecIn(VecIn);
24680 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
24681 [](const SDValue &a, const SDValue &b) {
24682 return a.getValueType().getVectorNumElements() >
24683 b.getValueType().getVectorNumElements();
24684 });
24685
24686 // We now also need to rebuild the VectorMask, because it referenced element
24687 // order in VecIn, and we just sorted them.
24688 for (int &SourceVectorIndex : VectorMask) {
24689 if (SourceVectorIndex <= 0)
24690 continue;
24691 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
24692 assert(Idx > 0 && Idx < SortedVecIn.size() &&
24693 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
24694 SourceVectorIndex = Idx;
24695 }
24696
24697 VecIn = std::move(SortedVecIn);
24698
24699 // TODO: Should this fire if some of the input vectors have an illegal type (like
24700 // it does now), or should we let legalization run its course first?
24701
24702 // Shuffle phase:
24703 // Take pairs of vectors, and shuffle them so that the result has elements
24704 // from these vectors in the correct places.
24705 // For example, given:
24706 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
24707 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
24708 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
24709 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
24710 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
24711 // We will generate:
24712 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
24713 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
24714 SmallVector<SDValue, 4> Shuffles;
24715 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
24716 unsigned LeftIdx = 2 * In + 1;
24717 SDValue VecLeft = VecIn[LeftIdx];
24718 SDValue VecRight =
24719 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
24720
24721 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
24722 VecRight, LeftIdx, DidSplitVec))
24723 Shuffles.push_back(Shuffle);
24724 else
24725 return SDValue();
24726 }
24727
24728 // If we need the zero vector as an "ingredient" in the blend tree, add it
24729 // to the list of shuffles.
24730 if (UsesZeroVector)
24731 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24732 : DAG.getConstantFP(0.0, DL, VT));
24733
24734 // If we only have one shuffle, we're done.
24735 if (Shuffles.size() == 1)
24736 return Shuffles[0];
24737
24738 // Update the vector mask to point to the post-shuffle vectors.
24739 for (int &Vec : VectorMask)
24740 if (Vec == 0)
24741 Vec = Shuffles.size() - 1;
24742 else
24743 Vec = (Vec - 1) / 2;
24744
24745 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24746 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24747 // generate:
24748 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24749 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24750 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24751 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24752 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24753 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24754 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24755
24756 // Make sure the initial size of the shuffle list is even.
24757 if (Shuffles.size() % 2)
24758 Shuffles.push_back(DAG.getUNDEF(VT));
24759
24760 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24761 if (CurSize % 2) {
24762 Shuffles[CurSize] = DAG.getUNDEF(VT);
24763 CurSize++;
24764 }
24765 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24766 int Left = 2 * In;
24767 int Right = 2 * In + 1;
24768 SmallVector<int, 8> Mask(NumElems, -1);
24769 SDValue L = Shuffles[Left];
24770 ArrayRef<int> LMask;
24771 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24772 L.use_empty() && L.getOperand(1).isUndef() &&
24773 L.getOperand(0).getValueType() == L.getValueType();
24774 if (IsLeftShuffle) {
24775 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24776 L = L.getOperand(0);
24777 }
24778 SDValue R = Shuffles[Right];
24779 ArrayRef<int> RMask;
24780 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24781 R.use_empty() && R.getOperand(1).isUndef() &&
24782 R.getOperand(0).getValueType() == R.getValueType();
24783 if (IsRightShuffle) {
24784 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24785 R = R.getOperand(0);
24786 }
24787 for (unsigned I = 0; I != NumElems; ++I) {
24788 if (VectorMask[I] == Left) {
24789 Mask[I] = I;
24790 if (IsLeftShuffle)
24791 Mask[I] = LMask[I];
24792 VectorMask[I] = In;
24793 } else if (VectorMask[I] == Right) {
24794 Mask[I] = I + NumElems;
24795 if (IsRightShuffle)
24796 Mask[I] = RMask[I] + NumElems;
24797 VectorMask[I] = In;
24798 }
24799 }
24800
24801 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24802 }
24803 }
24804 return Shuffles[0];
24805}
24806
24807// Try to turn a build vector of zero extends of extract vector elts into a
24808 // vector zero extend and possibly an extract subvector.
24809// TODO: Support sign extend?
24810// TODO: Allow undef elements?
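// One illustrative instance of the intended transform (placeholder X):
//   v2i32 build_vector (zext (extract_elt v8i16:X, 4)),
//                      (zext (extract_elt v8i16:X, 5))
//     --> v2i32 zero_extend (v2i16 extract_subvector X, 4)
// The starting offset must be a multiple of the result's element count.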
24811SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24812 if (LegalOperations)
24813 return SDValue();
24814
24815 EVT VT = N->getValueType(0);
24816
24817 bool FoundZeroExtend = false;
24818 SDValue Op0 = N->getOperand(0);
24819 auto checkElem = [&](SDValue Op) -> int64_t {
24820 unsigned Opc = Op.getOpcode();
24821 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24822 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24823 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24824 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24825 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24826 return C->getZExtValue();
24827 return -1;
24828 };
24829
24830 // Make sure the first element matches
24831 // (zext (extract_vector_elt X, C))
24832 // Offset must be a constant multiple of the
24833 // known-minimum vector length of the result type.
24834 int64_t Offset = checkElem(Op0);
24835 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24836 return SDValue();
24837
24838 unsigned NumElems = N->getNumOperands();
24839 SDValue In = Op0.getOperand(0).getOperand(0);
24840 EVT InSVT = In.getValueType().getScalarType();
24841 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24842
24843 // Don't create an illegal input type after type legalization.
24844 if (LegalTypes && !TLI.isTypeLegal(InVT))
24845 return SDValue();
24846
24847 // Ensure all the elements come from the same vector and are adjacent.
24848 for (unsigned i = 1; i != NumElems; ++i) {
24849 if ((Offset + i) != checkElem(N->getOperand(i)))
24850 return SDValue();
24851 }
24852
24853 SDLoc DL(N);
24854 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24855 Op0.getOperand(0).getOperand(1));
24856 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24857 VT, In);
24858}
24859
24860// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
24861 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
24862// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
24863 // This pattern can appear during legalization.
24864//
24865// NOTE: This can be generalized to allow more than a single
24866 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
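// A sketch of the shape this targets (little-endian, illustrative values):
//   v2i64 build_vector (zext i32 X to i64), (i64 0)
//     --> v2i64 bitcast (v4i32 build_vector (trunc (zext X)), 0, 0, 0)
// i.e. each wide lane is re-expressed as narrower chunks so the zero-extension
// turns into explicit zero elements (the trunc(zext) pair folds away later).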
24867SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24868 // Don't run this after legalization. Targets may have other preferences.
24869 if (Level >= AfterLegalizeDAG)
24870 return SDValue();
24871
24872 // FIXME: support big-endian.
24873 if (DAG.getDataLayout().isBigEndian())
24874 return SDValue();
24875
24876 EVT VT = N->getValueType(0);
24877 EVT OpVT = N->getOperand(0).getValueType();
24878 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24879
24880 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24881
24882 if (!TLI.isTypeLegal(OpIntVT) ||
24883 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24884 return SDValue();
24885
24886 unsigned EltBitwidth = VT.getScalarSizeInBits();
24887 // NOTE: the actual width of operands may be wider than that!
24888
24889 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24890 // active bits they all have? We'll want to truncate them all to that width.
24891 unsigned ActiveBits = 0;
24892 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24893 for (auto I : enumerate(N->ops())) {
24894 SDValue Op = I.value();
24895 // FIXME: support UNDEF elements?
24896 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24897 unsigned OpActiveBits =
24898 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24899 if (OpActiveBits == 0) {
24900 KnownZeroOps.setBit(I.index());
24901 continue;
24902 }
24903 // Profitability check: don't allow non-zero constant operands.
24904 return SDValue();
24905 }
24906 // Profitability check: there must only be a single non-zero operand,
24907 // and it must be the first operand of the BUILD_VECTOR.
24908 if (I.index() != 0)
24909 return SDValue();
24910 // The operand must be a zero-extension itself.
24911 // FIXME: this could be generalized to known leading zeros check.
24912 if (Op.getOpcode() != ISD::ZERO_EXTEND)
24913 return SDValue();
24914 unsigned CurrActiveBits =
24915 Op.getOperand(0).getValueSizeInBits().getFixedValue();
24916 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24917 ActiveBits = CurrActiveBits;
24918 // We want to at least halve the element size.
24919 if (2 * ActiveBits > EltBitwidth)
24920 return SDValue();
24921 }
24922
24923 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24924 if (ActiveBits == 0)
24925 return SDValue();
24926
24927 // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
24928 // into how many chunks can we split our element width?
24929 EVT NewScalarIntVT, NewIntVT;
24930 std::optional<unsigned> Factor;
24931 // We can split the element into at least two chunks, but not into more
24932 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
24933 // for which the element width is a multiple of it,
24934 // and the resulting types/operations on that chunk width are legal.
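// E.g. with EltBitwidth = 64 and ActiveBits = 8 the loop below tries
// Scale = 8, then (skipping non-divisors of 64) Scale = 4, then Scale = 2,
// keeping the first candidate whose chunk type and operations are legal.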
24935 assert(2 * ActiveBits <= EltBitwidth &&
24936 "We know that half or less bits of the element are active.");
24937 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24938 if (EltBitwidth % Scale != 0)
24939 continue;
24940 unsigned ChunkBitwidth = EltBitwidth / Scale;
24941 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
24942 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
24943 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
24944 Scale * N->getNumOperands());
24945 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
24946 (LegalOperations &&
24947 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
24948 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
24949 continue;
24950 Factor = Scale;
24951 break;
24952 }
24953 if (!Factor)
24954 return SDValue();
24955
24956 SDLoc DL(N);
24957 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
24958
24959 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
24960 SmallVector<SDValue, 16> NewOps;
24961 NewOps.reserve(NewIntVT.getVectorNumElements());
24962 for (auto I : enumerate(N->ops())) {
24963 SDValue Op = I.value();
24964 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
24965 unsigned SrcOpIdx = I.index();
24966 if (KnownZeroOps[SrcOpIdx]) {
24967 NewOps.append(*Factor, ZeroOp);
24968 continue;
24969 }
24970 Op = DAG.getBitcast(OpIntVT, Op);
24971 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
24972 NewOps.emplace_back(Op);
24973 NewOps.append(*Factor - 1, ZeroOp);
24974 }
24975 assert(NewOps.size() == NewIntVT.getVectorNumElements());
24976 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
24977 NewBV = DAG.getBitcast(VT, NewBV);
24978 return NewBV;
24979}
24980
24981SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
24982 EVT VT = N->getValueType(0);
24983
24984 // A vector built entirely of undefs is undef.
24985 if (ISD::allOperandsUndef(N))
24986 return DAG.getUNDEF(VT);
24987
24988 // If this is a splat of a bitcast from another vector, change to a
24989 // concat_vector.
24990 // For example:
24991 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
24992 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
24993 //
24994 // If X is a build_vector itself, the concat can become a larger build_vector.
24995 // TODO: Maybe this is useful for non-splat too?
24996 if (!LegalOperations) {
24997 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
24998 // Only change build_vector to a concat_vector if the splat value type is
24999 // same as the vector element type.
25000 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25001 Splat = peekThroughBitcasts(Splat);
25002 EVT SrcVT = Splat.getValueType();
25003 if (SrcVT.isVector()) {
25004 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25005 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25006 SrcVT.getVectorElementType(), NumElts);
25007 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25008 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25009 SDValue Concat =
25010 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25011 return DAG.getBitcast(VT, Concat);
25012 }
25013 }
25014 }
25015 }
25016
25017 // Check if we can express BUILD VECTOR via subvector extract.
25018 if (!LegalTypes && (N->getNumOperands() > 1)) {
25019 SDValue Op0 = N->getOperand(0);
25020 auto checkElem = [&](SDValue Op) -> uint64_t {
25021 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25022 (Op0.getOperand(0) == Op.getOperand(0)))
25023 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25024 return CNode->getZExtValue();
25025 return -1;
25026 };
25027
25028 int Offset = checkElem(Op0);
25029 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25030 if (Offset + i != checkElem(N->getOperand(i))) {
25031 Offset = -1;
25032 break;
25033 }
25034 }
25035
25036 if ((Offset == 0) &&
25037 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25038 return Op0.getOperand(0);
25039 if ((Offset != -1) &&
25040 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25041 0)) // IDX must be multiple of output size.
25042 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25043 Op0.getOperand(0), Op0.getOperand(1));
25044 }
25045
25046 if (SDValue V = convertBuildVecZextToZext(N))
25047 return V;
25048
25049 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25050 return V;
25051
25052 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25053 return V;
25054
25055 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25056 return V;
25057
25058 if (SDValue V = reduceBuildVecToShuffle(N))
25059 return V;
25060
25061 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25062 // Do this late as some of the above may replace the splat.
25063 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
25064 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25065 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25066 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25067 }
25068
25069 return SDValue();
25070}
25071 
25072 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25073 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25074 EVT OpVT = N->getOperand(0).getValueType();
25075
25076 // If the operands are legal vectors, leave them alone.
25077 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25078 return SDValue();
25079
25080 SDLoc DL(N);
25081 EVT VT = N->getValueType(0);
25082 SmallVector<SDValue, 8> Ops;
25083 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25084
25085 // Keep track of what we encounter.
25086 EVT AnyFPVT;
25087
25088 for (const SDValue &Op : N->ops()) {
25089 if (ISD::BITCAST == Op.getOpcode() &&
25090 !Op.getOperand(0).getValueType().isVector())
25091 Ops.push_back(Op.getOperand(0));
25092 else if (Op.isUndef())
25093 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
25094 else
25095 return SDValue();
25096
25097 // Note whether we encounter an integer or floating point scalar.
25098 // If it's neither, bail out, it could be something weird like x86mmx.
25099 EVT LastOpVT = Ops.back().getValueType();
25100 if (LastOpVT.isFloatingPoint())
25101 AnyFPVT = LastOpVT;
25102 else if (!LastOpVT.isInteger())
25103 return SDValue();
25104 }
25105
25106 // If any of the operands is a floating point scalar bitcast to a vector,
25107 // use floating point types throughout, and bitcast everything.
25108 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25109 if (AnyFPVT != EVT()) {
25110 SVT = AnyFPVT;
25111 for (SDValue &Op : Ops) {
25112 if (Op.getValueType() == SVT)
25113 continue;
25114 if (Op.isUndef())
25115 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
25116 else
25117 Op = DAG.getBitcast(SVT, Op);
25118 }
25119 }
25120
25121 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25122 VT.getSizeInBits() / SVT.getSizeInBits());
25123 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25124}
25125
25126// Attempt to merge nested concat_vectors/undefs.
25127// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25128// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25130 SelectionDAG &DAG) {
25131 EVT VT = N->getValueType(0);
25132
25133 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25134 EVT SubVT;
25135 SDValue FirstConcat;
25136 for (const SDValue &Op : N->ops()) {
25137 if (Op.isUndef())
25138 continue;
25139 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25140 return SDValue();
25141 if (!FirstConcat) {
25142 SubVT = Op.getOperand(0).getValueType();
25143 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25144 return SDValue();
25145 FirstConcat = Op;
25146 continue;
25147 }
25148 if (SubVT != Op.getOperand(0).getValueType())
25149 return SDValue();
25150 }
25151 assert(FirstConcat && "Concat of all-undefs found");
25152
25153 SmallVector<SDValue> ConcatOps;
25154 for (const SDValue &Op : N->ops()) {
25155 if (Op.isUndef()) {
25156 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25157 continue;
25158 }
25159 ConcatOps.append(Op->op_begin(), Op->op_end());
25160 }
25161 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25162}
25163
25164// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25165// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25166// most two distinct vectors the same size as the result, attempt to turn this
25167 // into a legal shuffle.
25168 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
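// For instance (illustrative, placeholder value X):
//   v8i32 concat_vectors (extract_subvector v8i32:X, 4),
//                        (extract_subvector v8i32:X, 0)
//     --> vector_shuffle<4,5,6,7,0,1,2,3> X, undef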
25169 EVT VT = N->getValueType(0);
25170 EVT OpVT = N->getOperand(0).getValueType();
25171
25172 // We currently can't generate an appropriate shuffle for a scalable vector.
25173 if (VT.isScalableVector())
25174 return SDValue();
25175
25176 int NumElts = VT.getVectorNumElements();
25177 int NumOpElts = OpVT.getVectorNumElements();
25178
25179 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25180 SmallVector<int, 8> Mask;
25181
25182 for (SDValue Op : N->ops()) {
25183 Op = peekThroughBitcasts(Op);
25184
25185 // UNDEF nodes convert to UNDEF shuffle mask values.
25186 if (Op.isUndef()) {
25187 Mask.append((unsigned)NumOpElts, -1);
25188 continue;
25189 }
25190
25191 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25192 return SDValue();
25193
25194 // What vector are we extracting the subvector from and at what index?
25195 SDValue ExtVec = Op.getOperand(0);
25196 int ExtIdx = Op.getConstantOperandVal(1);
25197
25198 // We want the EVT of the original extraction to correctly scale the
25199 // extraction index.
25200 EVT ExtVT = ExtVec.getValueType();
25201 ExtVec = peekThroughBitcasts(ExtVec);
25202
25203 // UNDEF nodes convert to UNDEF shuffle mask values.
25204 if (ExtVec.isUndef()) {
25205 Mask.append((unsigned)NumOpElts, -1);
25206 continue;
25207 }
25208
25209 // Ensure that we are extracting a subvector from a vector the same
25210 // size as the result.
25211 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25212 return SDValue();
25213
25214 // Scale the subvector index to account for any bitcast.
25215 int NumExtElts = ExtVT.getVectorNumElements();
25216 if (0 == (NumExtElts % NumElts))
25217 ExtIdx /= (NumExtElts / NumElts);
25218 else if (0 == (NumElts % NumExtElts))
25219 ExtIdx *= (NumElts / NumExtElts);
25220 else
25221 return SDValue();
25222
25223 // At most we can reference 2 inputs in the final shuffle.
25224 if (SV0.isUndef() || SV0 == ExtVec) {
25225 SV0 = ExtVec;
25226 for (int i = 0; i != NumOpElts; ++i)
25227 Mask.push_back(i + ExtIdx);
25228 } else if (SV1.isUndef() || SV1 == ExtVec) {
25229 SV1 = ExtVec;
25230 for (int i = 0; i != NumOpElts; ++i)
25231 Mask.push_back(i + ExtIdx + NumElts);
25232 } else {
25233 return SDValue();
25234 }
25235 }
25236
25237 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25238 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25239 DAG.getBitcast(VT, SV1), Mask, DAG);
25240}
25241 
25242 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25243 unsigned CastOpcode = N->getOperand(0).getOpcode();
25244 switch (CastOpcode) {
25245 case ISD::SINT_TO_FP:
25246 case ISD::UINT_TO_FP:
25247 case ISD::FP_TO_SINT:
25248 case ISD::FP_TO_UINT:
25249 // TODO: Allow more opcodes?
25250 // case ISD::BITCAST:
25251 // case ISD::TRUNCATE:
25252 // case ISD::ZERO_EXTEND:
25253 // case ISD::SIGN_EXTEND:
25254 // case ISD::FP_EXTEND:
25255 break;
25256 default:
25257 return SDValue();
25258 }
25259
25260 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25261 if (!SrcVT.isVector())
25262 return SDValue();
25263
25264 // All operands of the concat must be the same kind of cast from the same
25265 // source type.
25266 SmallVector<SDValue, 4> SrcOps;
25267 for (SDValue Op : N->ops()) {
25268 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25269 Op.getOperand(0).getValueType() != SrcVT)
25270 return SDValue();
25271 SrcOps.push_back(Op.getOperand(0));
25272 }
25273
25274 // The wider cast must be supported by the target. This is unusual because
25275 // the operation support type parameter depends on the opcode. In addition,
25276 // check the other type in the cast to make sure this is really legal.
25277 EVT VT = N->getValueType(0);
25278 EVT SrcEltVT = SrcVT.getVectorElementType();
25279 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25280 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25281 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25282 switch (CastOpcode) {
25283 case ISD::SINT_TO_FP:
25284 case ISD::UINT_TO_FP:
25285 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25286 !TLI.isTypeLegal(VT))
25287 return SDValue();
25288 break;
25289 case ISD::FP_TO_SINT:
25290 case ISD::FP_TO_UINT:
25291 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25292 !TLI.isTypeLegal(ConcatSrcVT))
25293 return SDValue();
25294 break;
25295 default:
25296 llvm_unreachable("Unexpected cast opcode");
25297 }
25298
25299 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
25300 SDLoc DL(N);
25301 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25302 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25303}
25304
25305// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
25306// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
25307 // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
25308 static SDValue combineConcatVectorOfShuffleAndItsOperands(
25309 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25310 bool LegalOperations) {
25311 EVT VT = N->getValueType(0);
25312 EVT OpVT = N->getOperand(0).getValueType();
25313 if (VT.isScalableVector())
25314 return SDValue();
25315
25316 // For now, only allow simple 2-operand concatenations.
25317 if (N->getNumOperands() != 2)
25318 return SDValue();
25319
25320 // Don't create illegal types/shuffles when not allowed to.
25321 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25322 (LegalOperations &&
25323 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
25324 return SDValue();
25325
25326 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25327 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25328 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25329 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25330 // (4) and for now, the SHUFFLE_VECTOR must be unary.
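// A small illustrative instance (v4i32 operands concatenated to v8i32):
//   concat_vectors (vector_shuffle<1,0,3,2> X, undef), X
//     --> vector_shuffle<1,0,3,2,0,1,2,3> (concat_vectors X, undef), undef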
25331 ShuffleVectorSDNode *SVN = nullptr;
25332 for (SDValue Op : N->ops()) {
25333 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25334 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25335 all_of(N->ops(), [CurSVN](SDValue Op) {
25336 // FIXME: can we allow UNDEF operands?
25337 return !Op.isUndef() &&
25338 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25339 })) {
25340 SVN = CurSVN;
25341 break;
25342 }
25343 }
25344 if (!SVN)
25345 return SDValue();
25346
25347 // We are going to pad the shuffle operands, so any index that was picking
25348 // from the second operand must be adjusted.
25349 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25350 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25351
25352 // Identity masks for the operands of the (padded) shuffle.
25353 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25354 MutableArrayRef<int> FirstShufOpIdentityMask =
25355 MutableArrayRef<int>(IdentityMask)
25356 .take_front(OpVT.getVectorNumElements());
25357 MutableArrayRef<int> SecondShufOpIdentityMask =
25358 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
25359 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25360 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25361 OpVT.getVectorNumElements());
25362
25363 // New combined shuffle mask.
25364 SmallVector<int, 32> Mask;
25365 Mask.reserve(VT.getVectorNumElements());
25366 for (SDValue Op : N->ops()) {
25367 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25368 if (Op.getNode() == SVN) {
25369 append_range(Mask, AdjustedMask);
25370 continue;
25371 }
25372 if (Op == SVN->getOperand(0)) {
25373 append_range(Mask, FirstShufOpIdentityMask);
25374 continue;
25375 }
25376 if (Op == SVN->getOperand(1)) {
25377 append_range(Mask, SecondShufOpIdentityMask);
25378 continue;
25379 }
25380 llvm_unreachable("Unexpected operand!");
25381 }
25382
25383 // Don't create illegal shuffle masks.
25384 if (!TLI.isShuffleMaskLegal(Mask, VT))
25385 return SDValue();
25386
25387 // Pad the shuffle operands with UNDEF.
25388 SDLoc dl(N);
25389 std::array<SDValue, 2> ShufOps;
25390 for (auto I : zip(SVN->ops(), ShufOps)) {
25391 SDValue ShufOp = std::get<0>(I);
25392 SDValue &NewShufOp = std::get<1>(I);
25393 if (ShufOp.isUndef())
25394 NewShufOp = DAG.getUNDEF(VT);
25395 else {
25396 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25397 DAG.getUNDEF(OpVT));
25398 ShufOpParts[0] = ShufOp;
25399 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25400 }
25401 }
25402 // Finally, create the new wide shuffle.
25403 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25404}
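// Illustrative example (not part of the source): with X : v4i32 and the unary
// shuffle S = vector_shuffle<3,3,3,3> X, undef, the helper above rewrites
//   v8i32 concat_vectors S, X
// into
//   v8i32 vector_shuffle<3,3,3,3,0,1,2,3> (concat_vectors X, undef), undef
// assuming v8i32 is legal and the target accepts the combined shuffle mask.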
25405
25406static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25407 const TargetLowering &TLI,
25408 bool LegalTypes,
25409 bool LegalOperations) {
25410 EVT VT = N->getValueType(0);
25411
25412 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25413 // the type and operation is legal. The Hexagon target has custom
25414 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25415 // concatenates them. Therefore, custom lowering must also be rejected in
25416 // order to avoid an infinite loop.
25417 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25418 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25419 return SDValue();
25420
25421 SDValue Op0 = N->getOperand(0);
25422 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25423 return SDValue();
25424
25425 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25426}
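// Illustrative example (not part of the source): when SPLAT_VECTOR is legal
// for the wider type, the helper above folds
//   nxv4i32 concat_vectors (nxv2i32 splat_vector X), (nxv2i32 splat_vector X)
// into
//   nxv4i32 splat_vector X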
25427
25428SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25429 // If we only have one input vector, we don't need to do any concatenation.
25430 if (N->getNumOperands() == 1)
25431 return N->getOperand(0);
25432
25433 // Check if all of the operands are undefs.
25434 EVT VT = N->getValueType(0);
25435 if (ISD::allOperandsUndef(N))
25436 return DAG.getUNDEF(VT);
25437
25438 // Optimize concat_vectors where all but the first of the vectors are undef.
25439 if (all_of(drop_begin(N->ops()),
25440 [](const SDValue &Op) { return Op.isUndef(); })) {
25441 SDValue In = N->getOperand(0);
25442 assert(In.getValueType().isVector() && "Must concat vectors");
25443
25444 // If the input is a concat_vectors, just make a larger concat by padding
25445 // with smaller undefs.
25446 //
25447 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25448 // here could cause an infinite loop. That legalizing happens when LegalDAG
25449 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25450 // scalable.
25451 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25452 !(LegalDAG && In.getValueType().isScalableVector())) {
25453 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25454 SmallVector<SDValue, 4> Ops(In->ops());
25455 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25456 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25457 }
25458
25459 SDValue Scalar = peekThroughOneUseBitcasts(In);
25460
25461 // concat_vectors(scalar_to_vector(scalar), undef) ->
25462 // scalar_to_vector(scalar)
25463 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25464 Scalar.hasOneUse()) {
25465 EVT SVT = Scalar.getValueType().getVectorElementType();
25466 if (SVT == Scalar.getOperand(0).getValueType())
25467 Scalar = Scalar.getOperand(0);
25468 }
25469
25470 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25471 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25472 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25473 // look through the trunc so we can still do the transform:
25474 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25475 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25476 !TLI.isTypeLegal(Scalar.getValueType()) &&
25477 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25478 Scalar = Scalar->getOperand(0);
25479
25480 EVT SclTy = Scalar.getValueType();
25481
25482 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
25483 return SDValue();
25484
25485 // Bail out if the vector size is not a multiple of the scalar size.
25486 if (VT.getSizeInBits() % SclTy.getSizeInBits())
25487 return SDValue();
25488
25489 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
25490 if (VNTNumElms < 2)
25491 return SDValue();
25492
25493 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
25494 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
25495 return SDValue();
25496
25497 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
25498 return DAG.getBitcast(VT, Res);
25499 }
25500 }
25501
25502 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
25503 // We have already tested above for an UNDEF only concatenation.
25504 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
25505 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
25506 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
25507 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
25508 };
25509 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
25510 SmallVector<SDValue, 8> Opnds;
25511 EVT SVT = VT.getScalarType();
25512
25513 EVT MinVT = SVT;
25514 if (!SVT.isFloatingPoint()) {
25515 // If the BUILD_VECTORs are built from integers, they may have different
25516 // operand types. Get the smallest type and truncate all operands to it.
25517 bool FoundMinVT = false;
25518 for (const SDValue &Op : N->ops())
25519 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25520 EVT OpSVT = Op.getOperand(0).getValueType();
25521 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
25522 FoundMinVT = true;
25523 }
25524 assert(FoundMinVT && "Concat vector type mismatch");
25525 }
25526
25527 for (const SDValue &Op : N->ops()) {
25528 EVT OpVT = Op.getValueType();
25529 unsigned NumElts = OpVT.getVectorNumElements();
25530
25531 if (Op.isUndef())
25532 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
25533
25534 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25535 if (SVT.isFloatingPoint()) {
25536 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
25537 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
25538 } else {
25539 for (unsigned i = 0; i != NumElts; ++i)
25540 Opnds.push_back(
25541 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
25542 }
25543 }
25544 }
25545
25546 assert(VT.getVectorNumElements() == Opnds.size() &&
25547 "Concat vector type mismatch");
25548 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
25549 }
25550
25551 if (SDValue V =
25552 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25553 return V;
25554
25555 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
25556 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
25557 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
25558 return V;
25559
25560 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
25561 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
25562 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
25563 return V;
25564
25565 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
25566 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
25567 return V;
25568 }
25569
25570 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
25571 return V;
25572
25573 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
25574 N, DAG, TLI, LegalTypes, LegalOperations))
25575 return V;
25576
25577 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
25578 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
25579 // operands and look for a CONCAT operation that places the incoming vectors
25580 // at the exact same location.
25581 //
25582 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
25583 SDValue SingleSource = SDValue();
25584 unsigned PartNumElem =
25585 N->getOperand(0).getValueType().getVectorMinNumElements();
25586
25587 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25588 SDValue Op = N->getOperand(i);
25589
25590 if (Op.isUndef())
25591 continue;
25592
25593 // Check if this is the identity extract:
25594 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25595 return SDValue();
25596
25597 // Find the single incoming vector for the extract_subvector.
25598 if (SingleSource.getNode()) {
25599 if (Op.getOperand(0) != SingleSource)
25600 return SDValue();
25601 } else {
25602 SingleSource = Op.getOperand(0);
25603
25604 // Check the source type is the same as the type of the result.
25605 // If not, this concat may extend the vector, so we can not
25606 // optimize it away.
25607 if (SingleSource.getValueType() != N->getValueType(0))
25608 return SDValue();
25609 }
25610
25611 // Check that we are reading from the identity index.
25612 unsigned IdentityIndex = i * PartNumElem;
25613 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
25614 return SDValue();
25615 }
25616
25617 if (SingleSource.getNode())
25618 return SingleSource;
25619
25620 return SDValue();
25621}
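// Illustrative example (not part of the source): the single-source scan at the
// end of visitCONCAT_VECTORS recognises
//   v8i32 concat_vectors (v4i32 extract_subvector X:v8i32, 0),
//                        (v4i32 extract_subvector X, 4)
// as a no-op and returns X directly, because every piece is read from its
// identity position and the source type matches the result type.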
25622
25623SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25624 // Check to see if all operands are identical.
25625 if (!llvm::all_equal(N->op_values()))
25626 return SDValue();
25627
25628 // Check to see if the identical operand is a splat.
25629 if (!DAG.isSplatValue(N->getOperand(0)))
25630 return SDValue();
25631
25632 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
25633 SmallVector<SDValue, 8> Ops;
25634 Ops.append(N->op_values().begin(), N->op_values().end());
25635 return CombineTo(N, &Ops);
25636}
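// Illustrative example (not part of the source): interleaving two copies of
// the same splat, e.g. vector_interleave (splat_vector X), (splat_vector X),
// produces the same splats again, so the results are simply replaced with the
// original operands.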
25637
25638// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
25639// if the subvector can be sourced for free.
25640static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
25641 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25642 V.getOperand(1).getValueType() == SubVT &&
25643 V.getConstantOperandAPInt(2) == Index) {
25644 return V.getOperand(1);
25645 }
25646 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
25647 V.getOperand(0).getValueType() == SubVT &&
25648 (Index % SubVT.getVectorMinNumElements()) == 0) {
25649 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
25650 return V.getOperand(SubIdx);
25651 }
25652 return SDValue();
25653}
25654
25655static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25656 unsigned Index, const SDLoc &DL,
25657 SelectionDAG &DAG,
25658 bool LegalOperations) {
25659 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25660 unsigned BinOpcode = BinOp.getOpcode();
25661 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
25662 return SDValue();
25663
25664 EVT VecVT = BinOp.getValueType();
25665 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
25666 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
25667 return SDValue();
25668 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
25669 return SDValue();
25670
25671 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
25672 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
25673
25674 // TODO: We could handle the case where only 1 operand is being inserted by
25675 // creating an extract of the other operand, but that requires checking
25676 // number of uses and/or costs.
25677 if (!Sub0 || !Sub1)
25678 return SDValue();
25679
25680 // We are inserting both operands of the wide binop only to extract back
25681 // to the narrow vector size. Eliminate all of the insert/extract:
25682 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25683 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
25684}
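// Illustrative example (not part of the source): with X, Y : v4i32 both
// inserted at index 4 of wider v8i32 values, the helper above rewrites
//   v4i32 extract_subvector (v8i32 add (insert_subvector A, X, 4),
//                                      (insert_subvector B, Y, 4)), 4
// into
//   v4i32 add X, Y
// assuming ADD is legal or custom for v4i32.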
25685
25686/// If we are extracting a subvector produced by a wide binary operator try
25687/// to use a narrow binary operator and/or avoid concatenation and extraction.
25688static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25689 const SDLoc &DL, SelectionDAG &DAG,
25690 bool LegalOperations) {
25691 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
25692 // some of these bailouts with other transforms.
25693
25694 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
25695 LegalOperations))
25696 return V;
25697
25698 // We are looking for an optionally bitcasted wide vector binary operator
25699 // feeding an extract subvector.
25700 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25701 SDValue BinOp = peekThroughBitcasts(Src);
25702 unsigned BOpcode = BinOp.getOpcode();
25703 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
25704 return SDValue();
25705
25706 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
25707 // reduced to the unary fneg when it is visited, and we probably want to deal
25708 // with fneg in a target-specific way.
25709 if (BOpcode == ISD::FSUB) {
25710 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
25711 if (C && C->getValueAPF().isNegZero())
25712 return SDValue();
25713 }
25714
25715 // The binop must be a vector type, so we can extract some fraction of it.
25716 EVT WideBVT = BinOp.getValueType();
25717 // The optimisations below currently assume we are dealing with fixed length
25718 // vectors. It is possible to add support for scalable vectors, but at the
25719 // moment we've done no analysis to prove whether they are profitable or not.
25720 if (!WideBVT.isFixedLengthVector())
25721 return SDValue();
25722
25723 assert((Index % VT.getVectorNumElements()) == 0 &&
25724 "Extract index is not a multiple of the vector length.");
25725
25726 // Bail out if this is not a proper multiple width extraction.
25727 unsigned WideWidth = WideBVT.getSizeInBits();
25728 unsigned NarrowWidth = VT.getSizeInBits();
25729 if (WideWidth % NarrowWidth != 0)
25730 return SDValue();
25731
25732 // Bail out if we are extracting a fraction of a single operation. This can
25733 // occur because we potentially looked through a bitcast of the binop.
25734 unsigned NarrowingRatio = WideWidth / NarrowWidth;
25735 unsigned WideNumElts = WideBVT.getVectorNumElements();
25736 if (WideNumElts % NarrowingRatio != 0)
25737 return SDValue();
25738
25739 // Bail out if the target does not support a narrower version of the binop.
25740 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
25741 WideNumElts / NarrowingRatio);
25742 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
25743 LegalOperations))
25744 return SDValue();
25745
25746 // If extraction is cheap, we don't need to look at the binop operands
25747 // for concat ops. The narrow binop alone makes this transform profitable.
25748 // We can't just reuse the original extract index operand because we may have
25749 // bitcasted.
25750 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
25751 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
25752 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
25753 BinOp.hasOneUse() && Src->hasOneUse()) {
25754 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25755 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25756 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25757 BinOp.getOperand(0), NewExtIndex);
25758 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25759 BinOp.getOperand(1), NewExtIndex);
25760 SDValue NarrowBinOp =
25761 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25762 return DAG.getBitcast(VT, NarrowBinOp);
25763 }
25764
25765 // Only handle the case where we are doubling and then halving. A larger ratio
25766 // may require more than two narrow binops to replace the wide binop.
25767 if (NarrowingRatio != 2)
25768 return SDValue();
25769
25770 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25771 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25772 // flavors, but no other 256-bit integer support. This could be extended to
25773 // handle any binop, but that may require fixing/adding other folds to avoid
25774 // codegen regressions.
25775 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25776 return SDValue();
25777
25778 // We need at least one concatenation operation of a binop operand to make
25779 // this transform worthwhile. The concat must double the input vector sizes.
25780 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25781 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25782 return V.getOperand(ConcatOpNum);
25783 return SDValue();
25784 };
25785 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25786 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25787
25788 if (SubVecL || SubVecR) {
25789 // If a binop operand was not the result of a concat, we must extract a
25790 // half-sized operand for our new narrow binop:
25791 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25792 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25793 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25794 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25795 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25796 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25797 BinOp.getOperand(0), IndexC);
25798
25799 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25800 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25801 BinOp.getOperand(1), IndexC);
25802
25803 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25804 return DAG.getBitcast(VT, NarrowBinOp);
25805 }
25806
25807 return SDValue();
25808}
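// Illustrative example (not part of the source), in the spirit of the AVX1
// case mentioned above:
//   v4i32 extract_subvector (v8i32 and (concat_vectors X1, X2), Y), 4
// can become
//   v4i32 and X2, (v4i32 extract_subvector Y, 4)
// assuming a v4i32 AND is available; the concat feeding the used half
// disappears entirely.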
25809
25810/// If we are extracting a subvector from a wide vector load, convert to a
25811/// narrow load to eliminate the extraction:
25812/// (extract_subvector (load wide vector)) --> (load narrow vector)
25813static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
25814 const SDLoc &DL, SelectionDAG &DAG) {
25815 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25816 if (DAG.getDataLayout().isBigEndian())
25817 return SDValue();
25818
25819 auto *Ld = dyn_cast<LoadSDNode>(Src);
25820 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
25821 return SDValue();
25822
25823 // We can only create byte sized loads.
25824 if (!VT.isByteSized())
25825 return SDValue();
25826
25827 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25829 return SDValue();
25830
25831 unsigned NumElts = VT.getVectorMinNumElements();
25832 // A fixed length vector being extracted from a scalable vector
25833 // may not be any *smaller* than the scalable one.
25834 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25835 return SDValue();
25836
25837 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25838 // multiple of the minimum number of elements in the result type.
25839 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25840 "multiple of the result's element count");
25841
25842 // It's fine to use TypeSize here as we know the offset will not be negative.
25843 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25844 std::optional<unsigned> ByteOffset;
25845 if (Offset.isFixed())
25846 ByteOffset = Offset.getFixedValue();
25847
25848 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
25849 return SDValue();
25850
25851 // The narrow load will be offset from the base address of the old load if
25852 // we are extracting from something besides index 0 (little-endian).
25853 // TODO: Use "BaseIndexOffset" to make this more effective.
25854 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25855
25857 MachineMemOperand *MMO;
25858 if (Offset.isScalable()) {
25859 MachinePointerInfo MPI =
25860 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25861 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
25862 } else
25863 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25864 VT.getStoreSize());
25865
25866 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25867 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25868 return NewLd;
25869}
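// Illustrative example (not part of the source): extracting the high half of a
// wide, simple little-endian load, e.g.
//   v2i64 extract_subvector (v4i64 load %p), 2
// becomes a narrow load from an offset address,
//   v2i64 load (%p + 16 bytes)
// provided the target's shouldReduceLoadWidth hook agrees.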
25870
25871/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25872/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25873/// EXTRACT_SUBVECTOR(Op?, ?),
25874/// Mask'))
25875/// iff it is legal and profitable to do so. Notably, the trimmed mask
25876/// (containing only the elements that are extracted)
25877/// must reference at most two subvectors.
25878static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
25879 unsigned Index,
25880 const SDLoc &DL,
25881 SelectionDAG &DAG,
25882 bool LegalOperations) {
25883 // Only deal with non-scalable vectors.
25884 EVT WideVT = Src.getValueType();
25885 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25886 return SDValue();
25887
25888 // The operand must be a shufflevector.
25889 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
25890 if (!WideShuffleVector)
25891 return SDValue();
25892
25893 // The old shuffle needs to go away.
25894 if (!WideShuffleVector->hasOneUse())
25895 return SDValue();
25896
25897 // And the narrow shufflevector that we'll form must be legal.
25898 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25899 if (LegalOperations &&
25900 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
25901 return SDValue();
25902
25903 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25904 assert((Index % NumEltsExtracted) == 0 &&
25905 "Extract index is not a multiple of the output vector length.");
25906
25907 int WideNumElts = WideVT.getVectorNumElements();
25908
25909 SmallVector<int, 16> NewMask;
25910 NewMask.reserve(NumEltsExtracted);
25911 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25912 DemandedSubvectors;
25913
25914 // Try to decode the wide mask into narrow mask from at most two subvectors.
25915 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
25916 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25917 "Out-of-bounds shuffle mask?");
25918
25919 if (M < 0) {
25920 // Does not depend on operands, does not require adjustment.
25921 NewMask.emplace_back(M);
25922 continue;
25923 }
25924
25925 // From which operand of the shuffle does this shuffle mask element pick?
25926 int WideShufOpIdx = M / WideNumElts;
25927 // Which element of that operand is picked?
25928 int OpEltIdx = M % WideNumElts;
25929
25930 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25931 "Shuffle mask vector decomposition failure.");
25932
25933 // And which NumEltsExtracted-sized subvector of that operand is that?
25934 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25935 // And which element within that subvector of that operand is that?
25936 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25937
25938 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25939 "Shuffle mask subvector decomposition failure.");
25940
25941 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
25942 WideShufOpIdx * WideNumElts) == M &&
25943 "Shuffle mask full decomposition failure.");
25944
25945 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25946
25947 if (Op.isUndef()) {
25948 // Picking from an undef operand. Let's adjust mask instead.
25949 NewMask.emplace_back(-1);
25950 continue;
25951 }
25952
25953 const std::pair<SDValue, int> DemandedSubvector =
25954 std::make_pair(Op, OpSubvecIdx);
25955
25956 if (DemandedSubvectors.insert(DemandedSubvector)) {
25957 if (DemandedSubvectors.size() > 2)
25958 return SDValue(); // We can't handle more than two subvectors.
25959 // How many elements into the WideVT does this subvector start?
25960 int Index = NumEltsExtracted * OpSubvecIdx;
25961 // Bail out if the extraction isn't going to be cheap.
25962 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
25963 return SDValue();
25964 }
25965
25966 // Ok, but from which operand of the new shuffle will this element pick?
25967 int NewOpIdx =
25968 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
25969 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
25970
25971 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
25972 NewMask.emplace_back(AdjM);
25973 }
25974 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
25975 assert(DemandedSubvectors.size() <= 2 &&
25976 "Should have ended up demanding at most two subvectors.");
25977
25978 // Did we discover that the shuffle does not actually depend on operands?
25979 if (DemandedSubvectors.empty())
25980 return DAG.getUNDEF(NarrowVT);
25981
25982 // Profitability check: only deal with extractions from the first subvector
25983 // unless the mask becomes an identity mask.
25984 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
25985 any_of(NewMask, [](int M) { return M < 0; }))
25986 for (auto &DemandedSubvector : DemandedSubvectors)
25987 if (DemandedSubvector.second != 0)
25988 return SDValue();
25989
25990 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
25991 // operand[s]/index[es], so there is no point in checking for its legality.
25992
25993 // Do not turn a legal shuffle into an illegal one.
25994 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
25995 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
25996 return SDValue();
25997
25998 SmallVector<SDValue, 2> NewOps;
25999 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26000 &DemandedSubvector : DemandedSubvectors) {
26001 // How many elements into the WideVT does this subvector start?
26002 int Index = NumEltsExtracted * DemandedSubvector.second;
26003 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26004 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26005 DemandedSubvector.first, IndexC));
26006 }
26007 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26008 "Should end up with either one or two ops");
26009
26010 // If we ended up with only one operand, pad with an undef.
26011 if (NewOps.size() == 1)
26012 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26013
26014 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26015}
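// Illustrative example (not part of the source): extracting the low half of a
// two-input interleaving shuffle, e.g.
//   v4i32 extract_subvector (v8i32 vector_shuffle<0,8,1,9,u,u,u,u> A, B), 0
// demands only the first v4i32 subvector of each operand, so it can become
//   v4i32 vector_shuffle<0,4,1,5> (extract_subvector A, 0),
//                                 (extract_subvector B, 0)
// assuming the narrow mask is legal and the subvector extracts are cheap.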
26016
26017SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26018 EVT NVT = N->getValueType(0);
26019 SDValue V = N->getOperand(0);
26020 uint64_t ExtIdx = N->getConstantOperandVal(1);
26021 SDLoc DL(N);
26022
26023 // Extract from UNDEF is UNDEF.
26024 if (V.isUndef())
26025 return DAG.getUNDEF(NVT);
26026
26027 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26028 return NarrowLoad;
26029
26030 // Combine an extract of an extract into a single extract_subvector.
26031 // ext (ext X, C), 0 --> ext X, C
26032 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26033 // The index has to be a multiple of the new result type's known minimum
26034 // vector length.
26035 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26036 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26037 V.getConstantOperandVal(1)) &&
26039 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26040 V.getOperand(1));
26041 }
26042 }
26043
26044 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
26045 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26046 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26047 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26048 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26049
26050 // extract_subvector(insert_subvector(x,y,c1),c2)
26051 // --> extract_subvector(y,c2-c1)
26052 // iff we're just extracting from the inserted subvector.
26053 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26054 SDValue InsSub = V.getOperand(1);
26055 EVT InsSubVT = InsSub.getValueType();
26056 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26057 unsigned InsIdx = V.getConstantOperandVal(2);
26058 unsigned NumSubElts = NVT.getVectorMinNumElements();
26059 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26060 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26061 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26062 V.getValueType().isFixedLengthVector())
26063 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26064 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26065 }
26066
26067 // Try to move vector bitcast after extract_subv by scaling extraction index:
26068 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
26069 if (V.getOpcode() == ISD::BITCAST &&
26070 V.getOperand(0).getValueType().isVector() &&
26071 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26072 SDValue SrcOp = V.getOperand(0);
26073 EVT SrcVT = SrcOp.getValueType();
26074 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26075 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26076 if ((SrcNumElts % DestNumElts) == 0) {
26077 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26078 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26079 EVT NewExtVT =
26080 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26081 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26082 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26083 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26084 V.getOperand(0), NewIndex);
26085 return DAG.getBitcast(NVT, NewExtract);
26086 }
26087 }
26088 if ((DestNumElts % SrcNumElts) == 0) {
26089 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26090 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26091 ElementCount NewExtEC =
26092 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26093 EVT ScalarVT = SrcVT.getScalarType();
26094 if ((ExtIdx % DestSrcRatio) == 0) {
26095 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26096 EVT NewExtVT =
26097 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26098 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26099 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26100 SDValue NewExtract =
26101 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26102 V.getOperand(0), NewIndex);
26103 return DAG.getBitcast(NVT, NewExtract);
26104 }
26105 if (NewExtEC.isScalar() &&
26106 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
26107 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26108 SDValue NewExtract =
26109 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26110 V.getOperand(0), NewIndex);
26111 return DAG.getBitcast(NVT, NewExtract);
26112 }
26113 }
26114 }
26115 }
26116 }
26117
26118 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26119 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26120 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26121 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26122 "Concat and extract subvector do not change element type");
26123
26124 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26125 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26126
26127 // If the concatenated source types match this extract, it's a direct
26128 // simplification:
26129 // extract_subvec (concat V1, V2, ...), i --> Vi
26130 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26131 return V.getOperand(ConcatOpIdx);
26132
26133 // If the concatenated source vectors are a multiple length of this extract,
26134 // then extract a fraction of one of those source vectors directly from a
26135 // concat operand. Example:
26136 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26137 // v2i8 extract_subvec v8i8 Y, 6
26138 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26139 ConcatSrcNumElts % ExtNumElts == 0) {
26140 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26141 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26142 "Trying to extract from >1 concat operand?");
26143 assert(NewExtIdx % ExtNumElts == 0 &&
26144 "Extract index is not a multiple of the input vector length.");
26145 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26146 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26147 V.getOperand(ConcatOpIdx), NewIndexC);
26148 }
26149 }
26150
26151 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26152 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26153 return Shuffle;
26154
26155 if (SDValue NarrowBOp =
26156 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26157 return NarrowBOp;
26158
26160
26161 // If the input is a build vector, try to make a smaller build vector.
26162 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26163 EVT InVT = V.getValueType();
26164 unsigned ExtractSize = NVT.getSizeInBits();
26165 unsigned EltSize = InVT.getScalarSizeInBits();
26166 // Only do this if we won't split any elements.
26167 if (ExtractSize % EltSize == 0) {
26168 unsigned NumElems = ExtractSize / EltSize;
26169 EVT EltVT = InVT.getVectorElementType();
26170 EVT ExtractVT =
26171 NumElems == 1 ? EltVT
26172 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26173 if ((Level < AfterLegalizeDAG ||
26174 (NumElems == 1 ||
26175 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26176 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26177 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26178
26179 if (NumElems == 1) {
26180 SDValue Src = V->getOperand(IdxVal);
26181 if (EltVT != Src.getValueType())
26182 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26183 return DAG.getBitcast(NVT, Src);
26184 }
26185
26186 // Extract the pieces from the original build_vector.
26187 SDValue BuildVec =
26188 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26189 return DAG.getBitcast(NVT, BuildVec);
26190 }
26191 }
26192 }
26193
26194 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26195 // Handle only simple case where vector being inserted and vector
26196 // being extracted are of same size.
26197 EVT SmallVT = V.getOperand(1).getValueType();
26198 if (NVT.bitsEq(SmallVT)) {
26199 // Combine:
26200 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26201 // Into:
26202 // indices are equal or bit offsets are equal => V1
26203 // otherwise => (extract_subvec V1, ExtIdx)
26204 uint64_t InsIdx = V.getConstantOperandVal(2);
26205 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26206 ExtIdx * NVT.getScalarSizeInBits()) {
26207 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26208 return DAG.getBitcast(NVT, V.getOperand(1));
26209 } else {
26210 return DAG.getNode(
26211 ISD::EXTRACT_SUBVECTOR, DL, NVT,
26212 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26213 N->getOperand(1));
26214 }
26215 }
26216 }
26217
26218 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26219 // simplify it based on the (valid) extractions.
26220 if (!V.getValueType().isScalableVector() &&
26221 llvm::all_of(V->users(), [&](SDNode *Use) {
26222 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26223 Use->getOperand(0) == V;
26224 })) {
26225 unsigned NumElts = V.getValueType().getVectorNumElements();
26226 APInt DemandedElts = APInt::getZero(NumElts);
26227 for (SDNode *User : V->users()) {
26228 unsigned ExtIdx = User->getConstantOperandVal(1);
26229 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26230 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26231 }
26232 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26233 // We simplified the vector operand of this extract subvector. If this
26234 // extract is not dead, visit it again so it is folded properly.
26235 if (N->getOpcode() != ISD::DELETED_NODE)
26236 AddToWorklist(N);
26237 return SDValue(N, 0);
26238 }
26239 } else {
26240 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26241 return SDValue(N, 0);
26242 }
26243
26244 return SDValue();
26245}
26246
26247/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26248/// followed by concatenation. Narrow vector ops may have better performance
26249/// than wide ops, and this can unlock further narrowing of other vector ops.
26250/// Targets can invert this transform later if it is not profitable.
26251static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26252 SelectionDAG &DAG) {
26253 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26254 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26255 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26256 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26257 return SDValue();
26258
26259 // Split the wide shuffle mask into halves. Any mask element that is accessing
26260 // operand 1 is offset down to account for narrowing of the vectors.
26261 ArrayRef<int> Mask = Shuf->getMask();
26262 EVT VT = Shuf->getValueType(0);
26263 unsigned NumElts = VT.getVectorNumElements();
26264 unsigned HalfNumElts = NumElts / 2;
26265 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26266 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26267 for (unsigned i = 0; i != NumElts; ++i) {
26268 if (Mask[i] == -1)
26269 continue;
26270 // If we reference the upper (undef) subvector then the element is undef.
26271 if ((Mask[i] % NumElts) >= HalfNumElts)
26272 continue;
26273 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26274 if (i < HalfNumElts)
26275 Mask0[i] = M;
26276 else
26277 Mask1[i - HalfNumElts] = M;
26278 }
26279
26280 // Ask the target if this is a valid transform.
26281 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26282 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26283 HalfNumElts);
26284 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26285 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26286 return SDValue();
26287
26288 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26289 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26290 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26291 SDLoc DL(Shuf);
26292 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26293 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26294 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26295}
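// Illustrative example (not part of the source): with X, Y : v4i32,
//   v8i32 vector_shuffle<0,8,1,9,2,10,3,11> (concat_vectors X, undef),
//                                           (concat_vectors Y, undef)
// splits into the half masks <0,4,1,5> and <2,6,3,7>, giving
//   v8i32 concat_vectors (v4i32 vector_shuffle<0,4,1,5> X, Y),
//                        (v4i32 vector_shuffle<2,6,3,7> X, Y)
// provided the target reports both half masks as legal.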
26296
26297// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26298// or turn a shuffle of a single concat into simpler shuffle then concat.
26299static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26300 EVT VT = N->getValueType(0);
26301 unsigned NumElts = VT.getVectorNumElements();
26302
26303 SDValue N0 = N->getOperand(0);
26304 SDValue N1 = N->getOperand(1);
26305 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26306 ArrayRef<int> Mask = SVN->getMask();
26307
26308 SmallVector<SDValue, 4> Ops;
26309 EVT ConcatVT = N0.getOperand(0).getValueType();
26310 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26311 unsigned NumConcats = NumElts / NumElemsPerConcat;
26312
26313 auto IsUndefMaskElt = [](int i) { return i == -1; };
26314
26315 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26316 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26317 // half vector elements.
26318 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26319 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26320 IsUndefMaskElt)) {
26321 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26322 N0.getOperand(1),
26323 Mask.slice(0, NumElemsPerConcat));
26324 N1 = DAG.getUNDEF(ConcatVT);
26325 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26326 }
26327
26328 // Look at every vector that's inserted. We're looking for exact
26329 // subvector-sized copies from a concatenated vector.
26330 for (unsigned I = 0; I != NumConcats; ++I) {
26331 unsigned Begin = I * NumElemsPerConcat;
26332 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26333
26334 // Make sure we're dealing with a copy.
26335 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26336 Ops.push_back(DAG.getUNDEF(ConcatVT));
26337 continue;
26338 }
26339
26340 int OpIdx = -1;
26341 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26342 if (IsUndefMaskElt(SubMask[i]))
26343 continue;
26344 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26345 return SDValue();
26346 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26347 if (0 <= OpIdx && EltOpIdx != OpIdx)
26348 return SDValue();
26349 OpIdx = EltOpIdx;
26350 }
26351 assert(0 <= OpIdx && "Unknown concat_vectors op");
26352
26353 if (OpIdx < (int)N0.getNumOperands())
26354 Ops.push_back(N0.getOperand(OpIdx));
26355 else
26356 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26357 }
26358
26359 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26360}
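// Illustrative example (not part of the source): with A, B, C, D : v2i32,
//   v8i32 vector_shuffle<2,3,0,1,6,7,u,u> (concat_vectors A, B, C, D), undef
// copies whole v2i32 pieces, so it can be rebuilt as
//   v8i32 concat_vectors B, A, D, undef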
26361
26362// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26363// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26364//
26365// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26366// a simplification in some sense, but it isn't appropriate in general: some
26367// BUILD_VECTORs are substantially cheaper than others. The general case
26368// of a BUILD_VECTOR requires inserting each element individually (or
26369// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26370// all constants is a single constant pool load. A BUILD_VECTOR where each
26371// element is identical is a splat. A BUILD_VECTOR where most of the operands
26372// are undef lowers to a small number of element insertions.
26373//
26374// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26375// We don't fold shuffles where one side is a non-zero constant, and we don't
26376// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26377// non-constant operands. This seems to work out reasonably well in practice.
26378static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26379 SelectionDAG &DAG,
26380 const TargetLowering &TLI) {
26381 EVT VT = SVN->getValueType(0);
26382 unsigned NumElts = VT.getVectorNumElements();
26383 SDValue N0 = SVN->getOperand(0);
26384 SDValue N1 = SVN->getOperand(1);
26385
26386 if (!N0->hasOneUse())
26387 return SDValue();
26388
26389 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
26390 // discussed above.
26391 if (!N1.isUndef()) {
26392 if (!N1->hasOneUse())
26393 return SDValue();
26394
26395 bool N0AnyConst = isAnyConstantBuildVector(N0);
26396 bool N1AnyConst = isAnyConstantBuildVector(N1);
26397 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26398 return SDValue();
26399 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26400 return SDValue();
26401 }
26402
26403 // If both inputs are splats of the same value then we can safely merge this
26404 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26405 bool IsSplat = false;
26406 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26407 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26408 if (BV0 && BV1)
26409 if (SDValue Splat0 = BV0->getSplatValue())
26410 IsSplat = (Splat0 == BV1->getSplatValue());
26411
26412 SmallVector<SDValue, 16> Ops;
26413 SmallSet<SDValue, 16> DuplicateOps;
26414 for (int M : SVN->getMask()) {
26415 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26416 if (M >= 0) {
26417 int Idx = M < (int)NumElts ? M : M - NumElts;
26418 SDValue &S = (M < (int)NumElts ? N0 : N1);
26419 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26420 Op = S.getOperand(Idx);
26421 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26422 SDValue Op0 = S.getOperand(0);
26423 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26424 } else {
26425 // Operand can't be combined - bail out.
26426 return SDValue();
26427 }
26428 }
26429
26430 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26431 // generating a splat; semantically, this is fine, but it's likely to
26432 // generate low-quality code if the target can't reconstruct an appropriate
26433 // shuffle.
26434 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26435 if (!IsSplat && !DuplicateOps.insert(Op).second)
26436 return SDValue();
26437
26438 Ops.push_back(Op);
26439 }
26440
26441 // BUILD_VECTOR requires all inputs to be of the same type, find the
26442 // maximum type and extend them all.
26443 EVT SVT = VT.getScalarType();
26444 if (SVT.isInteger())
26445 for (SDValue &Op : Ops)
26446 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26447 if (SVT != VT.getScalarType())
26448 for (SDValue &Op : Ops)
26449 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26450 : (TLI.isZExtFree(Op.getValueType(), SVT)
26451 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26452 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26453 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26454}
26455
26456// Match shuffles that can be converted to *_vector_extend_in_reg.
26457// This is often generated during legalization.
26458// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26459// and returns the EVT to which the extension should be performed.
26460// NOTE: this assumes that the src is the first operand of the shuffle.
26461static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26462 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26463 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26464 bool LegalOperations) {
26465 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26466
26467 // TODO Add support for big-endian when we have a test case.
26468 if (!VT.isInteger() || IsBigEndian)
26469 return std::nullopt;
26470
26471 unsigned NumElts = VT.getVectorNumElements();
26472 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26473
26474 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
26475 // power-of-2 extensions as they are the most likely.
26476 // FIXME: should try Scale == NumElts case too,
26477 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26478 // The vector width must be a multiple of Scale.
26479 if (NumElts % Scale != 0)
26480 continue;
26481
26482 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
26483 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
26484
26485 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
26486 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
26487 continue;
26488
26489 if (Match(Scale))
26490 return OutVT;
26491 }
26492
26493 return std::nullopt;
26494}
26495
26496// Match shuffles that can be converted to any_vector_extend_in_reg.
26497// This is often generated during legalization.
26498// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
26499static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
26500 SelectionDAG &DAG,
26501 const TargetLowering &TLI,
26502 bool LegalOperations) {
26503 EVT VT = SVN->getValueType(0);
26504 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26505
26506 // TODO Add support for big-endian when we have a test case.
26507 if (!VT.isInteger() || IsBigEndian)
26508 return SDValue();
26509
26510 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
26511 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
26512 Mask = SVN->getMask()](unsigned Scale) {
26513 for (unsigned i = 0; i != NumElts; ++i) {
26514 if (Mask[i] < 0)
26515 continue;
26516 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
26517 continue;
26518 return false;
26519 }
26520 return true;
26521 };
26522
26523 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
26524 SDValue N0 = SVN->getOperand(0);
26525 // Never create an illegal type. Only create unsupported operations if we
26526 // are pre-legalization.
26527 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26528 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
26529 if (!OutVT)
26530 return SDValue();
26531 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
26532}
26533
26534// Match shuffles that can be converted to zero_extend_vector_inreg.
26535// This is often generated during legalization.
26536// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
26537static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
26538 SelectionDAG &DAG,
26539 const TargetLowering &TLI,
26540 bool LegalOperations) {
26541 bool LegalTypes = true;
26542 EVT VT = SVN->getValueType(0);
26543 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
26544 unsigned NumElts = VT.getVectorNumElements();
26545 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26546
26547 // TODO: add support for big-endian when we have a test case.
26548 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26549 if (!VT.isInteger() || IsBigEndian)
26550 return SDValue();
26551
26552 SmallVector<int, 16> Mask(SVN->getMask());
26553 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
26554 for (int &Indice : Mask) {
26555 if (Indice < 0)
26556 continue;
26557 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
26558 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
26559 Fn(Indice, OpIdx, OpEltIdx);
26560 }
26561 };
26562
26563 // Which elements of which operand does this shuffle demand?
26564 std::array<APInt, 2> OpsDemandedElts;
26565 for (APInt &OpDemandedElts : OpsDemandedElts)
26566 OpDemandedElts = APInt::getZero(NumElts);
26567 ForEachDecomposedIndice(
26568 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
26569 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
26570 });
26571
26572 // Element-wise(!), which of these demanded elements are known to be zero?
26573 std::array<APInt, 2> OpsKnownZeroElts;
26574 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
26575 std::get<2>(I) =
26576 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
26577
26578 // Manifest zeroable element knowledge in the shuffle mask.
26579 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
26580 // this is a local invention, but it won't leak into DAG.
26581 // FIXME: should we not manifest them, but just check when matching?
26582 bool HadZeroableElts = false;
26583 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
26584 int &Indice, int OpIdx, int OpEltIdx) {
26585 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
26586 Indice = -2; // Zeroable element.
26587 HadZeroableElts = true;
26588 }
26589 });
26590
26591 // Don't proceed unless we've refined at least one zeroable mask index.
26592 // If we didn't, then we are still trying to match the same shuffle mask
26593 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
26594 // and evidently failed. Proceeding will lead to endless combine loops.
26595 if (!HadZeroableElts)
26596 return SDValue();
26597
26598 // The shuffle may be more fine-grained than we want. Widen elements first.
26599 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
26600 SmallVector<int, 16> ScaledMask;
26601 getShuffleMaskWithWidestElts(Mask, ScaledMask);
26602 assert(Mask.size() >= ScaledMask.size() &&
26603 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
26604 int Prescale = Mask.size() / ScaledMask.size();
26605
26606 NumElts = ScaledMask.size();
26607 EltSizeInBits *= Prescale;
26608
26609 EVT PrescaledVT = EVT::getVectorVT(
26610 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
26611 NumElts);
26612
26613 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
26614 return SDValue();
26615
26616 // For example,
26617 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
26618 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
26619 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
26620 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
26621 "Unexpected mask scaling factor.");
26622 ArrayRef<int> Mask = ScaledMask;
26623 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
26624 SrcElt != NumSrcElts; ++SrcElt) {
26625 // Analyze the shuffle mask in Scale-sized chunks.
26626 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
26627 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
26628 Mask = Mask.drop_front(MaskChunk.size());
26630 // The first index in this chunk must be SrcElt, but not zero!
26630 // FIXME: undef should be fine, but that results in more-defined result.
26631 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
26632 return false;
26633 // The rest of the indices in this chunk must be zeros.
26634 // FIXME: undef should be fine, but that results in more-defined result.
26635 if (!all_of(MaskChunk.drop_front(1),
26636 [](int Indice) { return Indice == -2; }))
26637 return false;
26638 }
26639 assert(Mask.empty() && "Did not process the whole mask?");
26640 return true;
26641 };
26642
26643 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
26644 for (bool Commuted : {false, true}) {
26645 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
26646 if (Commuted)
26647 ShuffleVectorSDNode::commuteMask(ScaledMask);
26648 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26649 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
26650 LegalOperations);
26651 if (OutVT)
26652 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
26653 DAG.getBitcast(PrescaledVT, Op)));
26654 }
26655 return SDValue();
26656}
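// Illustrative example (not part of the source): for
//   v4i32 vector_shuffle<0,4,1,4> X, <all-zeros>
// the lanes taken from the all-zero operand are known zeroable, the mask is
// refined to <0,z,1,z>, and the whole pattern matches
//   v4i32 bitcast (v2i64 zero_extend_vector_inreg X)
// assuming ZERO_EXTEND_VECTOR_INREG is available for v2i64 on the target.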
26657
26658// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
26659// each source element of a large type into the lowest elements of a smaller
26660// destination type. This is often generated during legalization.
26661// If the source node itself was a '*_extend_vector_inreg' node then we should
26662// then be able to remove it.
26663static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
26664 SelectionDAG &DAG) {
26665 EVT VT = SVN->getValueType(0);
26666 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26667
26668 // TODO Add support for big-endian when we have a test case.
26669 if (!VT.isInteger() || IsBigEndian)
26670 return SDValue();
26671
26672 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
26673
26674 unsigned Opcode = N0.getOpcode();
26675 if (!ISD::isExtVecInRegOpcode(Opcode))
26676 return SDValue();
26677
26678 SDValue N00 = N0.getOperand(0);
26679 ArrayRef<int> Mask = SVN->getMask();
26680 unsigned NumElts = VT.getVectorNumElements();
26681 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26682 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
26683 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
26684
26685 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
26686 return SDValue();
26687 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
26688
26689 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
26690 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
26691 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
26692 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
26693 for (unsigned i = 0; i != NumElts; ++i) {
26694 if (Mask[i] < 0)
26695 continue;
26696 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
26697 continue;
26698 return false;
26699 }
26700 return true;
26701 };
26702
26703 // At the moment we just handle the case where we've truncated back to the
26704 // same size as before the extension.
26705 // TODO: handle more extension/truncation cases as cases arise.
26706 if (EltSizeInBits != ExtSrcSizeInBits)
26707 return SDValue();
26708
26709 // We can remove *extend_vector_inreg only if the truncation happens at
26710 // the same scale as the extension.
26711 if (isTruncate(ExtScale))
26712 return DAG.getBitcast(VT, N00);
26713
26714 return SDValue();
26715}
26716
26717// Combine shuffles of splat-shuffles of the form:
26718// shuffle (shuffle V, undef, splat-mask), undef, M
26719// If splat-mask contains undef elements, we need to be careful about
26720// introducing undef's in the folded mask which are not the result of composing
26721// the masks of the shuffles.
26722static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
26723 SelectionDAG &DAG) {
26724 EVT VT = Shuf->getValueType(0);
26725 unsigned NumElts = VT.getVectorNumElements();
26726
26727 if (!Shuf->getOperand(1).isUndef())
26728 return SDValue();
26729
26730 // See if this unary non-splat shuffle actually *is* a splat shuffle,
26731 // in disguise, with all demanded elements being identical.
26732 // FIXME: this can be done per-operand.
26733 if (!Shuf->isSplat()) {
26734 APInt DemandedElts(NumElts, 0);
26735 for (int Idx : Shuf->getMask()) {
26736 if (Idx < 0)
26737 continue; // Ignore sentinel indices.
26738 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
26739 DemandedElts.setBit(Idx);
26740 }
26741 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
26742 APInt UndefElts;
26743 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
26744 // Even if all demanded elements are splat, some of them could be undef.
26745 // Which lowest demanded element is *not* known-undef?
26746 std::optional<unsigned> MinNonUndefIdx;
26747 for (int Idx : Shuf->getMask()) {
26748 if (Idx < 0 || UndefElts[Idx])
26749 continue; // Ignore sentinel indices, and undef elements.
26750 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
26751 }
26752 if (!MinNonUndefIdx)
26753 return DAG.getUNDEF(VT); // All undef - result is undef.
26754 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
26755 SmallVector<int, 8> SplatMask(Shuf->getMask());
26756 for (int &Idx : SplatMask) {
26757 if (Idx < 0)
26758 continue; // Passthrough sentinel indices.
26759 // Otherwise, just pick the lowest demanded non-undef element.
26760 // Or sentinel undef, if we know we'd pick a known-undef element.
26761 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
26762 }
26763 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
26764 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
26765 Shuf->getOperand(1), SplatMask);
26766 }
26767 }
26768
26769 // If the inner operand is a known splat with no undefs, just return that directly.
26770 // TODO: Create DemandedElts mask from Shuf's mask.
26771 // TODO: Allow undef elements and merge with the shuffle code below.
26772 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26773 return Shuf->getOperand(0);
26774
26775 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26776 if (!Splat || !Splat->isSplat())
26777 return SDValue();
26778
26779 ArrayRef<int> ShufMask = Shuf->getMask();
26780 ArrayRef<int> SplatMask = Splat->getMask();
26781 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26782
26783 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26784 // every undef mask element in the splat-shuffle has a corresponding undef
26785 // element in the user-shuffle's mask or if the composition of mask elements
26786 // would result in undef.
26787 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26788 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26789 // In this case it is not legal to simplify to the splat-shuffle because we
26790 // may be exposing to the users of the shuffle an undef element at index 1
26791 // which was not there before the combine.
26792 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26793 // In this case the composition of masks yields SplatMask, so it's ok to
26794 // simplify to the splat-shuffle.
26795 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26796 // In this case the composed mask includes all undef elements of SplatMask
26797 // and in addition sets element zero to undef. It is safe to simplify to
26798 // the splat-shuffle.
26799 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26800 ArrayRef<int> SplatMask) {
26801 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26802 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26803 SplatMask[UserMask[i]] != -1)
26804 return false;
26805 return true;
26806 };
26807 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26808 return Shuf->getOperand(0);
26809
26810 // Create a new shuffle with a mask that is composed of the two shuffles'
26811 // masks.
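// For example (illustrative): with ShufMask = <0,0,2,2> and
// SplatMask = <2,u,2,u>, the composed mask is <2,2,2,2>, i.e. a shuffle of
// the splat-shuffle's operands that splats element 2.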
26812 SmallVector<int, 32> NewMask;
26813 for (int Idx : ShufMask)
26814 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26815
26816 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26817 Splat->getOperand(0), Splat->getOperand(1),
26818 NewMask);
26819}
26820
26821// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26822// the mask can be treated as a larger type.
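// For example (illustrative): a v8i16 shuffle of two v8i16 bitcasts of v4i32
// values with mask <0,1,4,5,10,11,14,15> widens to a v4i32 shuffle with mask
// <0,2,5,7> of the original v4i32 operands.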
26823 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26824 SelectionDAG &DAG,
26825 const TargetLowering &TLI,
26826 bool LegalOperations) {
26827 SDValue Op0 = SVN->getOperand(0);
26828 SDValue Op1 = SVN->getOperand(1);
26829 EVT VT = SVN->getValueType(0);
26830 if (Op0.getOpcode() != ISD::BITCAST)
26831 return SDValue();
26832 EVT InVT = Op0.getOperand(0).getValueType();
26833 if (!InVT.isVector() ||
26834 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26835 Op1.getOperand(0).getValueType() != InVT)))
26836 return SDValue();
26837 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26838 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26839 return SDValue();
26840
26841 int VTLanes = VT.getVectorNumElements();
26842 int InLanes = InVT.getVectorNumElements();
26843 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26844 (LegalOperations &&
26845 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
26846 return SDValue();
26847 int Factor = VTLanes / InLanes;
26848
26849 // Check that each group of lanes in the mask is either undef or makes a valid
26850 // mask for the wider lane type.
26851 ArrayRef<int> Mask = SVN->getMask();
26852 SmallVector<int> NewMask;
26853 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26854 return SDValue();
26855
26856 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26857 return SDValue();
26858
26859 // Create the new shuffle with the new mask and bitcast it back to the
26860 // original type.
26861 SDLoc DL(SVN);
26862 Op0 = Op0.getOperand(0);
26863 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26864 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26865 return DAG.getBitcast(VT, NewShuf);
26866}
26867
26868/// Combine shuffle of shuffle of the form:
26869/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
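/// For example (illustrative): with InnerMask = <2,2,2,2> and
/// OuterMask = <0,1,u,u>, the combined mask is <2,2,u,u>, which splats
/// element 2 of X.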
26870 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26871 SelectionDAG &DAG) {
26872 if (!OuterShuf->getOperand(1).isUndef())
26873 return SDValue();
26874 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26875 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26876 return SDValue();
26877
26878 ArrayRef<int> OuterMask = OuterShuf->getMask();
26879 ArrayRef<int> InnerMask = InnerShuf->getMask();
26880 unsigned NumElts = OuterMask.size();
26881 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26882 SmallVector<int, 32> CombinedMask(NumElts, -1);
26883 int SplatIndex = -1;
26884 for (unsigned i = 0; i != NumElts; ++i) {
26885 // Undef lanes remain undef.
26886 int OuterMaskElt = OuterMask[i];
26887 if (OuterMaskElt == -1)
26888 continue;
26889
26890 // Peek through the shuffle masks to get the underlying source element.
26891 int InnerMaskElt = InnerMask[OuterMaskElt];
26892 if (InnerMaskElt == -1)
26893 continue;
26894
26895 // Initialize the splatted element.
26896 if (SplatIndex == -1)
26897 SplatIndex = InnerMaskElt;
26898
26899 // Non-matching index - this is not a splat.
26900 if (SplatIndex != InnerMaskElt)
26901 return SDValue();
26902
26903 CombinedMask[i] = InnerMaskElt;
26904 }
26905 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26906 getSplatIndex(CombinedMask) != -1) &&
26907 "Expected a splat mask");
26908
26909 // TODO: The transform may be a win even if the mask is not legal.
26910 EVT VT = OuterShuf->getValueType(0);
26911 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26912 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26913 return SDValue();
26914
26915 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26916 InnerShuf->getOperand(1), CombinedMask);
26917}
26918
26919/// If the shuffle mask is taking exactly one element from the first vector
26920/// operand and passing through all other elements from the second vector
26921/// operand, return the index of the mask element that is choosing an element
26922/// from the first operand. Otherwise, return -1.
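/// For example (illustrative): with 4 elements, Mask = <4,5,1,7> returns 2,
/// because only lane 2 takes an element (element 1) from the first operand,
/// while lanes 0, 1 and 3 pass through the second operand unchanged.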
26923 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26924 int MaskSize = Mask.size();
26925 int EltFromOp0 = -1;
26926 // TODO: This does not match if there are undef elements in the shuffle mask.
26927 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26928 // removing an instruction (a shuffle), but losing the knowledge that some
26929 // vector lanes are not needed.
26930 for (int i = 0; i != MaskSize; ++i) {
26931 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26932 // We're looking for a shuffle of exactly one element from operand 0.
26933 if (EltFromOp0 != -1)
26934 return -1;
26935 EltFromOp0 = i;
26936 } else if (Mask[i] != i + MaskSize) {
26937 // Nothing from operand 1 can change lanes.
26938 return -1;
26939 }
26940 }
26941 return EltFromOp0;
26942}
26943
26944/// If a shuffle inserts exactly one element from a source vector operand into
26945/// another vector operand and we can access the specified element as a scalar,
26946/// then we can eliminate the shuffle.
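/// For example (illustrative):
/// shuffle (insertelt v1, x, 1), v2, <4,5,6,1> --> insertelt v2, x, 3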
26947SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
26948 // First, check if we are taking one element of a vector and shuffling that
26949 // element into another vector.
26950 ArrayRef<int> Mask = Shuf->getMask();
26951 SmallVector<int, 16> CommutedMask(Mask);
26952 SDValue Op0 = Shuf->getOperand(0);
26953 SDValue Op1 = Shuf->getOperand(1);
26954 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
26955 if (ShufOp0Index == -1) {
26956 // Commute mask and check again.
26957 ShuffleVectorSDNode::commuteMask(CommutedMask);
26958 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
26959 if (ShufOp0Index == -1)
26960 return SDValue();
26961 // Commute operands to match the commuted shuffle mask.
26962 std::swap(Op0, Op1);
26963 Mask = CommutedMask;
26964 }
26965
26966 // The shuffle inserts exactly one element from operand 0 into operand 1.
26967 // Now see if we can access that element as a scalar via a real insert element
26968 // instruction.
26969 // TODO: We can try harder to locate the element as a scalar. Examples: it
26970 // could be an operand of BUILD_VECTOR, or a constant.
26971 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
26972 "Shuffle mask value must be from operand 0");
26973
26974 SDValue Elt;
26975 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
26976 m_SpecificInt(Mask[ShufOp0Index])))) {
26977 // There's an existing insertelement with constant insertion index, so we
26978 // don't need to check the legality/profitability of a replacement operation
26979 // that differs at most in the constant value. The target should be able to
26980 // lower any of those in a similar way. If not, legalization will expand
26981 // this to a scalar-to-vector plus shuffle.
26982 //
26983 // Note that the shuffle may move the scalar from the position that the
26984 // insert element used. Therefore, our new insert element occurs at the
26985 // shuffle's mask index value, not the insert's index value.
26986 //
26987 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26988 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26989 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
26990 Op1, Elt, NewInsIndex);
26991 }
26992
26993 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
26994 return SDValue();
26995
26997 Mask[ShufOp0Index] == 0) {
26998 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
26999 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27000 Op1, Elt, NewInsIndex);
27001 }
27002
27003 return SDValue();
27004}
27005
27006/// If we have a unary shuffle of a shuffle, see if it can be folded away
27007/// completely. This has the potential to lose undef knowledge because the first
27008/// shuffle may not have an undef mask element where the second one does. So
27009/// only call this after doing simplifications based on demanded elements.
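/// For example (illustrative): with Mask0 = <0,0,2,2> and Mask = <1,1,3,3>,
/// every result lane selects the same underlying element as the inner
/// shuffle does, so the outer shuffle can be replaced by the inner one.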
27010 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27011 // shuf (shuf0 X, Y, Mask0), undef, Mask
27012 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27013 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27014 return SDValue();
27015
27016 ArrayRef<int> Mask = Shuf->getMask();
27017 ArrayRef<int> Mask0 = Shuf0->getMask();
27018 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27019 // Ignore undef elements.
27020 if (Mask[i] == -1)
27021 continue;
27022 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27023
27024 // Is the element of the shuffle operand chosen by this shuffle the same as
27025 // the element chosen by the shuffle operand itself?
27026 if (Mask0[Mask[i]] != Mask0[i])
27027 return SDValue();
27028 }
27029 // Every element of this shuffle is identical to the result of the previous
27030 // shuffle, so we can replace this value.
27031 return Shuf->getOperand(0);
27032}
27033
27034SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27035 EVT VT = N->getValueType(0);
27036 unsigned NumElts = VT.getVectorNumElements();
27037
27038 SDValue N0 = N->getOperand(0);
27039 SDValue N1 = N->getOperand(1);
27040
27041 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27042
27043 // Canonicalize shuffle undef, undef -> undef
27044 if (N0.isUndef() && N1.isUndef())
27045 return DAG.getUNDEF(VT);
27046
27047 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27048
27049 // Canonicalize shuffle v, v -> v, undef
27050 if (N0 == N1)
27051 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27052 createUnaryMask(SVN->getMask(), NumElts));
27053
27054 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27055 if (N0.isUndef())
27056 return DAG.getCommutedVectorShuffle(*SVN);
27057
27058 // Remove references to rhs if it is undef
27059 if (N1.isUndef()) {
27060 bool Changed = false;
27061 SmallVector<int, 8> NewMask;
27062 for (unsigned i = 0; i != NumElts; ++i) {
27063 int Idx = SVN->getMaskElt(i);
27064 if (Idx >= (int)NumElts) {
27065 Idx = -1;
27066 Changed = true;
27067 }
27068 NewMask.push_back(Idx);
27069 }
27070 if (Changed)
27071 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27072 }
27073
27074 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27075 return InsElt;
27076
27077 // A shuffle of a single vector that is a splatted value can always be folded.
27078 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27079 return V;
27080
27081 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27082 return V;
27083
27084 // If it is a splat, check if the argument vector is another splat or a
27085 // build_vector.
27086 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27087 int SplatIndex = SVN->getSplatIndex();
27088 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27089 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27090 // splat (vector_bo L, R), Index -->
27091 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27092 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27093 SDLoc DL(N);
27094 EVT EltVT = VT.getScalarType();
27095 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27096 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27097 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27098 SDValue NewBO =
27099 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27100 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27101 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27102 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27103 }
27104
27105 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27106 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27107 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27108 N0.hasOneUse()) {
27109 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27110 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27111
27112 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27113 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27114 if (Idx->getAPIntValue() == SplatIndex)
27115 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27116
27117 // Look through a bitcast if LE and splatting lane 0, through to a
27118 // scalar_to_vector or a build_vector.
27119 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27120 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27121 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27122 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27123 EVT N00VT = N0.getOperand(0).getValueType();
27124 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27125 VT.isInteger() && N00VT.isInteger()) {
27126 EVT InVT =
27127 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
27128 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
27129 SDLoc(N), InVT);
27130 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27131 }
27132 }
27133 }
27134
27135 // If this is a bit convert that changes the element type of the vector but
27136 // not the number of vector elements, look through it. Be careful not to
27137 // look through conversions that change things like v4f32 to v2f64.
27138 SDNode *V = N0.getNode();
27139 if (V->getOpcode() == ISD::BITCAST) {
27140 SDValue ConvInput = V->getOperand(0);
27141 if (ConvInput.getValueType().isVector() &&
27142 ConvInput.getValueType().getVectorNumElements() == NumElts)
27143 V = ConvInput.getNode();
27144 }
27145
27146 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27147 assert(V->getNumOperands() == NumElts &&
27148 "BUILD_VECTOR has wrong number of operands");
27149 SDValue Base;
27150 bool AllSame = true;
27151 for (unsigned i = 0; i != NumElts; ++i) {
27152 if (!V->getOperand(i).isUndef()) {
27153 Base = V->getOperand(i);
27154 break;
27155 }
27156 }
27157 // Splat of <u, u, u, u>, return <u, u, u, u>
27158 if (!Base.getNode())
27159 return N0;
27160 for (unsigned i = 0; i != NumElts; ++i) {
27161 if (V->getOperand(i) != Base) {
27162 AllSame = false;
27163 break;
27164 }
27165 }
27166 // Splat of <x, x, x, x>, return <x, x, x, x>
27167 if (AllSame)
27168 return N0;
27169
27170 // Canonicalize any other splat as a build_vector, but avoid defining any
27171 // undefined elements in the mask.
27172 SDValue Splatted = V->getOperand(SplatIndex);
27173 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27174 EVT EltVT = Splatted.getValueType();
27175
27176 for (unsigned i = 0; i != NumElts; ++i) {
27177 if (SVN->getMaskElt(i) < 0)
27178 Ops[i] = DAG.getUNDEF(EltVT);
27179 }
27180
27181 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27182
27183 // We may have jumped through bitcasts, so the type of the
27184 // BUILD_VECTOR may not match the type of the shuffle.
27185 if (V->getValueType(0) != VT)
27186 NewBV = DAG.getBitcast(VT, NewBV);
27187 return NewBV;
27188 }
27189 }
27190
27191 // Simplify source operands based on shuffle mask.
27192 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27193 return SDValue(N, 0);
27194
27195 // This is intentionally placed after demanded elements simplification because
27196 // it could eliminate knowledge of undef elements created by this shuffle.
27197 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27198 return ShufOp;
27199
27200 // Match shuffles that can be converted to any_vector_extend_in_reg.
27201 if (SDValue V =
27202 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27203 return V;
27204
27205 // Combine "truncate_vector_in_reg" style shuffles.
27206 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27207 return V;
27208
27209 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27210 Level < AfterLegalizeVectorOps &&
27211 (N1.isUndef() ||
27212 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27213 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27214 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27215 return V;
27216 }
27217
27218 // A shuffle of a concat of the same narrow vector can be reduced to use
27219 // only low-half elements of a concat with undef:
27220 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
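// For example (illustrative): for v4i32 with X = v2i32, mask <2,3,0,1>
// becomes <0,1,0,1> and the operand becomes concat(X, undef).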
27221 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27222 N0.getNumOperands() == 2 &&
27223 N0.getOperand(0) == N0.getOperand(1)) {
27224 int HalfNumElts = (int)NumElts / 2;
27225 SmallVector<int, 8> NewMask;
27226 for (unsigned i = 0; i != NumElts; ++i) {
27227 int Idx = SVN->getMaskElt(i);
27228 if (Idx >= HalfNumElts) {
27229 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27230 Idx -= HalfNumElts;
27231 }
27232 NewMask.push_back(Idx);
27233 }
27234 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27235 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27236 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27237 N0.getOperand(0), UndefVec);
27238 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27239 }
27240 }
27241
27242 // See if we can replace a shuffle with an insert_subvector.
27243 // e.g. v2i32 into v8i32:
27244 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27245 // --> insert_subvector(lhs,rhs1,4).
27246 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27247 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
27248 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27249 // Ensure RHS subvectors are legal.
27250 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27251 EVT SubVT = RHS.getOperand(0).getValueType();
27252 int NumSubVecs = RHS.getNumOperands();
27253 int NumSubElts = SubVT.getVectorNumElements();
27254 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27255 if (!TLI.isTypeLegal(SubVT))
27256 return SDValue();
27257
27258 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27259 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27260 return SDValue();
27261
27262 // Search [NumSubElts] spans for RHS sequence.
27263 // TODO: Can we avoid nested loops to increase performance?
27264 SmallVector<int> InsertionMask(NumElts);
27265 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27266 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27267 // Reset mask to identity.
27268 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27269
27270 // Add subvector insertion.
27271 std::iota(InsertionMask.begin() + SubIdx,
27272 InsertionMask.begin() + SubIdx + NumSubElts,
27273 NumElts + (SubVec * NumSubElts));
27274
27275 // See if the shuffle mask matches the reference insertion mask.
27276 bool MatchingShuffle = true;
27277 for (int i = 0; i != (int)NumElts; ++i) {
27278 int ExpectIdx = InsertionMask[i];
27279 int ActualIdx = Mask[i];
27280 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27281 MatchingShuffle = false;
27282 break;
27283 }
27284 }
27285
27286 if (MatchingShuffle)
27287 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27288 SubIdx);
27289 }
27290 }
27291 return SDValue();
27292 };
27293 ArrayRef<int> Mask = SVN->getMask();
27294 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27295 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27296 return InsertN1;
27297 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27298 SmallVector<int> CommuteMask(Mask);
27299 ShuffleVectorSDNode::commuteMask(CommuteMask);
27300 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27301 return InsertN0;
27302 }
27303 }
27304
27305 // If we're not performing a select/blend shuffle, see if we can convert the
27306 // shuffle into an AND node, with all the out-of-lane elements known zero.
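// For example (illustrative): a v4i32 shuffle(X, zero, <0,4,2,4>) keeps
// lanes 0 and 2 of X and selects known-zero elements elsewhere, so it can
// become and(X, <-1,0,-1,0>).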
27307 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27308 bool IsInLaneMask = true;
27309 ArrayRef<int> Mask = SVN->getMask();
27310 SmallVector<int, 16> ClearMask(NumElts, -1);
27311 APInt DemandedLHS = APInt::getZero(NumElts);
27312 APInt DemandedRHS = APInt::getZero(NumElts);
27313 for (int I = 0; I != (int)NumElts; ++I) {
27314 int M = Mask[I];
27315 if (M < 0)
27316 continue;
27317 ClearMask[I] = M == I ? I : (I + NumElts);
27318 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27319 if (M != I) {
27320 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27321 Demanded.setBit(M % NumElts);
27322 }
27323 }
27324 // TODO: Should we try to mask with N1 as well?
27325 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27326 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27327 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27328 SDLoc DL(N);
27329 EVT IntVT = VT.changeVectorElementTypeToInteger();
27330 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27331 // Transform the type to a legal type so that the buildvector constant
27332 // elements are not illegal. Make sure that the result is larger than the
27333 // original type, in case the value is split into two (e.g. i64->i32).
27334 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27335 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27336 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27337 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27338 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27339 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27340 for (int I = 0; I != (int)NumElts; ++I)
27341 if (0 <= Mask[I])
27342 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27343
27344 // See if a clear mask is legal instead of going via
27345 // XformToShuffleWithZero which loses UNDEF mask elements.
27346 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27347 return DAG.getBitcast(
27348 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27349 DAG.getConstant(0, DL, IntVT), ClearMask));
27350
27351 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27352 return DAG.getBitcast(
27353 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27354 DAG.getBuildVector(IntVT, DL, AndMask)));
27355 }
27356 }
27357 }
27358
27359 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27360 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27361 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27362 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27363 return Res;
27364
27365 // If this shuffle only has a single input that is a bitcasted shuffle,
27366 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27367 // back to their original types.
27368 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27369 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27370 TLI.isTypeLegal(VT)) {
27371
27372 SDValue BC0 = peekThroughOneUseBitcasts(N0);
27373 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27374 EVT SVT = VT.getScalarType();
27375 EVT InnerVT = BC0->getValueType(0);
27376 EVT InnerSVT = InnerVT.getScalarType();
27377
27378 // Determine which shuffle works with the smaller scalar type.
27379 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27380 EVT ScaleSVT = ScaleVT.getScalarType();
27381
27382 if (TLI.isTypeLegal(ScaleVT) &&
27383 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27384 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27385 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27386 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27387
27388 // Scale the shuffle masks to the smaller scalar type.
27389 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27390 SmallVector<int, 8> InnerMask;
27391 SmallVector<int, 8> OuterMask;
27392 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27393 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27394
27395 // Merge the shuffle masks.
27396 SmallVector<int, 8> NewMask;
27397 for (int M : OuterMask)
27398 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27399
27400 // Test for shuffle mask legality over both commutations.
27401 SDValue SV0 = BC0->getOperand(0);
27402 SDValue SV1 = BC0->getOperand(1);
27403 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27404 if (!LegalMask) {
27405 std::swap(SV0, SV1);
27406 ShuffleVectorSDNode::commuteMask(NewMask);
27407 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27408 }
27409
27410 if (LegalMask) {
27411 SV0 = DAG.getBitcast(ScaleVT, SV0);
27412 SV1 = DAG.getBitcast(ScaleVT, SV1);
27413 return DAG.getBitcast(
27414 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27415 }
27416 }
27417 }
27418 }
27419
27420 // Match shuffles of bitcasts, so long as the mask can be treated as the
27421 // larger type.
27422 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27423 return V;
27424
27425 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27426 // operand, and SV1 as the second operand.
27427 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27428 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
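// For example (illustrative): with NumElts = 4, merging an outer mask
// <0,1,4,5> over OtherSVN = shuffle(A,B,<2,3,6,7>) and N1 = C yields
// SV0 = A, SV1 = C and Mask = <2,3,4,5>, i.e. shuffle(A, C, <2,3,4,5>).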
27429 auto MergeInnerShuffle =
27430 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27431 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27432 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27433 SmallVectorImpl<int> &Mask) -> bool {
27434 // Don't try to fold splats; they're likely to simplify somehow, or they
27435 // might be free.
27436 if (OtherSVN->isSplat())
27437 return false;
27438
27439 SV0 = SV1 = SDValue();
27440 Mask.clear();
27441
27442 for (unsigned i = 0; i != NumElts; ++i) {
27443 int Idx = SVN->getMaskElt(i);
27444 if (Idx < 0) {
27445 // Propagate Undef.
27446 Mask.push_back(Idx);
27447 continue;
27448 }
27449
27450 if (Commute)
27451 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27452
27453 SDValue CurrentVec;
27454 if (Idx < (int)NumElts) {
27455 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27456 // shuffle mask to identify which vector is actually referenced.
27457 Idx = OtherSVN->getMaskElt(Idx);
27458 if (Idx < 0) {
27459 // Propagate Undef.
27460 Mask.push_back(Idx);
27461 continue;
27462 }
27463 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27464 : OtherSVN->getOperand(1);
27465 } else {
27466 // This shuffle index references an element within N1.
27467 CurrentVec = N1;
27468 }
27469
27470 // Simple case where 'CurrentVec' is UNDEF.
27471 if (CurrentVec.isUndef()) {
27472 Mask.push_back(-1);
27473 continue;
27474 }
27475
27476 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27477 // will be the first or second operand of the combined shuffle.
27478 Idx = Idx % NumElts;
27479 if (!SV0.getNode() || SV0 == CurrentVec) {
27480 // Ok. CurrentVec is the left hand side.
27481 // Update the mask accordingly.
27482 SV0 = CurrentVec;
27483 Mask.push_back(Idx);
27484 continue;
27485 }
27486 if (!SV1.getNode() || SV1 == CurrentVec) {
27487 // Ok. CurrentVec is the right hand side.
27488 // Update the mask accordingly.
27489 SV1 = CurrentVec;
27490 Mask.push_back(Idx + NumElts);
27491 continue;
27492 }
27493
27494 // Last chance - see if the vector is another shuffle and if it
27495 // uses one of the existing candidate shuffle ops.
27496 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
27497 int InnerIdx = CurrentSVN->getMaskElt(Idx);
27498 if (InnerIdx < 0) {
27499 Mask.push_back(-1);
27500 continue;
27501 }
27502 SDValue InnerVec = (InnerIdx < (int)NumElts)
27503 ? CurrentSVN->getOperand(0)
27504 : CurrentSVN->getOperand(1);
27505 if (InnerVec.isUndef()) {
27506 Mask.push_back(-1);
27507 continue;
27508 }
27509 InnerIdx %= NumElts;
27510 if (InnerVec == SV0) {
27511 Mask.push_back(InnerIdx);
27512 continue;
27513 }
27514 if (InnerVec == SV1) {
27515 Mask.push_back(InnerIdx + NumElts);
27516 continue;
27517 }
27518 }
27519
27520 // Bail out if we cannot convert the shuffle pair into a single shuffle.
27521 return false;
27522 }
27523
27524 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27525 return true;
27526
27527 // Avoid introducing shuffles with illegal mask.
27528 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27529 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27530 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27531 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
27532 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
27533 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
27534 if (TLI.isShuffleMaskLegal(Mask, VT))
27535 return true;
27536
27537 std::swap(SV0, SV1);
27538 ShuffleVectorSDNode::commuteMask(Mask);
27539 return TLI.isShuffleMaskLegal(Mask, VT);
27540 };
27541
27542 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27543 // Canonicalize shuffles according to rules:
27544 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
27545 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
27546 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
27547 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27548 N->isOnlyUserOf(N1.getNode())) {
27549 // The incoming shuffle must be of the same type as the result of the
27550 // current shuffle.
27551 assert(N1->getOperand(0).getValueType() == VT &&
27552 "Shuffle types don't match");
27553
27554 SDValue SV0 = N1->getOperand(0);
27555 SDValue SV1 = N1->getOperand(1);
27556 bool HasSameOp0 = N0 == SV0;
27557 bool IsSV1Undef = SV1.isUndef();
27558 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
27559 // Commute the operands of this shuffle so merging below will trigger.
27560 return DAG.getCommutedVectorShuffle(*SVN);
27561 }
27562
27563 // Canonicalize splat shuffles to the RHS to improve merging below.
27564 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
27565 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
27566 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27567 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
27568 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
27569 return DAG.getCommutedVectorShuffle(*SVN);
27570 }
27571
27572 // Try to fold according to rules:
27573 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27574 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27575 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27576 // Don't try to fold shuffles with illegal type.
27577 // Only fold if this shuffle is the only user of the other shuffle.
27578 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
27579 for (int i = 0; i != 2; ++i) {
27580 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
27581 N->isOnlyUserOf(N->getOperand(i).getNode())) {
27582 // The incoming shuffle must be of the same type as the result of the
27583 // current shuffle.
27584 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
27585 assert(OtherSV->getOperand(0).getValueType() == VT &&
27586 "Shuffle types don't match");
27587
27588 SDValue SV0, SV1;
27589 SmallVector<int, 4> Mask;
27590 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
27591 SV0, SV1, Mask)) {
27592 // Check if all indices in Mask are Undef. In case, propagate Undef.
27593 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27594 return DAG.getUNDEF(VT);
27595
27596 return DAG.getVectorShuffle(VT, SDLoc(N),
27597 SV0 ? SV0 : DAG.getUNDEF(VT),
27598 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
27599 }
27600 }
27601 }
27602
27603 // Merge shuffles through binops if we are able to merge them with at least
27604 // one other shuffle.
27605 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
27606 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
27607 unsigned SrcOpcode = N0.getOpcode();
27608 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
27609 (N1.isUndef() ||
27610 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
27611 // Get binop source ops, or just pass on the undef.
27612 SDValue Op00 = N0.getOperand(0);
27613 SDValue Op01 = N0.getOperand(1);
27614 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
27615 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
27616 // TODO: We might be able to relax the VT check but we don't currently
27617 // have any isBinOp() that has different result/ops VTs so play safe until
27618 // we have test coverage.
27619 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
27620 Op01.getValueType() == VT && Op11.getValueType() == VT &&
27621 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
27622 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
27623 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
27624 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
27625 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
27626 SmallVectorImpl<int> &Mask, bool LeftOp,
27627 bool Commute) {
27628 SDValue InnerN = Commute ? N1 : N0;
27629 SDValue Op0 = LeftOp ? Op00 : Op01;
27630 SDValue Op1 = LeftOp ? Op10 : Op11;
27631 if (Commute)
27632 std::swap(Op0, Op1);
27633 // Only accept the merged shuffle if we don't introduce undef elements,
27634 // or the inner shuffle already contained undef elements.
27635 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
27636 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
27637 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
27638 Mask) &&
27639 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
27640 llvm::none_of(Mask, [](int M) { return M < 0; }));
27641 };
27642
27643 // Ensure we don't increase the number of shuffles - we must merge a
27644 // shuffle from at least one of the LHS and RHS ops.
27645 bool MergedLeft = false;
27646 SDValue LeftSV0, LeftSV1;
27647 SmallVector<int, 4> LeftMask;
27648 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
27649 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
27650 MergedLeft = true;
27651 } else {
27652 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27653 LeftSV0 = Op00, LeftSV1 = Op10;
27654 }
27655
27656 bool MergedRight = false;
27657 SDValue RightSV0, RightSV1;
27658 SmallVector<int, 4> RightMask;
27659 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
27660 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
27661 MergedRight = true;
27662 } else {
27663 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27664 RightSV0 = Op01, RightSV1 = Op11;
27665 }
27666
27667 if (MergedLeft || MergedRight) {
27668 SDLoc DL(N);
27669 SDValue LHS = DAG.getVectorShuffle(
27670 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
27671 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
27672 SDValue RHS = DAG.getVectorShuffle(
27673 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
27674 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
27675 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
27676 }
27677 }
27678 }
27679 }
27680
27681 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
27682 return V;
27683
27684 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
27685 // Perform this really late, because it could eliminate knowledge
27686 // of undef elements created by this shuffle.
27687 if (Level < AfterLegalizeTypes)
27688 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
27689 LegalOperations))
27690 return V;
27691
27692 return SDValue();
27693}
27694
27695SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
27696 EVT VT = N->getValueType(0);
27697 if (!VT.isFixedLengthVector())
27698 return SDValue();
27699
27700 // Try to convert a scalar binop with an extracted vector element to a vector
27701 // binop. This is intended to reduce potentially expensive register moves.
27702 // TODO: Check if both operands are extracted.
27703 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
27704 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
27705 SDValue Scalar = N->getOperand(0);
27706 unsigned Opcode = Scalar.getOpcode();
27707 EVT VecEltVT = VT.getScalarType();
27708 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
27709 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
27710 Scalar.getOperand(0).getValueType() == VecEltVT &&
27711 Scalar.getOperand(1).getValueType() == VecEltVT &&
27712 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
27713 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
27714 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
27715 // Match an extract element and get a shuffle mask equivalent.
27716 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
27717
27718 for (int i : {0, 1}) {
27719 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
27720 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
27721 SDValue EE = Scalar.getOperand(i);
27722 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
27723 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
27724 EE.getOperand(0).getValueType() == VT &&
27725 isa<ConstantSDNode>(EE.getOperand(1))) {
27726 // Mask = {ExtractIndex, undef, undef....}
27727 ShufMask[0] = EE.getConstantOperandVal(1);
27728 // Make sure the shuffle is legal if we are crossing lanes.
27729 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
27730 SDLoc DL(N);
27731 SDValue V[] = {EE.getOperand(0),
27732 DAG.getConstant(C->getAPIntValue(), DL, VT)};
27733 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
27734 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
27735 ShufMask);
27736 }
27737 }
27738 }
27739 }
27740
27741 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
27742 // with a VECTOR_SHUFFLE and possible truncate.
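// For example (illustrative):
// (v4i32 scalar_to_vector (extractelt (v8i32 V), 5)) -->
// (extract_subvector (v8i32 shuffle V, undef, <5,u,u,u,u,u,u,u>), 0)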
27743 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
27744 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
27745 return SDValue();
27746
27747 // If we have an implicit truncate, truncate here if it is legal.
27748 if (VecEltVT != Scalar.getValueType() &&
27749 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
27750 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
27751 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
27752 }
27753
27754 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
27755 if (!ExtIndexC)
27756 return SDValue();
27757
27758 SDValue SrcVec = Scalar.getOperand(0);
27759 EVT SrcVT = SrcVec.getValueType();
27760 unsigned SrcNumElts = SrcVT.getVectorNumElements();
27761 unsigned VTNumElts = VT.getVectorNumElements();
27762 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
27763 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
27764 SmallVector<int, 8> Mask(SrcNumElts, -1);
27765 Mask[0] = ExtIndexC->getZExtValue();
27766 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
27767 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
27768 if (!LegalShuffle)
27769 return SDValue();
27770
27771 // If the initial vector is the same size, the shuffle is the result.
27772 if (VT == SrcVT)
27773 return LegalShuffle;
27774
27775 // If not, shorten the shuffled vector.
27776 if (VTNumElts != SrcNumElts) {
27777 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
27778 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27779 SrcVT.getVectorElementType(), VTNumElts);
27780 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27781 ZeroIdx);
27782 }
27783 }
27784
27785 return SDValue();
27786}
27787
27788SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27789 EVT VT = N->getValueType(0);
27790 SDValue N0 = N->getOperand(0);
27791 SDValue N1 = N->getOperand(1);
27792 SDValue N2 = N->getOperand(2);
27793 uint64_t InsIdx = N->getConstantOperandVal(2);
27794
27795 // If inserting an UNDEF, just return the original vector.
27796 if (N1.isUndef())
27797 return N0;
27798
27799 // If this is an insert of an extracted vector into an undef vector, we can
27800 // just use the input to the extract if the types match, and can simplify
27801 // in some cases even if they don't.
27802 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27803 N1.getOperand(1) == N2) {
27804 EVT SrcVT = N1.getOperand(0).getValueType();
27805 if (SrcVT == VT)
27806 return N1.getOperand(0);
27807 // TODO: To remove the zero check, need to adjust the offset to
27808 // a multiple of the new src type.
27809 if (isNullConstant(N2)) {
27810 if (VT.knownBitsGE(SrcVT) &&
27811 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27812 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27813 VT, N0, N1.getOperand(0), N2);
27814 else if (VT.knownBitsLE(SrcVT) &&
27815 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27816 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27817 VT, N1.getOperand(0), N2);
27818 }
27819 }
27820
27821 // Handle case where we've ended up inserting back into the source vector
27822 // we extracted the subvector from.
27823 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27824 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27825 N1.getOperand(1) == N2)
27826 return N0;
27827
27828 // Simplify scalar inserts into an undef vector:
27829 // insert_subvector undef, (splat X), N2 -> splat X
27830 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27831 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27832 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27833
27834 // insert_subvector (splat X), (splat X), N2 -> splat X
27835 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27836 N0.getOperand(0) == N1.getOperand(0))
27837 return N0;
27838
27839 // If we are inserting a bitcast value into an undef, with the same
27840 // number of elements, just use the bitcast input of the extract.
27841 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27842 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27843 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27844 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27845 N1.getOperand(0).getOperand(1) == N2 &&
27846 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27847 VT.getVectorElementCount() &&
27848 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27849 VT.getSizeInBits()) {
27850 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27851 }
27852
27853 // If both N0 and N1 are bitcast values on which insert_subvector
27854 // would make sense, pull the bitcast through.
27855 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27856 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27857 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27858 SDValue CN0 = N0.getOperand(0);
27859 SDValue CN1 = N1.getOperand(0);
27860 EVT CN0VT = CN0.getValueType();
27861 EVT CN1VT = CN1.getValueType();
27862 if (CN0VT.isVector() && CN1VT.isVector() &&
27863 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27864 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
27865 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27866 CN0.getValueType(), CN0, CN1, N2);
27867 return DAG.getBitcast(VT, NewINSERT);
27868 }
27869 }
27870
27871 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27872 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27873 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27874 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27875 N0.getOperand(1).getValueType() == N1.getValueType() &&
27876 N0.getOperand(2) == N2)
27877 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27878 N1, N2);
27879
27880 // Eliminate an intermediate insert into an undef vector:
27881 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27882 // insert_subvector undef, X, 0
27883 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27884 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27885 isNullConstant(N2))
27886 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27887 N1.getOperand(1), N2);
27888
27889 // Push subvector bitcasts to the output, adjusting the index as we go.
27890 // insert_subvector(bitcast(v), bitcast(s), c1)
27891 // -> bitcast(insert_subvector(v, s, c2))
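// For example (illustrative):
// insert_subvector (v4i64 bitcast(v8i32 V)), (v2i64 bitcast(v4i32 S)), 2
// --> v4i64 bitcast (insert_subvector V, S, 4)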
27892 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27893 N1.getOpcode() == ISD::BITCAST) {
27894 SDValue N0Src = peekThroughBitcasts(N0);
27895 SDValue N1Src = peekThroughBitcasts(N1);
27896 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27897 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27898 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27899 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27900 EVT NewVT;
27901 SDLoc DL(N);
27902 SDValue NewIdx;
27903 LLVMContext &Ctx = *DAG.getContext();
27904 ElementCount NumElts = VT.getVectorElementCount();
27905 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27906 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27907 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27908 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27909 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27910 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27911 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27912 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27913 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27914 NumElts.divideCoefficientBy(Scale));
27915 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27916 }
27917 }
27918 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27919 SDValue Res = DAG.getBitcast(NewVT, N0Src);
27920 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27921 return DAG.getBitcast(VT, Res);
27922 }
27923 }
27924 }
27925
27926 // Canonicalize insert_subvector dag nodes.
27927 // Example:
27928 // (insert_subvector (insert_subvector A, Idx0), Idx1)
27929 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
27930 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27931 N1.getValueType() == N0.getOperand(1).getValueType()) {
27932 unsigned OtherIdx = N0.getConstantOperandVal(2);
27933 if (InsIdx < OtherIdx) {
27934 // Swap nodes.
27935 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27936 N0.getOperand(0), N1, N2);
27937 AddToWorklist(NewOp.getNode());
27938 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27939 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
27940 }
27941 }
27942
27943 // If the input vector is a concatenation, and the insert replaces
27944 // one of the pieces, we can optimize into a single concat_vectors.
27945 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
27946 N0.getOperand(0).getValueType() == N1.getValueType() &&
27949 unsigned Factor = N1.getValueType().getVectorMinNumElements();
27950 SmallVector<SDValue, 8> Ops(N0->ops());
27951 Ops[InsIdx / Factor] = N1;
27952 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27953 }
27954
27955 // Simplify source operands based on insertion.
27956 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27957 return SDValue(N, 0);
27958
27959 return SDValue();
27960}
27961
27962SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
27963 SDValue N0 = N->getOperand(0);
27964
27965 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27966 if (N0->getOpcode() == ISD::FP16_TO_FP)
27967 return N0->getOperand(0);
27968
27969 return SDValue();
27970}
27971
27972SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
27973 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
27974 auto Op = N->getOpcode();
27976 "opcode should be FP16_TO_FP or BF16_TO_FP.");
27977 SDValue N0 = N->getOperand(0);
27978
27979 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27980 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27981 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27982 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
27983 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27984 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27985 }
27986 }
27987
27988 if (SDValue CastEliminated = eliminateFPCastPair(N))
27989 return CastEliminated;
27990
27991 // Sometimes constants manage to survive very late in the pipeline, e.g.,
27992 // because they are wrapped inside the <1 x f16> type. Try one last time to
27993 // get rid of them.
27994 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
27995 N->getValueType(0), {N0});
27996 return Folded;
27997}
27998
27999SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28000 SDValue N0 = N->getOperand(0);
28001
28002 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28003 if (N0->getOpcode() == ISD::BF16_TO_FP)
28004 return N0->getOperand(0);
28005
28006 return SDValue();
28007}
28008
28009SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28010 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28011 return visitFP16_TO_FP(N);
28012}
28013
28014SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28015 SDValue N0 = N->getOperand(0);
28016 EVT VT = N0.getValueType();
28017 unsigned Opcode = N->getOpcode();
28018
28019 // VECREDUCE over 1-element vector is just an extract.
28020 if (VT.getVectorElementCount().isScalar()) {
28021 SDLoc dl(N);
28022 SDValue Res =
28023 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28024 DAG.getVectorIdxConstant(0, dl));
28025 if (Res.getValueType() != N->getValueType(0))
28026 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28027 return Res;
28028 }
28029
28030 // On a boolean vector an and/or reduction is the same as a umin/umax
28031 // reduction. Convert them if the latter is legal while the former isn't.
28032 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28033 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28034 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28035 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28036 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28037 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28038 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28039 }
28040
28041 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28042 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28043 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28044 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28045 SDValue Vec = N0.getOperand(0);
28046 SDValue Subvec = N0.getOperand(1);
28047 if ((Opcode == ISD::VECREDUCE_OR &&
28048 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28049 (Opcode == ISD::VECREDUCE_AND &&
28050 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28051 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28052 }
28053
28054 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28055 // Same for zext and anyext, and for and/or/xor reductions.
28056 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28057 Opcode == ISD::VECREDUCE_XOR) &&
28058 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28059 N0.getOpcode() == ISD::ZERO_EXTEND ||
28060 N0.getOpcode() == ISD::ANY_EXTEND) &&
28061 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28062 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28063 N0.getOperand(0).getValueType().getVectorElementType(),
28064 N0.getOperand(0));
28065 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28066 }
28067 return SDValue();
28068}
28069
28070SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28071 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28072
28073 // FSUB -> FMA combines:
28074 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28075 AddToWorklist(Fused.getNode());
28076 return Fused;
28077 }
28078 return SDValue();
28079}
28080
28081SDValue DAGCombiner::visitVPOp(SDNode *N) {
28082
28083 if (N->getOpcode() == ISD::VP_GATHER)
28084 if (SDValue SD = visitVPGATHER(N))
28085 return SD;
28086
28087 if (N->getOpcode() == ISD::VP_SCATTER)
28088 if (SDValue SD = visitVPSCATTER(N))
28089 return SD;
28090
28091 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28092 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28093 return SD;
28094
28095 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28096 if (SDValue SD = visitVP_STRIDED_STORE(N))
28097 return SD;
28098
28099 // VP operations in which all vector elements are disabled - either by
28100 // determining that the mask is all false or that the EVL is 0 - can be
28101 // eliminated.
28102 bool AreAllEltsDisabled = false;
28103 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28104 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28105 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28106 AreAllEltsDisabled |=
28107 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28108
28109 // This is the only generic VP combine we support for now.
28110 if (!AreAllEltsDisabled) {
28111 switch (N->getOpcode()) {
28112 case ISD::VP_FADD:
28113 return visitVP_FADD(N);
28114 case ISD::VP_FSUB:
28115 return visitVP_FSUB(N);
28116 case ISD::VP_FMA:
28117 return visitFMA<VPMatchContext>(N);
28118 case ISD::VP_SELECT:
28119 return visitVP_SELECT(N);
28120 case ISD::VP_MUL:
28121 return visitMUL<VPMatchContext>(N);
28122 case ISD::VP_SUB:
28123 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28124 default:
28125 break;
28126 }
28127 return SDValue();
28128 }
28129
28130 // Binary operations can be replaced by UNDEF.
28131 if (ISD::isVPBinaryOp(N->getOpcode()))
28132 return DAG.getUNDEF(N->getValueType(0));
28133
28134 // VP Memory operations can be replaced by either the chain (stores) or the
28135 // chain + undef (loads).
28136 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28137 if (MemSD->writeMem())
28138 return MemSD->getChain();
28139 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28140 }
28141
28142 // Reduction operations return the start operand when no elements are active.
28143 if (ISD::isVPReduction(N->getOpcode()))
28144 return N->getOperand(0);
28145
28146 return SDValue();
28147}
28148
28149SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28150 SDValue Chain = N->getOperand(0);
28151 SDValue Ptr = N->getOperand(1);
28152 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28153
28154 // Check if the memory where the FP state is written is used only in a single
28155 // load operation.
28156 LoadSDNode *LdNode = nullptr;
28157 for (auto *U : Ptr->users()) {
28158 if (U == N)
28159 continue;
28160 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28161 if (LdNode && LdNode != Ld)
28162 return SDValue();
28163 LdNode = Ld;
28164 continue;
28165 }
28166 return SDValue();
28167 }
28168 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28169 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28170 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28171 return SDValue();
28172
28173 // Check if the loaded value is used only in a store operation.
28174 StoreSDNode *StNode = nullptr;
28175 for (SDUse &U : LdNode->uses()) {
28176 if (U.getResNo() == 0) {
28177 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28178 if (StNode)
28179 return SDValue();
28180 StNode = St;
28181 } else {
28182 return SDValue();
28183 }
28184 }
28185 }
28186 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28187 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28188 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28189 return SDValue();
28190
28191 // Create a new GET_FPENV_MEM node that uses the store address to write the FP
28192 // environment.
28193 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28194 StNode->getMemOperand());
28195 CombineTo(StNode, Res, false);
28196 return Res;
28197}
28198
28199SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28200 SDValue Chain = N->getOperand(0);
28201 SDValue Ptr = N->getOperand(1);
28202 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28203
28204 // Check that the address of the FP state is also used only in a single store operation.
28205 StoreSDNode *StNode = nullptr;
28206 for (auto *U : Ptr->users()) {
28207 if (U == N)
28208 continue;
28209 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28210 if (StNode && StNode != St)
28211 return SDValue();
28212 StNode = St;
28213 continue;
28214 }
28215 return SDValue();
28216 }
28217 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28218 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28219 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28220 return SDValue();
28221
28222 // Check if the stored value is loaded from some location and the loaded
28223 // value is used only in the store operation.
28224 SDValue StValue = StNode->getValue();
28225 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28226 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28227 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28228 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28229 return SDValue();
28230
28231 // Create a new SET_FPENV_MEM node that uses the load address to read the FP
28232 // environment.
28233 SDValue Res =
28234 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28235 LdNode->getMemOperand());
28236 return Res;
28237}
28238
28239 /// Returns a vector_shuffle if it is able to transform an AND into a vector_shuffle
28240/// with the destination vector and a zero vector.
28241/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28242/// vector_shuffle V, Zero, <0, 4, 2, 4>
28243SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28244 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28245
28246 EVT VT = N->getValueType(0);
28247 SDValue LHS = N->getOperand(0);
28248 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28249 SDLoc DL(N);
28250
28251 // Make sure we're not running after operation legalization where it
28252 // may have custom lowered the vector shuffles.
28253 if (LegalOperations)
28254 return SDValue();
28255
28256 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28257 return SDValue();
28258
28259 EVT RVT = RHS.getValueType();
28260 unsigned NumElts = RHS.getNumOperands();
28261
28262 // Attempt to create a valid clear mask, splitting the mask into sub-elements
28263 // and checking to see if each is all zeros or all ones, making it suitable
28264 // for shuffle masking.
28265 auto BuildClearMask = [&](int Split) {
28266 int NumSubElts = NumElts * Split;
28267 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28268
28269 SmallVector<int, 8> Indices;
28270 for (int i = 0; i != NumSubElts; ++i) {
28271 int EltIdx = i / Split;
28272 int SubIdx = i % Split;
28273 SDValue Elt = RHS.getOperand(EltIdx);
28274 // X & undef --> 0 (not undef). So this lane must be converted to choose
28275 // from the zero constant vector (same as if the element had all 0-bits).
28276 if (Elt.isUndef()) {
28277 Indices.push_back(i + NumSubElts);
28278 continue;
28279 }
28280
28281 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28282 if (!Bits)
28283 return SDValue();
28284
28285 // Extract the sub element from the constant bit mask.
28286 if (DAG.getDataLayout().isBigEndian())
28287 *Bits =
28288 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28289 else
28290 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28291
28292 if (Bits->isAllOnes())
28293 Indices.push_back(i);
28294 else if (*Bits == 0)
28295 Indices.push_back(i + NumSubElts);
28296 else
28297 return SDValue();
28298 }
28299
28300 // Let's see if the target supports this vector_shuffle.
28301 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28302 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28303 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28304 return SDValue();
28305
28306 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28307 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28308 DAG.getBitcast(ClearVT, LHS),
28309 Zero, Indices));
28310 };
28311
28312 // Determine maximum split level (byte level masking).
28313 int MaxSplit = 1;
28314 if (RVT.getScalarSizeInBits() % 8 == 0)
28315 MaxSplit = RVT.getScalarSizeInBits() / 8;
28316
28317 for (int Split = 1; Split <= MaxSplit; ++Split)
28318 if (RVT.getScalarSizeInBits() % Split == 0)
28319 if (SDValue S = BuildClearMask(Split))
28320 return S;
28321
28322 return SDValue();
28323}
28324
28325/// If a vector binop is performed on splat values, it may be profitable to
28326/// extract, scalarize, and insert/splat.
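/// For illustration (a sketch of the transform below, not an additional rule):
/// a binop whose operands are both splats, such as
///   add (splat_vector X), (splat_vector Y)
/// can be rewritten as
///   splat_vector (add X, Y)
/// so the arithmetic is done once on scalars rather than per lane.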
28327 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28328 const SDLoc &DL, bool LegalTypes) {
28329 SDValue N0 = N->getOperand(0);
28330 SDValue N1 = N->getOperand(1);
28331 unsigned Opcode = N->getOpcode();
28332 EVT VT = N->getValueType(0);
28333 EVT EltVT = VT.getVectorElementType();
28334 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28335
28336 // TODO: Remove/replace the extract cost check? If the elements are available
28337 // as scalars, then there may be no extract cost. Should we ask if
28338 // inserting a scalar back into a vector is cheap instead?
28339 int Index0, Index1;
28340 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28341 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28342 // Extracting an element from a splat_vector should be free.
28343 // TODO: use DAG.isSplatValue instead?
28344 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28345 N1.getOpcode() == ISD::SPLAT_VECTOR;
28346 if (!Src0 || !Src1 || Index0 != Index1 ||
28347 Src0.getValueType().getVectorElementType() != EltVT ||
28348 Src1.getValueType().getVectorElementType() != EltVT ||
28349 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28350 // If before type legalization, allow scalar types that will eventually be
28351 // made legal.
28352 !TLI.isOperationLegalOrCustom(
28353 Opcode, LegalTypes
28354 ? EltVT
28355 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28356 return SDValue();
28357
28358 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28359 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28360 return SDValue();
28361
28362 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28363 // All but one element should have an undef input, which will fold to a
28364 // constant or undef. Avoid splatting which would over-define potentially
28365 // undefined elements.
28366
28367 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28368 // build_vec ..undef, (bo X, Y), undef...
28369 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28370 DAG.ExtractVectorElements(Src0, EltsX);
28371 DAG.ExtractVectorElements(Src1, EltsY);
28372
28373 for (auto [X, Y] : zip(EltsX, EltsY))
28374 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28375 return DAG.getBuildVector(VT, DL, EltsResult);
28376 }
28377
28378 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28379 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28380 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28381 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28382
28383 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28384 return DAG.getSplat(VT, DL, ScalarBO);
28385}
28386
28387/// Visit a vector cast operation, like FP_EXTEND.
28388SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28389 EVT VT = N->getValueType(0);
28390 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28391 EVT EltVT = VT.getVectorElementType();
28392 unsigned Opcode = N->getOpcode();
28393
28394 SDValue N0 = N->getOperand(0);
28395 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28396
28397 // TODO: promoting the operation might also be good here?
28398 int Index0;
28399 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28400 if (Src0 &&
28401 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28402 TLI.isExtractVecEltCheap(VT, Index0)) &&
28403 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28404 TLI.preferScalarizeSplat(N)) {
28405 EVT SrcVT = N0.getValueType();
28406 EVT SrcEltVT = SrcVT.getVectorElementType();
28407 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28408 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28409 SDValue Elt =
28410 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28411 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28412 if (VT.isScalableVector())
28413 return DAG.getSplatVector(VT, DL, ScalarBO);
28414 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28415 return DAG.getBuildVector(VT, DL, Ops);
28416 }
28417 }
28418
28419 return SDValue();
28420}
28421
28422/// Visit a binary vector operation, like ADD.
28423SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28424 EVT VT = N->getValueType(0);
28425 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28426
28427 SDValue LHS = N->getOperand(0);
28428 SDValue RHS = N->getOperand(1);
28429 unsigned Opcode = N->getOpcode();
28430 SDNodeFlags Flags = N->getFlags();
28431
28432 // Move unary shuffles with identical masks after a vector binop:
28433 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28434 // --> shuffle (VBinOp A, B), Undef, Mask
28435 // This does not require type legality checks because we are creating the
28436 // same types of operations that are in the original sequence. We do have to
28437 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28438 // though. This code is adapted from the identical transform in instcombine.
28439 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28440 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28441 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28442 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28443 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28444 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28445 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28446 RHS.getOperand(0), Flags);
28447 SDValue UndefV = LHS.getOperand(1);
28448 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28449 }
28450
28451 // Try to sink a splat shuffle after a binop with a uniform constant.
28452 // This is limited to cases where neither the shuffle nor the constant have
28453 // undefined elements because that could be poison-unsafe or inhibit
28454 // demanded elements analysis. It is further limited to not change a splat
28455 // of an inserted scalar because that may be optimized better by
28456 // load-folding or other target-specific behaviors.
28457 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28458 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28459 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28460 // binop (splat X), (splat C) --> splat (binop X, C)
28461 SDValue X = Shuf0->getOperand(0);
28462 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28463 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28464 Shuf0->getMask());
28465 }
28466 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
28467 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
28468 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28469 // binop (splat C), (splat X) --> splat (binop C, X)
28470 SDValue X = Shuf1->getOperand(0);
28471 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
28472 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28473 Shuf1->getMask());
28474 }
28475 }
28476
28477 // The following pattern is likely to emerge with vector reduction ops. Moving
28478 // the binary operation ahead of insertion may allow using a narrower vector
28479 // instruction that has better performance than the wide version of the op:
28480 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
28481 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
28482 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
28483 LHS.getOperand(2) == RHS.getOperand(2) &&
28484 (LHS.hasOneUse() || RHS.hasOneUse())) {
28485 SDValue X = LHS.getOperand(1);
28486 SDValue Y = RHS.getOperand(1);
28487 SDValue Z = LHS.getOperand(2);
28488 EVT NarrowVT = X.getValueType();
28489 if (NarrowVT == Y.getValueType() &&
28490 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
28491 LegalOperations)) {
28492 // (binop undef, undef) may not return undef, so compute that result.
28493 SDValue VecC =
28494 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
28495 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
28496 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
28497 }
28498 }
28499
28500 // Make sure all but the first op are undef or constant.
28501 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
28502 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
28503 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
28504 return Op.isUndef() ||
28505 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
28506 });
28507 };
28508
28509 // The following pattern is likely to emerge with vector reduction ops. Moving
28510 // the binary operation ahead of the concat may allow using a narrower vector
28511 // instruction that has better performance than the wide version of the op:
28512 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
28513 // concat (VBinOp X, Y), VecC
28514 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
28515 (LHS.hasOneUse() || RHS.hasOneUse())) {
28516 EVT NarrowVT = LHS.getOperand(0).getValueType();
28517 if (NarrowVT == RHS.getOperand(0).getValueType() &&
28518 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
28519 unsigned NumOperands = LHS.getNumOperands();
28520 SmallVector<SDValue, 4> ConcatOps;
28521 for (unsigned i = 0; i != NumOperands; ++i) {
28522 // This constant folds for operands 1 and up.
28523 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
28524 RHS.getOperand(i)));
28525 }
28526
28527 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
28528 }
28529 }
28530
28531 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
28532 return V;
28533
28534 return SDValue();
28535}
28536
28537SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
28538 SDValue N2) {
28539 assert(N0.getOpcode() == ISD::SETCC &&
28540 "First argument must be a SetCC node!");
28541
28542 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
28543 cast<CondCodeSDNode>(N0.getOperand(2))->get());
28544
28545 // If we got a simplified select_cc node back from SimplifySelectCC, then
28546 // break it down into a new SETCC node, and a new SELECT node, and then return
28547 // the SELECT node, since we were called with a SELECT node.
28548 if (SCC.getNode()) {
28549 // Check to see if we got a select_cc back (to turn into setcc/select).
28550 // Otherwise, just return whatever node we got back, like fabs.
28551 if (SCC.getOpcode() == ISD::SELECT_CC) {
28552 const SDNodeFlags Flags = N0->getFlags();
28553 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
28554 N0.getValueType(),
28555 SCC.getOperand(0), SCC.getOperand(1),
28556 SCC.getOperand(4), Flags);
28557 AddToWorklist(SETCC.getNode());
28558 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
28559 SCC.getOperand(2), SCC.getOperand(3), Flags);
28560 }
28561
28562 return SCC;
28563 }
28564 return SDValue();
28565}
28566
28567/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
28568/// being selected between, see if we can simplify the select. Callers of this
28569/// should assume that TheSelect is deleted if this returns true. As such, they
28570/// should return the appropriate thing (e.g. the node) back to the top-level of
28571/// the DAG combiner loop to avoid it being looked at.
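/// For illustration, one of the rewrites performed below turns a select of two
/// loads that share a token chain into a single load from a selected address:
///   (select C, (load P), (load Q)) --> (load (select C, P, Q))
/// subject to the safety checks in the body of the function.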
28572bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
28573 SDValue RHS) {
28574 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28575 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
28576 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
28577 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
28578 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
28579 SDValue Sqrt = RHS;
28580 ISD::CondCode CC;
28581 SDValue CmpLHS;
28582 const ConstantFPSDNode *Zero = nullptr;
28583
28584 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
28585 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
28586 CmpLHS = TheSelect->getOperand(0);
28587 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
28588 } else {
28589 // SELECT or VSELECT
28590 SDValue Cmp = TheSelect->getOperand(0);
28591 if (Cmp.getOpcode() == ISD::SETCC) {
28592 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
28593 CmpLHS = Cmp.getOperand(0);
28594 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
28595 }
28596 }
28597 if (Zero && Zero->isZero() &&
28598 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
28599 CC == ISD::SETULT || CC == ISD::SETLT)) {
28600 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28601 CombineTo(TheSelect, Sqrt);
28602 return true;
28603 }
28604 }
28605 }
28606 // Cannot simplify select with vector condition
28607 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
28608
28609 // If this is a select from two identical things, try to pull the operation
28610 // through the select.
28611 if (LHS.getOpcode() != RHS.getOpcode() ||
28612 !LHS.hasOneUse() || !RHS.hasOneUse())
28613 return false;
28614
28615 // If this is a load and the token chain is identical, replace the select
28616 // of two loads with a load through a select of the address to load from.
28617 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
28618 // constants have been dropped into the constant pool.
28619 if (LHS.getOpcode() == ISD::LOAD) {
28620 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
28621 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
28622
28623 // Token chains must be identical.
28624 if (LHS.getOperand(0) != RHS.getOperand(0) ||
28625 // Do not let this transformation reduce the number of volatile loads.
28626 // Be conservative for atomics for the moment
28627 // TODO: This does appear to be legal for unordered atomics (see D66309)
28628 !LLD->isSimple() || !RLD->isSimple() ||
28629 // FIXME: If either is a pre/post inc/dec load,
28630 // we'd need to split out the address adjustment.
28631 LLD->isIndexed() || RLD->isIndexed() ||
28632 // If this is an EXTLOAD, the VT's must match.
28633 LLD->getMemoryVT() != RLD->getMemoryVT() ||
28634 // If this is an EXTLOAD, the kind of extension must match.
28635 (LLD->getExtensionType() != RLD->getExtensionType() &&
28636 // The only exception is if one of the extensions is anyext.
28637 LLD->getExtensionType() != ISD::EXTLOAD &&
28638 RLD->getExtensionType() != ISD::EXTLOAD) ||
28639 // FIXME: this discards src value information. This is
28640 // over-conservative. It would be beneficial to be able to remember
28641 // both potential memory locations. Since we are discarding
28642 // src value info, don't do the transformation if the memory
28643 // locations are not in the default address space.
28644 LLD->getPointerInfo().getAddrSpace() != 0 ||
28645 RLD->getPointerInfo().getAddrSpace() != 0 ||
28646 // We can't produce a CMOV of a TargetFrameIndex since we won't
28647 // generate the address generation required.
28648 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28649 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28650 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
28651 LLD->getBasePtr().getValueType()))
28652 return false;
28653
28654 // The loads must not depend on one another.
28655 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
28656 return false;
28657
28658 // Check that the select condition doesn't reach either load. If so,
28659 // folding this will induce a cycle into the DAG. If not, this is safe to
28660 // xform, so create a select of the addresses.
28661
28662 SmallPtrSet<const SDNode *, 32> Visited;
28663 SmallVector<const SDNode *, 16> Worklist;
28664
28665 // Always fail if LLD and RLD are not independent. TheSelect is a
28666 // predecessor to all Nodes in question so we need not search past it.
28667
28668 Visited.insert(TheSelect);
28669 Worklist.push_back(LLD);
28670 Worklist.push_back(RLD);
28671
28672 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
28673 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
28674 return false;
28675
28676 SDValue Addr;
28677 if (TheSelect->getOpcode() == ISD::SELECT) {
28678 // We cannot do this optimization if any pair of {RLD, LLD} is a
28679 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
28680 // Loads, we only need to check if CondNode is a successor to one of the
28681 // loads. We can further avoid this if there's no use of their chain
28682 // value.
28683 SDNode *CondNode = TheSelect->getOperand(0).getNode();
28684 Worklist.push_back(CondNode);
28685
28686 if ((LLD->hasAnyUseOfValue(1) &&
28687 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28688 (RLD->hasAnyUseOfValue(1) &&
28689 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28690 return false;
28691
28692 Addr = DAG.getSelect(SDLoc(TheSelect),
28693 LLD->getBasePtr().getValueType(),
28694 TheSelect->getOperand(0), LLD->getBasePtr(),
28695 RLD->getBasePtr());
28696 } else { // Otherwise SELECT_CC
28697 // We cannot do this optimization if any pair of {RLD, LLD} is a
28698 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
28699 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
28700 // one of the loads. We can further avoid this if there's no use of their
28701 // chain value.
28702
28703 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
28704 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
28705 Worklist.push_back(CondLHS);
28706 Worklist.push_back(CondRHS);
28707
28708 if ((LLD->hasAnyUseOfValue(1) &&
28709 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28710 (RLD->hasAnyUseOfValue(1) &&
28711 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28712 return false;
28713
28714 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
28715 LLD->getBasePtr().getValueType(),
28716 TheSelect->getOperand(0),
28717 TheSelect->getOperand(1),
28718 LLD->getBasePtr(), RLD->getBasePtr(),
28719 TheSelect->getOperand(4));
28720 }
28721
28722 SDValue Load;
28723 // It is safe to replace the two loads if they have different alignments,
28724 // but the new load must be the minimum (most restrictive) alignment of the
28725 // inputs.
28726 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
28727 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
28728 if (!RLD->isInvariant())
28729 MMOFlags &= ~MachineMemOperand::MOInvariant;
28730 if (!RLD->isDereferenceable())
28731 MMOFlags &= ~MachineMemOperand::MODereferenceable;
28732 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
28733 // FIXME: Discards pointer and AA info.
28734 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
28735 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
28736 MMOFlags);
28737 } else {
28738 // FIXME: Discards pointer and AA info.
28739 Load = DAG.getExtLoad(
28740 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
28741 : LLD->getExtensionType(),
28742 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
28743 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
28744 }
28745
28746 // Users of the select now use the result of the load.
28747 CombineTo(TheSelect, Load);
28748
28749 // Users of the old loads now use the new load's chain. We know the
28750 // old-load value is dead now.
28751 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
28752 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
28753 return true;
28754 }
28755
28756 return false;
28757}
28758
28759/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
28760/// bitwise 'and'.
28761SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
28762 SDValue N1, SDValue N2, SDValue N3,
28763 ISD::CondCode CC) {
28764 // If this is a select where the false operand is zero and the compare is a
28765 // check of the sign bit, see if we can perform the "gzip trick":
28766 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
28767 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
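// For example, with i32 operands "select_cc setlt X, 0, A, 0" becomes
// "and (sra X, 31), A": the arithmetic shift produces all-ones when X is
// negative and zero otherwise, so the AND yields either A or 0.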
28768 EVT XType = N0.getValueType();
28769 EVT AType = N2.getValueType();
28770 if (!isNullConstant(N3) || !XType.bitsGE(AType))
28771 return SDValue();
28772
28773 // If the comparison is testing for a positive value, we have to invert
28774 // the sign bit mask, so only do that transform if the target has a bitwise
28775 // 'and not' instruction (the invert is free).
28776 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
28777 // (X > -1) ? A : 0
28778 // (X > 0) ? X : 0 <-- This is canonical signed max.
28779 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
28780 return SDValue();
28781 } else if (CC == ISD::SETLT) {
28782 // (X < 0) ? A : 0
28783 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
28784 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28785 return SDValue();
28786 } else {
28787 return SDValue();
28788 }
28789
28790 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28791 // constant.
28792 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28793 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28794 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28795 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28796 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28797 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28798 AddToWorklist(Shift.getNode());
28799
28800 if (XType.bitsGT(AType)) {
28801 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28802 AddToWorklist(Shift.getNode());
28803 }
28804
28805 if (CC == ISD::SETGT)
28806 Shift = DAG.getNOT(DL, Shift, AType);
28807
28808 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28809 }
28810 }
28811
28812 unsigned ShCt = XType.getSizeInBits() - 1;
28813 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28814 return SDValue();
28815
28816 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28817 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28818 AddToWorklist(Shift.getNode());
28819
28820 if (XType.bitsGT(AType)) {
28821 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28822 AddToWorklist(Shift.getNode());
28823 }
28824
28825 if (CC == ISD::SETGT)
28826 Shift = DAG.getNOT(DL, Shift, AType);
28827
28828 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28829}
28830
28831// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28832SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28833 SDValue N0 = N->getOperand(0);
28834 SDValue N1 = N->getOperand(1);
28835 SDValue N2 = N->getOperand(2);
28836 SDLoc DL(N);
28837
28838 unsigned BinOpc = N1.getOpcode();
28839 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28840 (N1.getResNo() != N2.getResNo()))
28841 return SDValue();
28842
28843 // The use checks are intentionally on SDNode because we may be dealing
28844 // with opcodes that produce more than one SDValue.
28845 // TODO: Do we really need to check N0 (the condition operand of the select)?
28846 // But removing that clause could cause an infinite loop...
28847 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28848 return SDValue();
28849
28850 // Binops may include opcodes that return multiple values, so all values
28851 // must be created/propagated from the newly created binops below.
28852 SDVTList OpVTs = N1->getVTList();
28853
28854 // Fold select(cond, binop(x, y), binop(z, y))
28855 // --> binop(select(cond, x, z), y)
28856 if (N1.getOperand(1) == N2.getOperand(1)) {
28857 SDValue N10 = N1.getOperand(0);
28858 SDValue N20 = N2.getOperand(0);
28859 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28860 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28861 SDValue NewBinOp =
28862 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
28863 return SDValue(NewBinOp.getNode(), N1.getResNo());
28864 }
28865
28866 // Fold select(cond, binop(x, y), binop(x, z))
28867 // --> binop(x, select(cond, y, z))
28868 if (N1.getOperand(0) == N2.getOperand(0)) {
28869 SDValue N11 = N1.getOperand(1);
28870 SDValue N21 = N2.getOperand(1);
28871 // Second op VT might be different (e.g. shift amount type)
28872 if (N11.getValueType() == N21.getValueType()) {
28873 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28874 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28875 SDValue NewBinOp =
28876 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
28877 return SDValue(NewBinOp.getNode(), N1.getResNo());
28878 }
28879 }
28880
28881 // TODO: Handle isCommutativeBinOp patterns as well?
28882 return SDValue();
28883}
28884
28885// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28886SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28887 SDValue N0 = N->getOperand(0);
28888 EVT VT = N->getValueType(0);
28889 bool IsFabs = N->getOpcode() == ISD::FABS;
28890 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28891
28892 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28893 return SDValue();
28894
28895 SDValue Int = N0.getOperand(0);
28896 EVT IntVT = Int.getValueType();
28897
28898 // The operand to cast should be integer.
28899 if (!IntVT.isInteger() || IntVT.isVector())
28900 return SDValue();
28901
28902 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28903 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
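// For example, for an f32 value the integer mask is 0x80000000 for fneg (xor)
// and 0x7fffffff for fabs (and).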
28904 APInt SignMask;
28905 if (N0.getValueType().isVector()) {
28906 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28907 // 0x7f...) per element and splat it.
28908 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28909 if (IsFabs)
28910 SignMask = ~SignMask;
28911 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28912 } else {
28913 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28914 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28915 if (IsFabs)
28916 SignMask = ~SignMask;
28917 }
28918 SDLoc DL(N0);
28919 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28920 DAG.getConstant(SignMask, DL, IntVT));
28921 AddToWorklist(Int.getNode());
28922 return DAG.getBitcast(VT, Int);
28923}
28924
28925 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28926/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28927/// in it. This may be a win when the constant is not otherwise available
28928/// because it replaces two constant pool loads with one.
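/// For example, with f32 constants the constant pool entry holds the two values
/// in 8 bytes, so the comparison selects between byte offsets 0 and 4 and a
/// single load replaces the two separate constant loads.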
28929SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28930 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28931 ISD::CondCode CC) {
28932 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28933 return SDValue();
28934
28935 // If we are before legalize types, we want the other legalization to happen
28936 // first (for example, to avoid messing with soft float).
28937 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28938 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28939 EVT VT = N2.getValueType();
28940 if (!TV || !FV || !TLI.isTypeLegal(VT))
28941 return SDValue();
28942
28943 // If a constant can be materialized without loads, this does not make sense.
28944 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
28945 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28946 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28947 return SDValue();
28948
28949 // If both constants have multiple uses, then we won't need to do an extra
28950 // load. The values are likely around in registers for other users.
28951 if (!TV->hasOneUse() && !FV->hasOneUse())
28952 return SDValue();
28953
28954 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28955 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28956 Type *FPTy = Elts[0]->getType();
28957 const DataLayout &TD = DAG.getDataLayout();
28958
28959 // Create a ConstantArray of the two constants.
28960 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
28961 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
28962 TD.getPrefTypeAlign(FPTy));
28963 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28964
28965 // Get offsets to the 0 and 1 elements of the array, so we can select between
28966 // them.
28967 SDValue Zero = DAG.getIntPtrConstant(0, DL);
28968 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28969 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
28970 SDValue Cond =
28971 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
28972 AddToWorklist(Cond.getNode());
28973 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
28974 AddToWorklist(CstOffset.getNode());
28975 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
28976 AddToWorklist(CPIdx.getNode());
28977 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
28978 MachinePointerInfo::getConstantPool(
28979 DAG.getMachineFunction()), Alignment);
28980}
28981
28982/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
28983/// where 'cond' is the comparison specified by CC.
28984SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
28985 SDValue N2, SDValue N3, ISD::CondCode CC,
28986 bool NotExtCompare) {
28987 // (x ? y : y) -> y.
28988 if (N2 == N3) return N2;
28989
28990 EVT CmpOpVT = N0.getValueType();
28991 EVT CmpResVT = getSetCCResultType(CmpOpVT);
28992 EVT VT = N2.getValueType();
28993 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
28994 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28995 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
28996
28997 // Determine if the condition we're dealing with is constant.
28998 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
28999 AddToWorklist(SCC.getNode());
29000 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29001 // fold select_cc true, x, y -> x
29002 // fold select_cc false, x, y -> y
29003 return !(SCCC->isZero()) ? N2 : N3;
29004 }
29005 }
29006
29007 if (SDValue V =
29008 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29009 return V;
29010
29011 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29012 return V;
29013
29014 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29015 // where y has a single bit set.
29016 // A plaintext description would be: we can turn the SELECT_CC into an AND
29017 // when the condition can be materialized as an all-ones register. Any
29018 // single bit-test can be materialized as an all-ones register with
29019 // shift-left and shift-right-arith.
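// For example, with VT = i32 and y = 0x10, "shl x, 27" moves the tested bit to
// the sign bit and "sra ..., 31" then yields all-ones when the bit was set and
// zero otherwise, so the final AND produces either A or 0.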
29020 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29021 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29022 SDValue AndLHS = N0->getOperand(0);
29023 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29024 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29025 // Shift the tested bit over the sign bit.
29026 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29027 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29028 unsigned ShCt = AndMask.getBitWidth() - 1;
29029 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29030 SDLoc(AndLHS));
29031 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29032
29033 // Now arithmetic right shift it all the way over, so the result is
29034 // either all-ones, or zero.
29035 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29036 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29037
29038 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29039 }
29040 }
29041 }
29042
29043 // fold select C, 16, 0 -> shl C, 4
29044 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29045 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29046
29047 if ((Fold || Swap) &&
29048 TLI.getBooleanContents(CmpOpVT) ==
29049 TargetLowering::ZeroOrOneBooleanContent &&
29050 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29052
29053 if (Swap) {
29054 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29055 std::swap(N2C, N3C);
29056 }
29057
29058 // If the caller doesn't want us to simplify this into a zext of a compare,
29059 // don't do it.
29060 if (NotExtCompare && N2C->isOne())
29061 return SDValue();
29062
29063 SDValue Temp, SCC;
29064 // zext (setcc n0, n1)
29065 if (LegalTypes) {
29066 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29067 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29068 } else {
29069 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29070 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29071 }
29072
29073 AddToWorklist(SCC.getNode());
29074 AddToWorklist(Temp.getNode());
29075
29076 if (N2C->isOne())
29077 return Temp;
29078
29079 unsigned ShCt = N2C->getAPIntValue().logBase2();
29080 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29081 return SDValue();
29082
29083 // shl setcc result by log2 n2c
29084 return DAG.getNode(
29085 ISD::SHL, DL, N2.getValueType(), Temp,
29086 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29087 }
29088
29089 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29090 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29091 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29092 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29093 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29094 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29095 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29096 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29097 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29098 SDValue ValueOnZero = N2;
29099 SDValue Count = N3;
29100 // If the condition is NE instead of EQ, swap the operands.
29101 if (CC == ISD::SETNE)
29102 std::swap(ValueOnZero, Count);
29103 // Check if the value on zero is a constant equal to the bits in the type.
29104 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29105 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29106 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29107 // legal, combine to just cttz.
29108 if ((Count.getOpcode() == ISD::CTTZ ||
29109 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29110 N0 == Count.getOperand(0) &&
29111 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29112 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29113 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29114 // legal, combine to just ctlz.
29115 if ((Count.getOpcode() == ISD::CTLZ ||
29116 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29117 N0 == Count.getOperand(0) &&
29118 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29119 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29120 }
29121 }
29122 }
29123
29124 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29125 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29126 if (!NotExtCompare && N1C && N2C && N3C &&
29127 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29128 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29129 (N1C->isZero() && CC == ISD::SETLT)) &&
29130 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29131 SDValue ASHR =
29132 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29133 DAG.getShiftAmountConstant(
29134 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29135 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29136 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29137 }
29138
29139 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29140 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29141 N2C->isOne() && N3C->isAllOnes() &&
29142 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29143 CmpOpVT.getScalarSizeInBits() - 1)) {
29144 SDValue ASHR =
29145 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29146 DAG.getShiftAmountConstant(
29147 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29148 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29149 DAG.getConstant(1, DL, VT));
29150 }
29151
29152 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29153 return S;
29154 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29155 return S;
29156 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29157 return ABD;
29158
29159 return SDValue();
29160}
29161
29162 static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29163 const TargetLowering &TLI) {
29164 // Match a pattern such as:
29165 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29166 // This extracts contiguous parts of X and ORs them together before comparing.
29167 // We can optimize this so that we directly check (X & SomeMask) instead,
29168 // eliminating the shifts.
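// For example, when the result feeds a comparison against zero,
//   ((X | (X >> 8)) & 0xFF) == 0
// tests the same bits as
//   (X & 0xFFFF) == 0
// since each shifted copy of X contributes RootMask << ShiftAmt to PartsMask.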
29169
29170 EVT VT = Root.getValueType();
29171
29172 // TODO: Support vectors?
29173 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29174 return SDValue();
29175
29176 SDValue N0 = Root.getOperand(0);
29177 SDValue N1 = Root.getOperand(1);
29178
29179 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29180 return SDValue();
29181
29182 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29183
29184 SDValue Src;
29185 const auto IsSrc = [&](SDValue V) {
29186 if (!Src) {
29187 Src = V;
29188 return true;
29189 }
29190
29191 return Src == V;
29192 };
29193
29194 SmallVector<SDValue> Worklist = {N0};
29195 APInt PartsMask(VT.getSizeInBits(), 0);
29196 while (!Worklist.empty()) {
29197 SDValue V = Worklist.pop_back_val();
29198 if (!V.hasOneUse() && (Src && Src != V))
29199 return SDValue();
29200
29201 if (V.getOpcode() == ISD::OR) {
29202 Worklist.push_back(V.getOperand(0));
29203 Worklist.push_back(V.getOperand(1));
29204 continue;
29205 }
29206
29207 if (V.getOpcode() == ISD::SRL) {
29208 SDValue ShiftSrc = V.getOperand(0);
29209 SDValue ShiftAmt = V.getOperand(1);
29210
29211 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29212 return SDValue();
29213
29214 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29215 if (ShiftAmtVal > RootMask.getBitWidth())
29216 return SDValue();
29217
29218 PartsMask |= (RootMask << ShiftAmtVal);
29219 continue;
29220 }
29221
29222 if (IsSrc(V)) {
29223 PartsMask |= RootMask;
29224 continue;
29225 }
29226
29227 return SDValue();
29228 }
29229
29230 if (!Src)
29231 return SDValue();
29232
29233 SDLoc DL(Root);
29234 return DAG.getNode(ISD::AND, DL, VT,
29235 {Src, DAG.getConstant(PartsMask, DL, VT)});
29236}
29237
29238/// This is a stub for TargetLowering::SimplifySetCC.
29239SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29240 ISD::CondCode Cond, const SDLoc &DL,
29241 bool foldBooleans) {
29242 TargetLowering::DAGCombinerInfo
29243 DagCombineInfo(DAG, Level, false, this);
29244 if (SDValue C =
29245 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29246 return C;
29247
29249 isNullConstant(N1)) {
29250
29251 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29252 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29253 }
29254
29255 return SDValue();
29256}
29257
29258/// Given an ISD::SDIV node expressing a divide by constant, return
29259/// a DAG expression to select that will generate the same value by multiplying
29260/// by a magic number.
29261/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
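/// As a rough sketch of the idea: for a divisor C, a magic constant M and shift
/// amount S are chosen so that the high half of X * M, shifted right by S,
/// recovers X / C for every in-range X, with a small sign correction for signed
/// operands; the exact sequence is emitted by TLI.BuildSDIV below.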
29262SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29263 // when optimising for minimum size, we don't want to expand a div to a mul
29264 // and a shift.
29265 if (DAG.getMachineFunction().getFunction().hasMinSize())
29266 return SDValue();
29267
29269 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29270 for (SDNode *N : Built)
29271 AddToWorklist(N);
29272 return S;
29273 }
29274
29275 return SDValue();
29276}
29277
29278/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29279/// DAG expression that will generate the same value by right shifting.
29280SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29281 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29282 if (!C)
29283 return SDValue();
29284
29285 // Avoid division by zero.
29286 if (C->isZero())
29287 return SDValue();
29288
29289 SmallVector<SDNode *, 8> Built;
29290 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29291 for (SDNode *N : Built)
29292 AddToWorklist(N);
29293 return S;
29294 }
29295
29296 return SDValue();
29297}
29298
29299/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29300/// expression that will generate the same value by multiplying by a magic
29301/// number.
29302/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29303SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29304 // when optimising for minimum size, we don't want to expand a div to a mul
29305 // and a shift.
29306 if (DAG.getMachineFunction().getFunction().hasMinSize())
29307 return SDValue();
29308
29309 SmallVector<SDNode *, 8> Built;
29310 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29311 for (SDNode *N : Built)
29312 AddToWorklist(N);
29313 return S;
29314 }
29315
29316 return SDValue();
29317}
29318
29319/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29320/// return a DAG expression that will generate the same value.
29321SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29322 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29323 if (!C)
29324 return SDValue();
29325
29326 // Avoid division by zero.
29327 if (C->isZero())
29328 return SDValue();
29329
29330 SmallVector<SDNode *, 8> Built;
29331 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29332 for (SDNode *N : Built)
29333 AddToWorklist(N);
29334 return S;
29335 }
29336
29337 return SDValue();
29338}
29339
29340// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29341//
29342// Returns the node that represents `Log2(Op)`. This may create a new node. If
29343 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
29344//
29345// All nodes will be created at `DL` and the output will be of type `VT`.
29346//
29347// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29348 // `AssumeNonZero` if this function should simply assume (rather than require
29349 // proving) that `Op` is non-zero.
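// For example, log2 of the constant 8 folds to the constant 3, and
// log2(x << y) folds to log2(x) + y when x can be shown (or is assumed)
// to be non-zero.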
29350 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29351 SDValue Op, unsigned Depth,
29352 bool AssumeNonZero) {
29353 assert(VT.isInteger() && "Only integer types are supported!");
29354
29355 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29356 while (true) {
29357 switch (V.getOpcode()) {
29358 case ISD::TRUNCATE:
29359 case ISD::ZERO_EXTEND:
29360 V = V.getOperand(0);
29361 break;
29362 default:
29363 return V;
29364 }
29365 }
29366 };
29367
29368 if (VT.isScalableVector())
29369 return SDValue();
29370
29371 Op = PeekThroughCastsAndTrunc(Op);
29372
29373 // Helper for determining whether a value is a power-2 constant scalar or a
29374 // vector of such elements.
29375 SmallVector<APInt> Pow2Constants;
29376 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29377 if (C->isZero() || C->isOpaque())
29378 return false;
29379 // TODO: We may also be able to support negative powers of 2 here.
29380 if (C->getAPIntValue().isPowerOf2()) {
29381 Pow2Constants.emplace_back(C->getAPIntValue());
29382 return true;
29383 }
29384 return false;
29385 };
29386
29387 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29388 if (!VT.isVector())
29389 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29390 // We need to create a build vector
29391 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29392 return DAG.getSplat(VT, DL,
29393 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29394 VT.getScalarType()));
29395 SmallVector<SDValue> Log2Ops;
29396 for (const APInt &Pow2 : Pow2Constants)
29397 Log2Ops.emplace_back(
29398 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29399 return DAG.getBuildVector(VT, DL, Log2Ops);
29400 }
29401
29402 if (Depth >= DAG.MaxRecursionDepth)
29403 return SDValue();
29404
29405 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29406 // Peek through zero extend. We can't peek through truncates since this
29407 // function is called on a shift amount. We must ensure that all of the bits
29408 // above the original shift amount are zeroed by this function.
29409 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29410 ToCast = ToCast.getOperand(0);
29411 EVT CurVT = ToCast.getValueType();
29412 if (NewVT == CurVT)
29413 return ToCast;
29414
29415 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29416 return DAG.getBitcast(NewVT, ToCast);
29417
29418 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29419 };
29420
29421 // log2(X << Y) -> log2(X) + Y
29422 if (Op.getOpcode() == ISD::SHL) {
29423 // 1 << Y and X nuw/nsw << Y are all non-zero.
29424 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29425 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29426 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29427 Depth + 1, AssumeNonZero))
29428 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29429 CastToVT(VT, Op.getOperand(1)));
29430 }
29431
29432 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29433 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
29434 Op.hasOneUse()) {
29435 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
29436 Depth + 1, AssumeNonZero))
29437 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
29438 Depth + 1, AssumeNonZero))
29439 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
29440 }
29441
29442 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29443 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29444 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29445 Op.hasOneUse()) {
29446 // Use AssumeNonZero as false here. Otherwise we can hit a case where
29447 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
29448 if (SDValue LogX =
29449 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29450 /*AssumeNonZero*/ false))
29451 if (SDValue LogY =
29452 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29453 /*AssumeNonZero*/ false))
29454 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29455 }
29456
29457 return SDValue();
29458}
29459
29460/// Determines the LogBase2 value for a non-null input value using the
29461/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
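/// For example, for a 32-bit value V = 16: ctlz(16) = 27 and (32 - 1) - 27 = 4,
/// which is log2(16).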
29462SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
29463 bool KnownNonZero, bool InexpensiveOnly,
29464 std::optional<EVT> OutVT) {
29465 EVT VT = OutVT ? *OutVT : V.getValueType();
29466 SDValue InexpensiveLogBase2 =
29467 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
29468 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
29469 return InexpensiveLogBase2;
29470
29471 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
29472 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
29473 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
29474 return LogBase2;
29475}
29476
29477/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29478/// For the reciprocal, we need to find the zero of the function:
29479/// F(X) = 1/X - A [which has a zero at X = 1/A]
29480/// =>
29481/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
29482/// does not require additional intermediate precision]
29483/// For the last iteration, put numerator N into it to gain more precision:
29484/// Result = N X_i + X_i (N - N A X_i)
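/// A short numeric illustration for A = 3 with initial estimate X_0 = 0.3:
/// X_1 = 0.3 * (2 - 3 * 0.3) = 0.33, X_2 = 0.33 * (2 - 3 * 0.33) = 0.3333,
/// converging quadratically towards 1/3.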
29485SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
29486 SDNodeFlags Flags) {
29487 if (LegalDAG)
29488 return SDValue();
29489
29490 // TODO: Handle extended types?
29491 EVT VT = Op.getValueType();
29492 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29493 VT.getScalarType() != MVT::f64)
29494 return SDValue();
29495
29496 // If estimates are explicitly disabled for this function, we're done.
29497 MachineFunction &MF = DAG.getMachineFunction();
29498 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
29499 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29500 return SDValue();
29501
29502 // Estimates may be explicitly enabled for this type with a custom number of
29503 // refinement steps.
29504 int Iterations = TLI.getDivRefinementSteps(VT, MF);
29505 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
29506 AddToWorklist(Est.getNode());
29507
29508 SDLoc DL(Op);
29509 if (Iterations) {
29510 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
29511
29512 // Newton iterations: Est = Est + Est (N - Arg * Est)
29513 // If this is the last iteration, also multiply by the numerator.
29514 for (int i = 0; i < Iterations; ++i) {
29515 SDValue MulEst = Est;
29516
29517 if (i == Iterations - 1) {
29518 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
29519 AddToWorklist(MulEst.getNode());
29520 }
29521
29522 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
29523 AddToWorklist(NewEst.getNode());
29524
29525 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
29526 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
29527 AddToWorklist(NewEst.getNode());
29528
29529 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29530 AddToWorklist(NewEst.getNode());
29531
29532 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
29533 AddToWorklist(Est.getNode());
29534 }
29535 } else {
29536 // If no iterations are available, multiply with N.
29537 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
29538 AddToWorklist(Est.getNode());
29539 }
29540
29541 return Est;
29542 }
29543
29544 return SDValue();
29545}
29546
29547/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29548/// For the reciprocal sqrt, we need to find the zero of the function:
29549/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29550/// =>
29551/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
29552/// As a result, we precompute A/2 prior to the iteration loop.
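/// A short numeric illustration for A = 4 (so 1/sqrt(A) = 0.5) with X_0 = 0.6:
/// X_1 = 0.6 * (1.5 - 2 * 0.36) = 0.468 and X_2 = 0.468 * (1.5 - 2 * 0.219),
/// which is roughly 0.497, approaching 0.5.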
29553SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
29554 unsigned Iterations,
29555 SDNodeFlags Flags, bool Reciprocal) {
29556 EVT VT = Arg.getValueType();
29557 SDLoc DL(Arg);
29558 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
29559
29560 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
29561 // this entire sequence requires only one FP constant.
29562 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
29563 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
29564
29565 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
29566 for (unsigned i = 0; i < Iterations; ++i) {
29567 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
29568 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
29569 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
29570 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29571 }
29572
29573 // If non-reciprocal square root is requested, multiply the result by Arg.
29574 if (!Reciprocal)
29575 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
29576
29577 return Est;
29578}
29579
29580/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29581/// For the reciprocal sqrt, we need to find the zero of the function:
29582/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29583/// =>
29584/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
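/// This is the same iteration as above, refactored so only the two constants
/// -0.5 and -3.0 are needed:
/// (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - 0.5 * A * X_i^2).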
29585SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
29586 unsigned Iterations,
29587 SDNodeFlags Flags, bool Reciprocal) {
29588 EVT VT = Arg.getValueType();
29589 SDLoc DL(Arg);
29590 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
29591 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
29592
29593 // This routine must enter the loop below to work correctly
29594 // when (Reciprocal == false).
29595 assert(Iterations > 0);
29596
29597 // Newton iterations for reciprocal square root:
29598 // E = (E * -0.5) * ((A * E) * E + -3.0)
29599 for (unsigned i = 0; i < Iterations; ++i) {
29600 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
29601 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
29602 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
29603
29604 // When calculating a square root at the last iteration build:
29605 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
29606 // (notice a common subexpression)
29607 SDValue LHS;
29608 if (Reciprocal || (i + 1) < Iterations) {
29609 // RSQRT: LHS = (E * -0.5)
29610 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
29611 } else {
29612 // SQRT: LHS = (A * E) * -0.5
29613 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
29614 }
29615
29616 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
29617 }
29618
29619 return Est;
29620}
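// A minimal scalar sketch of the two-constant iteration above, again in plain
// 'double' arithmetic (the helper name rsqrtNRTwoConst is hypothetical):
//
//   double rsqrtNRTwoConst(double A, double Est, unsigned Iterations) {
//     for (unsigned i = 0; i < Iterations; ++i)
//       Est = (Est * -0.5) * (A * Est * Est + -3.0);
//     return Est;
//   }
//
// Algebraically this is the same update as the one-constant form:
//   Est * (1.5 - 0.5 * A * Est * Est) == (Est * -0.5) * (A * Est * Est - 3.0).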
29621
29622/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
29623/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
29624/// Op can be zero.
29625SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
29626 bool Reciprocal) {
29627 if (LegalDAG)
29628 return SDValue();
29629
29630 // TODO: Handle extended types?
29631 EVT VT = Op.getValueType();
29632 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29633 VT.getScalarType() != MVT::f64)
29634 return SDValue();
29635
29636 // If estimates are explicitly disabled for this function, we're done.
29637 MachineFunction &MF = DAG.getMachineFunction();
29638 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
29639 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29640 return SDValue();
29641
29642 // Estimates may be explicitly enabled for this type with a custom number of
29643 // refinement steps.
29644 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
29645
29646 bool UseOneConstNR = false;
29647 if (SDValue Est =
29648 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
29649 Reciprocal)) {
29650 AddToWorklist(Est.getNode());
29651
29652 if (Iterations > 0)
29653 Est = UseOneConstNR
29654 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
29655 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
29656 if (!Reciprocal) {
29657 SDLoc DL(Op);
29658 // Try the target specific test first.
29659 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
29660
29661 // The estimate is now completely wrong if the input was exactly 0.0 or
29662 // possibly a denormal. Force the answer to 0.0 (or the value provided by
29663 // the target) for those cases.
29664 Est = DAG.getSelect(DL, VT, Test,
29665 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
29666 }
29667 return Est;
29668 }
29669
29670 return SDValue();
29671}
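// Worked example of the zero-input fixup above (for intuition only): computing
// sqrt(Op) as Op * rsqrt(Op) gives 0.0 * +Inf == NaN when Op is +0.0, whereas
// sqrt(+0.0) must be +0.0, so the select substitutes the target-provided
// denormal/zero result whenever getSqrtInputTest fires.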
29672
29673SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29674 return buildSqrtEstimateImpl(Op, Flags, true);
29675}
29676
29677SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29678 return buildSqrtEstimateImpl(Op, Flags, false);
29679}
29680
29681/// Return true if there is any possibility that the two addresses overlap.
29682bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
29683
29684 struct MemUseCharacteristics {
29685 bool IsVolatile;
29686 bool IsAtomic;
29687 SDValue BasePtr;
29688 int64_t Offset;
29689 LocationSize NumBytes;
29690 MachineMemOperand *MMO;
29691 };
29692
29693 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
29694 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
29695 int64_t Offset = 0;
29696 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
29697 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
29698 : (LSN->getAddressingMode() == ISD::PRE_DEC)
29699 ? -1 * C->getSExtValue()
29700 : 0;
29701 TypeSize Size = LSN->getMemoryVT().getStoreSize();
29702 return {LSN->isVolatile(), LSN->isAtomic(),
29703 LSN->getBasePtr(), Offset /*base offset*/,
29704 LocationSize::precise(Size), LSN->getMemOperand()};
29705 }
29706 if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
29707 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
29708 return {false /*isVolatile*/,
29709 /*isAtomic*/ false,
29710 LN->getOperand(1),
29711 0,
29712 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
29713 (MachineMemOperand *)nullptr};
29714 }
29715 // Default.
29716 return {false /*isvolatile*/,
29717 /*isAtomic*/ false,
29718 SDValue(),
29719 (int64_t)0 /*offset*/,
29720 LocationSize::beforeOrAfterPointer(),
29721 (MachineMemOperand *)nullptr};
29722 };
29723
29724 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
29725 MUC1 = getCharacteristics(Op1);
29726
29727 // If they are to the same address, then they must be aliases.
29728 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
29729 MUC0.Offset == MUC1.Offset)
29730 return true;
29731
29732 // If they are both volatile then they cannot be reordered.
29733 if (MUC0.IsVolatile && MUC1.IsVolatile)
29734 return true;
29735
29736 // Be conservative about atomics for the moment
29737 // TODO: This is way overconservative for unordered atomics (see D66309)
29738 if (MUC0.IsAtomic && MUC1.IsAtomic)
29739 return true;
29740
29741 if (MUC0.MMO && MUC1.MMO) {
29742 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29743 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29744 return false;
29745 }
29746
29747 // If NumBytes is scalable and offset is not 0, conservatively return may
29748 // alias
29749 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
29750 MUC0.Offset != 0) ||
29751 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
29752 MUC1.Offset != 0))
29753 return true;
29754 // Try to prove that there is aliasing, or that there is no aliasing. Either
29755 // way, we can return now. If nothing can be proved, proceed with more tests.
29756 bool IsAlias;
29757 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
29758 DAG, IsAlias))
29759 return IsAlias;
29760
29761 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
29762 // either are not known.
29763 if (!MUC0.MMO || !MUC1.MMO)
29764 return true;
29765
29766 // If one operation reads from invariant memory, and the other may store, they
29767 // cannot alias. These should really be checking the equivalent of mayWrite,
29768 // but it only matters for memory nodes other than load/store.
29769 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29770 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29771 return false;
29772
29773 // If we know that SrcValue1 and SrcValue2 have relatively large alignment
29774 // compared to the size and offset of the access, we may be able to prove
29775 // they do not alias. This check is conservative for now to catch cases
29776 // created by splitting vector types; it only works when the offsets are
29777 // multiples of the size of the data.
29778 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
29779 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
29780 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
29781 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
29782 LocationSize Size0 = MUC0.NumBytes;
29783 LocationSize Size1 = MUC1.NumBytes;
29784
29785 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
29786 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
29787 !Size1.isScalable() && Size0 == Size1 &&
29788 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
29789 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
29790 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
29791 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
29792 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
29793
29794 // There is no overlap between these relatively aligned accesses of
29795 // similar size. Return no alias.
29796 if ((OffAlign0 + static_cast<int64_t>(
29797 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
29798 (OffAlign1 + static_cast<int64_t>(
29799 Size1.getValue().getKnownMinValue())) <= OffAlign0)
29800 return false;
29801 }
29802
29803 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
29804                  ? CombinerGlobalAA
29805                  : DAG.getSubtarget().useAA();
29806#ifndef NDEBUG
29807 if (CombinerAAOnlyFunc.getNumOccurrences() &&
29808 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
29809 UseAA = false;
29810#endif
29811
29812 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
29813 Size0.hasValue() && Size1.hasValue() &&
29814 // Can't represent a scalable size + fixed offset in LocationSize
29815 (!Size0.isScalable() || SrcValOffset0 == 0) &&
29816 (!Size1.isScalable() || SrcValOffset1 == 0)) {
29817 // Use alias analysis information.
29818 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
29819 int64_t Overlap0 =
29820 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
29821 int64_t Overlap1 =
29822 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
29823 LocationSize Loc0 =
29824 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
29825 LocationSize Loc1 =
29826 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
29827 if (BatchAA->isNoAlias(
29828 MemoryLocation(MUC0.MMO->getValue(), Loc0,
29829 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
29830 MemoryLocation(MUC1.MMO->getValue(), Loc1,
29831 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
29832 return false;
29833 }
29834
29835 // Otherwise we have to assume they alias.
29836 return true;
29837}
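// Worked example of the alignment-based check in mayAlias above (hypothetical
// operands, for intuition only): two 4-byte accesses whose MMOs share a base
// alignment of 16 and carry offsets 4 and 8 give OffAlign0 == 4 and
// OffAlign1 == 8; since 4 + 4 <= 8, the accesses cannot overlap within any
// 16-byte-aligned window and the query returns "no alias" without consulting AA.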
29838
29839/// Walk up chain skipping non-aliasing memory nodes,
29840/// looking for aliasing nodes and adding them to the Aliases vector.
29841void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
29842 SmallVectorImpl<SDValue> &Aliases) {
29843 SmallVector<SDValue, 8> Chains; // List of chains to visit.
29844 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
29845
29846 // Get alias information for node.
29847 // TODO: relax aliasing for unordered atomics (see D66309)
29848 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
29849
29850 // Starting off.
29851 Chains.push_back(OriginalChain);
29852 unsigned Depth = 0;
29853
29854 // Attempt to improve chain by a single step
29855 auto ImproveChain = [&](SDValue &C) -> bool {
29856 switch (C.getOpcode()) {
29857 case ISD::EntryToken:
29858 // No need to mark EntryToken.
29859 C = SDValue();
29860 return true;
29861 case ISD::LOAD:
29862 case ISD::STORE: {
29863 // Get alias information for C.
29864 // TODO: Relax aliasing for unordered atomics (see D66309)
29865 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
29866 cast<LSBaseSDNode>(C.getNode())->isSimple();
29867 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
29868 // Look further up the chain.
29869 C = C.getOperand(0);
29870 return true;
29871 }
29872 // Alias, so stop here.
29873 return false;
29874 }
29875
29876 case ISD::CopyFromReg:
29877 // Always forward past CopyFromReg.
29878 C = C.getOperand(0);
29879 return true;
29880
29881 case ISD::LIFETIME_START:
29882 case ISD::LIFETIME_END: {
29883 // We can forward past any lifetime start/end that can be proven not to
29884 // alias the memory access.
29885 if (!mayAlias(N, C.getNode())) {
29886 // Look further up the chain.
29887 C = C.getOperand(0);
29888 return true;
29889 }
29890 return false;
29891 }
29892 default:
29893 return false;
29894 }
29895 };
29896
29897 // Look at each chain and determine if it is an alias. If so, add it to the
29898 // aliases list. If not, then continue up the chain looking for the next
29899 // candidate.
29900 while (!Chains.empty()) {
29901 SDValue Chain = Chains.pop_back_val();
29902
29903 // Don't bother if we've seen Chain before.
29904 if (!Visited.insert(Chain.getNode()).second)
29905 continue;
29906
29907 // For TokenFactor nodes, look at each operand and only continue up the
29908 // chain until we reach the depth limit.
29909 //
29910 // FIXME: The depth check could be made to return the last non-aliasing
29911 // chain we found before we hit a tokenfactor rather than the original
29912 // chain.
29913 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
29914 Aliases.clear();
29915 Aliases.push_back(OriginalChain);
29916 return;
29917 }
29918
29919 if (Chain.getOpcode() == ISD::TokenFactor) {
29920 // We have to check each of the operands of the token factor for "small"
29921 // token factors, so we queue them up. Adding the operands to the queue
29922 // (stack) in reverse order maintains the original order and increases the
29923 // likelihood that getNode will find a matching token factor (CSE.)
29924 if (Chain.getNumOperands() > 16) {
29925 Aliases.push_back(Chain);
29926 continue;
29927 }
29928 for (unsigned n = Chain.getNumOperands(); n;)
29929 Chains.push_back(Chain.getOperand(--n));
29930 ++Depth;
29931 continue;
29932 }
29933 // Everything else
29934 if (ImproveChain(Chain)) {
29935 // Updated Chain Found, Consider new chain if one exists.
29936 if (Chain.getNode())
29937 Chains.push_back(Chain);
29938 ++Depth;
29939 continue;
29940 }
29941 // No Improved Chain Possible, treat as Alias.
29942 Aliases.push_back(Chain);
29943 }
29944}
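// Note on the traversal above: it is a depth-limited walk over chain operands.
// Exceeding TLI.getGatherAllAliasesMaxDepth() conservatively restores the
// original chain as the only "alias", and TokenFactors with more than 16
// operands are kept as aliases rather than expanded further.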
29945
29946/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29947/// (aliasing node.)
29948SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
29949 if (OptLevel == CodeGenOptLevel::None)
29950 return OldChain;
29951
29952 // Ops for replacing token factor.
29953 SmallVector<SDValue, 8> Aliases;
29954
29955 // Accumulate all the aliases to this node.
29956 GatherAllAliases(N, OldChain, Aliases);
29957
29958 // If no operands then chain to entry token.
29959 if (Aliases.empty())
29960 return DAG.getEntryNode();
29961
29962 // If a single operand then chain to it. We don't need to revisit it.
29963 if (Aliases.size() == 1)
29964 return Aliases[0];
29965
29966 // Construct a custom tailored token factor.
29967 return DAG.getTokenFactor(SDLoc(N), Aliases);
29968}
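// Sketch of the effect of FindBetterChain on a hypothetical chain (for
// intuition only): given
//   t1: store @A            (chain = EntryToken)
//   t2: store @B            (chain = t1)
//   t3: load  @C            (chain = t2)
// where neither store may alias the load, GatherAllAliases finds no aliasing
// chains, so the load can be re-chained directly to EntryToken; if several
// aliasing chains remain, they are joined with a TokenFactor instead.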
29969
29970// This function tries to collect a bunch of potentially interesting
29971// nodes to improve the chains of, all at once. This might seem
29972// redundant, as this function gets called when visiting every store
29973// node, so why not let the work be done on each store as it's visited?
29974//
29975// I believe this is mainly important because mergeConsecutiveStores
29976// is unable to deal with merging stores of different sizes, so unless
29977// we improve the chains of all the potential candidates up-front
29978// before running mergeConsecutiveStores, it might only see some of
29979// the nodes that will eventually be candidates, and then not be able
29980// to go from a partially-merged state to the desired final
29981// fully-merged state.
29982
29983bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
29984 SmallVector<StoreSDNode *, 8> ChainedStores;
29985 StoreSDNode *STChain = St;
29986 // Intervals records which offsets from BaseIndex have been covered. In the
29987 // common case, every store writes to the address immediately after the
29988 // previous one and is thus merged with the previous interval at insertion time.
29989
29990 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
29991                                  IntervalMapHalfOpenInfo<int64_t>>;
29992 IMap::Allocator A;
29993 IMap Intervals(A);
29994
29995 // This holds the base pointer, index, and the offset in bytes from the base
29996 // pointer.
29997 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
29998
29999 // We must have a base and an offset.
30000 if (!BasePtr.getBase().getNode())
30001 return false;
30002
30003 // Do not handle stores to undef base pointers.
30004 if (BasePtr.getBase().isUndef())
30005 return false;
30006
30007 // Do not handle stores to opaque types
30008 if (St->getMemoryVT().isZeroSized())
30009 return false;
30010
30011 // BaseIndexOffset assumes that offsets are fixed-size, which
30012 // is not valid for scalable vectors where the offsets are
30013 // scaled by `vscale`, so bail out early.
30014 if (St->getMemoryVT().isScalableVT())
30015 return false;
30016
30017 // Add ST's interval.
30018 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30019 std::monostate{});
30020
30021 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30022 if (Chain->getMemoryVT().isScalableVector())
30023 return false;
30024
30025 // If the chain has more than one use, then we can't reorder the mem ops.
30026 if (!SDValue(Chain, 0)->hasOneUse())
30027 break;
30028 // TODO: Relax for unordered atomics (see D66309)
30029 if (!Chain->isSimple() || Chain->isIndexed())
30030 break;
30031
30032 // Find the base pointer and offset for this memory node.
30033 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30034 // Check that the base pointer is the same as the original one.
30035 int64_t Offset;
30036 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30037 break;
30038 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30039 // Make sure we don't overlap with other intervals by checking the ones to
30040 // the left or right before inserting.
30041 auto I = Intervals.find(Offset);
30042 // If there's a next interval, we should end before it.
30043 if (I != Intervals.end() && I.start() < (Offset + Length))
30044 break;
30045 // If there's a previous interval, we should start after it.
30046 if (I != Intervals.begin() && (--I).stop() <= Offset)
30047 break;
30048 Intervals.insert(Offset, Offset + Length, std::monostate{});
30049
30050 ChainedStores.push_back(Chain);
30051 STChain = Chain;
30052 }
30053
30054 // If we didn't find a chained store, exit.
30055 if (ChainedStores.empty())
30056 return false;
30057
30058 // Improve all chained stores (St and ChainedStores members) starting from
30059 // where the store chain ended and return a single TokenFactor.
30060 SDValue NewChain = STChain->getChain();
30061 SmallVector<SDValue, 8> TFOps;
30062 for (unsigned I = ChainedStores.size(); I;) {
30063 StoreSDNode *S = ChainedStores[--I];
30064 SDValue BetterChain = FindBetterChain(S, NewChain);
30065 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30066 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30067 TFOps.push_back(SDValue(S, 0));
30068 ChainedStores[I] = S;
30069 }
30070
30071 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30072 SDValue BetterChain = FindBetterChain(St, NewChain);
30073 SDValue NewST;
30074 if (St->isTruncatingStore())
30075 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30076 St->getBasePtr(), St->getMemoryVT(),
30077 St->getMemOperand());
30078 else
30079 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30080 St->getBasePtr(), St->getMemOperand());
30081
30082 TFOps.push_back(NewST);
30083
30084 // If we improved every element of TFOps, then we've lost the dependence on
30085 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30086 // the beginning to keep relative order consistent with FindBetterChains.
30087 auto hasImprovedChain = [&](SDValue ST) -> bool {
30088 return ST->getOperand(0) != NewChain;
30089 };
30090 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30091 if (AddNewChain)
30092 TFOps.insert(TFOps.begin(), NewChain);
30093
30094 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30095 CombineTo(St, TF);
30096
30097 // Add TF and its operands to the worklist.
30098 AddToWorklist(TF.getNode());
30099 for (const SDValue &Op : TF->ops())
30100 AddToWorklist(Op.getNode());
30101 AddToWorklist(STChain);
30102 return true;
30103}
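// Worked example of the interval bookkeeping above (hypothetical offsets, for
// intuition only): if St writes 4 bytes at BaseIndex + 0 and the chained
// stores walked above write 4 bytes at offsets -4 and -8, Intervals ends up
// covering [-8, -4), [-4, 0) and [0, 4). A further chained store of 4 bytes at
// offset -6 would overlap [-8, -4) and [-4, 0), fail the checks before
// insertion, and terminate the walk at that point.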
30104
30105bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30106 if (OptLevel == CodeGenOptLevel::None)
30107 return false;
30108
30109 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30110
30111 // We must have a base and an offset.
30112 if (!BasePtr.getBase().getNode())
30113 return false;
30114
30115 // Do not handle stores to undef base pointers.
30116 if (BasePtr.getBase().isUndef())
30117 return false;
30118
30119 // Directly improve a chain of disjoint stores starting at St.
30120 if (parallelizeChainedStores(St))
30121 return true;
30122
30123 // Improve St's chain.
30124 SDValue BetterChain = FindBetterChain(St, St->getChain());
30125 if (St->getChain() != BetterChain) {
30126 replaceStoreChain(St, BetterChain);
30127 return true;
30128 }
30129 return false;
30130}
30131
30132/// This is the entry point for the file.
30133void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30134                           CodeGenOptLevel OptLevel) {
30135 /// This is the main entry point to this class.
30136 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30137}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, const SDLoc &Dl)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static cl::opt< bool > DisableCombines("combiner-disabled", cl::Hidden, cl::init(false), cl::desc("Disable the DAG combiner"))
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL)
Fold "masked merge" expressions like (m & x) | (~m & y) and its DeMorgan variant (~m | x) & (m | y) i...
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, SDValue FVal, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate, bool FromAdd)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue eliminateFPCastPair(SDNode *N)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:194
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:442
iv Induction Variable Users
Definition: IVUsers.cpp:48
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:1120
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1208
bool isNegative() const
Definition: APFloat.h:1449
bool isNormal() const
Definition: APFloat.h:1453
bool isDenormal() const
Definition: APFloat.h:1450
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1432
const fltSemantics & getSemantics() const
Definition: APFloat.h:1457
bool isNaN() const
Definition: APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1088
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
bool isLargest() const
Definition: APFloat.h:1465
bool isInfinity() const
Definition: APFloat.h:1446
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1971
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1758
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:644
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1670
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1795
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:466
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
int32_t exactLogBase2() const
Definition: APInt.h:1783
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
unsigned countLeadingZeros() const
Definition: APInt.h:1606
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1452
unsigned logBase2() const
Definition: APInt.h:1761
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1960
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1367
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:200
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
This is an SDNode representing atomic operations.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static LLVM_ABI bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstant() const
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:32
static ByteProvider getConstantZero()
Definition: ByteProvider.h:67
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:60
Combiner implementation.
Definition: Combiner.h:34
ISD::CondCode get() const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1314
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:535
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
This class represents a range of values.
Definition: ConstantRange.h:47
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange truncate(uint32_t BitWidth, unsigned NoWrapKind=0) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getUpper() const
Return the upper value for this range.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:198
bool isBigEndian() const
Definition: DataLayout.h:199
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:88
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
bool erase(const KeyT &Val)
Definition: DenseMap.h:319
iterator end()
Definition: DenseMap.h:87
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:312
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:323
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:39
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Metadata node.
Definition: Metadata.h:1077
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1565
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by the operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
Root of the metadata hierarchy.
Definition: Metadata.h:63
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:424
iterator end() const
Definition: ArrayRef.h:348
iterator begin() const
Definition: ArrayRef.h:347
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:417
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
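A minimal sketch (not code from this file) of how the SDNode/SDValue accessors listed above are typically used to match a pattern inside a combine; the helper name matchAddOfConstant is hypothetical.
// Hypothetical helper: returns true if V is (add X, C) with a single use,
// using only the SDValue accessors documented above.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool matchAddOfConstant(SDValue V, SDValue &X) {
  if (V.getOpcode() != ISD::ADD || !V.hasOneUse())
    return false;
  // In canonical form the constant operand, if any, is operand 1.
  if (!isa<ConstantSDNode>(V.getOperand(1)))
    return false;
  X = V.getOperand(0);
  return true;
}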
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:372
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:578
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:500
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
Definition: SelectionDAG.h:963
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
Definition: SelectionDAG.h:956
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
LLVM_ABI void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:902
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:570
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:505
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVM_ABI std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:587
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:918
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:979
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
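A minimal sketch, using only SelectionDAG helpers listed above, of how a combine typically builds its replacement value; the fold shown, (sub X, (xor Y, -1)) -> (add (add X, Y), 1), and the function name are illustrative rather than a transform taken from this file.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: materialize (add (add X, Y), 1) as the replacement for
// (sub X, (xor Y, -1)), using the identity X - ~Y == X + Y + 1.
static SDValue buildAddFormSketch(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                  SDValue X, SDValue Y) {
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Y);
  SDValue One = DAG.getConstant(1, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, Sum, One);
}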
A vector that has set insertion semantics.
Definition: SetVector.h:59
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:198
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
value_type pop_back_val()
Definition: SetVector.h:296
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool empty() const
Definition: SmallSet.h:169
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:287
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are functions calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
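A sketch of how a backend might override one of the TargetLowering hooks listed above; the target class name is hypothetical and the hook chosen (enableAggressiveFMAFusion) is only an example.
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Hypothetical target lowering: opt in to aggressive FMA fusion for all
// floating-point types, so the combiner forms FMAs more eagerly.
class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}
  bool enableAggressiveFMAFusion(EVT VT) const override {
    return VT.isFloatingPoint();
  }
};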
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:35
Value * getOperand(unsigned i) const
Definition: User.h:232
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
user_iterator user_begin()
Definition: Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
iterator_range< user_iterator > users()
Definition: Value.h:426
bool use_empty() const
Definition: Value.h:346
iterator_range< use_iterator > uses()
Definition: Value.h:380
int getNumOccurrences() const
Definition: CommandLine.h:400
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:184
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:233
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:255
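A small sketch of the fixed vs. scalable distinction encoded by the TypeSize helpers above; the function name is illustrative.
#include "llvm/Support/TypeSize.h"
#include <cstdint>
using namespace llvm;

// Illustrative: a scalable size only reports a known minimum (scaled by vscale
// at run time), while a fixed size reports an exact compile-time value.
static uint64_t knownMinBits(TypeSize TS) {
  return TS.isScalable() ? TS.getKnownMinValue() : TS.getFixedValue();
}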
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2263
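A tiny illustration of the signed/unsigned distinction behind the APIntOps::smin/umin helpers above; the values are arbitrary.
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Illustrative: the 8-bit pattern 0xFF is -1 when compared as signed but 255
// when compared as unsigned, so smin and umin pick different operands.
static void signedVsUnsignedMin() {
  APInt A(8, 0xFF), B(8, 0x01);
  const APInt &SMin = APIntOps::smin(A, B); // A: -1 < 1
  const APInt &UMin = APIntOps::umin(A, B); // B: 1 < 255
  (void)SMin;
  (void)UMin;
}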
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ Entry
Definition: COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ PARTIAL_REDUCE_SMLA
Definition: ISDOpcodes.h:1510
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1401
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:387
@ ConstantFP
Definition: ISDOpcodes.h:87
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1351
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1476
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1480
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1490
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1568
@ PARTIAL_REDUCE_UMLA
Definition: ISDOpcodes.h:1511
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1473
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:773
@ TRUNCATE_SSAT_U
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1477
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:809
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition: ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1492
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:343
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ TargetConstantFP
Definition: ISDOpcodes.h:175
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:928
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1448
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:351
@ TargetFrameIndex
Definition: ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1418
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:323
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1413
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1301
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1493
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:994
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1081
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1117
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:701
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1474
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:420
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1025
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:690
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:903
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Definition: ISDOpcodes.h:1546
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:927
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1481
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ PARTIAL_REDUCE_SUMLA
Definition: ISDOpcodes.h:1512
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:360
@ AssertZext
Definition: ISDOpcodes.h:63
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1250
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1122
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1086
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:857
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1651
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1761
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1736
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1756
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1572
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
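Usage sketch (not from this file; CC and OpVT are assumed to name the condition code and operand type of the setcc being combined): commuting or negating a comparison goes through these helpers rather than hand-written tables.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(CC); // (Y op X) form
  ISD::CondCode InvCC = ISD::getSetCCInverse(CC, OpVT);       // !(X op Y) form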
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1647
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1647
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1718
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
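Illustrative sketch of the predicate hook above, assuming N and VT come from the surrounding combine; the lambda is applied to the scalar constant or to each element of a constant build/splat vector:
  auto IsInRangeShiftAmt = [&](ConstantSDNode *C) {
    return C->getAPIntValue().ult(VT.getScalarSizeInBits());
  };
  if (!ISD::matchUnaryPredicate(N->getOperand(1), IsInRangeShiftAmt))
    return SDValue(); // bail out unless every shift amount is in range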
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1634
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
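A typical guard before folding through a load, sketched with an assumed node N; only plain (non-extending, unindexed) single-use loads are rewritten:
  auto *LD = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (LD && ISD::isNormalLoad(LD) && LD->hasOneUse()) {
    // safe to rebuild the load with a different type or chain here
  }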
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1730
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
@ VecLoad
Definition: NVPTX.h:131
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:980
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
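Illustrative sd_match sketch, assuming a node N from the combine in progress and the capturing m_Value/m_CondCode overloads: recognise a setcc and pull out its pieces in one call.
  SDValue LHS, RHS;
  ISD::CondCode CC;
  if (sd_match(N, m_SetCC(m_Value(LHS), m_Value(RHS), m_CondCode(CC)))) {
    // fold based on LHS, RHS and CC
  }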
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::Argument NV
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:349
@ Offset
Definition: DWP.cpp:477
@ Length
Definition: DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:860
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
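Sketch of the range helpers as used in DAG code, with N assumed to be a BUILD_VECTOR node: check that every operand is undef or a constant.
  bool AllConstantOrUndef = all_of(N->op_values(), [](SDValue Op) {
    return Op.isUndef() || isa<ConstantSDNode>(Op);
  });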
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1605
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
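Sketch (N assumed): look through bitcasts before inspecting the vector contents, a very common pairing in this file.
  SDValue Src = peekThroughBitcasts(N->getOperand(0));
  if (ISD::isBuildVectorAllZeros(Src.getNode())) {
    // operand is a (possibly bitcast) all-zeros vector
  }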
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:260
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2113
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:531
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition: APFloat.h:1534
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1587
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:390
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:157
unsigned M1(unsigned Val)
Definition: VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1543
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:203
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
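Worked sketch of the power-of-two helpers, assuming VT is a fixed-width type being narrowed: guard on a power-of-two bit width and take its log.
  unsigned SizeInBits = VT.getFixedSizeInBits();
  if (!isPowerOf2_32(SizeInBits))
    return SDValue();                      // only handle power-of-two widths
  unsigned Log2Size = Log2_32(SizeInBits); // e.g. 64 -> 6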
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:82
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:376
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
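Sketch (N assumed): the usual way a combine checks whether an operand is a constant or a constant splat before reading its value.
  if (ConstantSDNode *C1 = isConstOrConstSplat(N->getOperand(1)))
    if (C1->isOne()) {
      // fold the X op 1 case
    }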
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2127
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
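Alignment sketch, assuming LD and ByteOffset come from a load being split: derive the alignment known to hold at the offset address.
  Align NewAlign = commonAlignment(LD->getAlign(), ByteOffset);
  if (NewAlign.value() >= 4) {
    // the address at LD + ByteOffset is at least 4-byte aligned
  }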
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:378
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:332
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:328
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:324
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition: APFloat.cpp:365
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:320
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:338
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:274
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:187
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:287
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:251
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:263
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:330
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition: ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
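EVT sketch, assuming DAG and a type VT from the combine: build the half-width integer type that narrowing combines commonly target.
  LLVMContext &Ctx = *DAG.getContext();
  EVT HalfVT = EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() / 2);
  if (VT.isVector())
    HalfVT = EVT::getVectorVT(Ctx, HalfVT, VT.getVectorElementCount());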
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:101
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:235
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:289
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:241
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:60
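KnownBits sketch (DAG and Op assumed): combines typically call SelectionDAG::computeKnownBits and then the accessors above to prove facts about a value.
  KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.isNonNegative() && Known.countMinTrailingZeros() >= 3) {
    // Op is provably a non-negative multiple of 8
  }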
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const
void setDisjoint(bool b)
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
bool hasNoNaNs() const
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasDisjoint() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasNonNeg() const
bool hasAllowReassociation() const
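Flags sketch (N assumed): floating-point combines read these flags before fusing or reassociating.
  SDNodeFlags Flags = N->getFlags();
  bool CanContract = Flags.hasAllowContract();
  bool CanReassoc = Flags.hasAllowReassociation() && Flags.hasNoNaNs();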
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:318
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
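Sketch of a target combine handing its replacement back through DAGCombinerInfo; DCI, N and NewVal are assumed, and the single-SDValue CombineTo overload is used for brevity.
  // Replace every value produced by N with NewVal and queue users for revisit.
  return DCI.CombineTo(N, NewVal, /*AddTo=*/true);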
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...