DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
129static cl::opt<unsigned> TokenFactorInlineLimit(
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
133static cl::opt<unsigned> StoreMergeDependenceLimit(
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
138static cl::opt<bool> EnableReduceLoadOpStoreWidth(
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
148static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
163 CombineLevel Level = BeforeLegalizeTypes;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
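 /// For example, the node stored at Worklist[5] records index 5 in its
 /// CombinerWorklistIndex; when a node is removed its slot is nulled out, and
 /// getNextWorklistEntry() simply skips over null entries.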
180 SmallVector<SDNode *, 64> Worklist;
181
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. Because we do not add duplicate nodes
184 /// to the worklist, this is different from the tail of the worklist.
185 SmallSetVector<SDNode *, 32> PruningList;
186
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as store merging
192 /// candidate again.
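 /// For example, once the same (StoreNode, RootNode) pair has bailed out of
 /// the dependence check combiner-store-merge-dependence-limit times (10 by
 /// default), that store is no longer considered with that root.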
193 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
194
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// stores candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the work lists because they might get more simplified now.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its users to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist, making sure its instance is at the back (next to
275 /// be processed).
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
356 ? APInt::getAllOnes(VT.getVectorNumElements())
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns a nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it,
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
550 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
551 SDValue visitVECTOR_SHUFFLE(SDNode *N);
552 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
553 SDValue visitINSERT_SUBVECTOR(SDNode *N);
554 SDValue visitVECTOR_COMPRESS(SDNode *N);
555 SDValue visitMLOAD(SDNode *N);
556 SDValue visitMSTORE(SDNode *N);
557 SDValue visitMGATHER(SDNode *N);
558 SDValue visitMSCATTER(SDNode *N);
559 SDValue visitMHISTOGRAM(SDNode *N);
560 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
561 SDValue visitVPGATHER(SDNode *N);
562 SDValue visitVPSCATTER(SDNode *N);
563 SDValue visitVP_STRIDED_LOAD(SDNode *N);
564 SDValue visitVP_STRIDED_STORE(SDNode *N);
565 SDValue visitFP_TO_FP16(SDNode *N);
566 SDValue visitFP16_TO_FP(SDNode *N);
567 SDValue visitFP_TO_BF16(SDNode *N);
568 SDValue visitBF16_TO_FP(SDNode *N);
569 SDValue visitVECREDUCE(SDNode *N);
570 SDValue visitVPOp(SDNode *N);
571 SDValue visitGET_FPENV_MEM(SDNode *N);
572 SDValue visitSET_FPENV_MEM(SDNode *N);
573
574 template <class MatchContextClass>
575 SDValue visitFADDForFMACombine(SDNode *N);
576 template <class MatchContextClass>
577 SDValue visitFSUBForFMACombine(SDNode *N);
578 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
579
580 SDValue XformToShuffleWithZero(SDNode *N);
581 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
582 const SDLoc &DL,
583 SDNode *N,
584 SDValue N0,
585 SDValue N1);
586 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
591 EVT VT, SDValue N0, SDValue N1,
592 SDNodeFlags Flags = SDNodeFlags());
593
594 SDValue visitShiftByConstant(SDNode *N);
595
596 SDValue foldSelectOfConstants(SDNode *N);
597 SDValue foldVSelectOfConstants(SDNode *N);
598 SDValue foldBinOpIntoSelect(SDNode *BO);
599 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
600 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
601 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
602 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
603 SDValue N2, SDValue N3, ISD::CondCode CC,
604 bool NotExtCompare = false);
605 SDValue convertSelectOfFPConstantsToLoadOffset(
606 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
607 ISD::CondCode CC);
608 SDValue foldSignChangeInBitcast(SDNode *N);
609 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
610 SDValue N2, SDValue N3, ISD::CondCode CC);
611 SDValue foldSelectOfBinops(SDNode *N);
612 SDValue foldSextSetcc(SDNode *N);
613 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
614 const SDLoc &DL);
615 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
616 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
617 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
618 SDValue False, ISD::CondCode CC, const SDLoc &DL);
619 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue unfoldMaskedMerge(SDNode *N);
622 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
623 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
624 const SDLoc &DL, bool foldBooleans);
625 SDValue rebuildSetCC(SDValue N);
626
627 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
628 SDValue &CC, bool MatchStrict = false) const;
629 bool isOneUseSetCC(SDValue N) const;
630
631 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
632 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
633
634 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
635 unsigned HiOp);
636 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
637 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
638 const TargetLowering &TLI);
639 SDValue foldPartialReduceMLAMulOp(SDNode *N);
640 SDValue foldPartialReduceAdd(SDNode *N);
641
642 SDValue CombineExtLoad(SDNode *N);
643 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
644 SDValue combineRepeatedFPDivisors(SDNode *N);
645 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
646 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
647 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
648 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
650 SDValue BuildSDIV(SDNode *N);
651 SDValue BuildSDIVPow2(SDNode *N);
652 SDValue BuildUDIV(SDNode *N);
653 SDValue BuildSREMPow2(SDNode *N);
654 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
655 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
656 bool KnownNeverZero = false,
657 bool InexpensiveOnly = false,
658 std::optional<EVT> OutVT = std::nullopt);
659 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
660 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
661 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
662 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
663 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
664 SDNodeFlags Flags, bool Reciprocal);
665 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
666 SDNodeFlags Flags, bool Reciprocal);
667 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
668 bool DemandHighBits = true);
669 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
670 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
671 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
672 bool HasPos, unsigned PosOpcode,
673 unsigned NegOpcode, const SDLoc &DL);
674 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
675 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
676 bool HasPos, unsigned PosOpcode,
677 unsigned NegOpcode, const SDLoc &DL);
678 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
679 bool FromAdd);
680 SDValue MatchLoadCombine(SDNode *N);
681 SDValue mergeTruncStores(StoreSDNode *N);
682 SDValue reduceLoadWidth(SDNode *N);
683 SDValue ReduceLoadOpStoreWidth(SDNode *N);
684 SDValue splitMergedValStore(StoreSDNode *ST);
685 SDValue TransformFPLoadStorePair(SDNode *N);
686 SDValue convertBuildVecZextToZext(SDNode *N);
687 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
688 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
689 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
690 SDValue reduceBuildVecToShuffle(SDNode *N);
691 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
692 ArrayRef<int> VectorMask, SDValue VecIn1,
693 SDValue VecIn2, unsigned LeftIdx,
694 bool DidSplitVec);
695 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
696
697 /// Walk up chain skipping non-aliasing memory nodes,
698 /// looking for aliasing nodes and adding them to the Aliases vector.
699 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
700 SmallVectorImpl<SDValue> &Aliases);
701
702 /// Return true if there is any possibility that the two addresses overlap.
703 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
704
705 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
706 /// chain (aliasing node.)
707 SDValue FindBetterChain(SDNode *N, SDValue Chain);
708
709 /// Try to replace a store and any possibly adjacent stores on
710 /// consecutive chains with better chains. Return true only if St is
711 /// replaced.
712 ///
713 /// Notice that other chains may still be replaced even if the function
714 /// returns false.
715 bool findBetterNeighborChains(StoreSDNode *St);
716
717 // Helper for findBetterNeighborChains. Walk up the store chain and add
718 // additional chained stores that do not overlap and can be parallelized.
719 bool parallelizeChainedStores(StoreSDNode *St);
720
721 /// Holds a pointer to an LSBaseSDNode as well as information on where it
722 /// is located in a sequence of memory operations connected by a chain.
723 struct MemOpLink {
724 // Ptr to the mem node.
725 LSBaseSDNode *MemNode;
726
727 // Offset from the base ptr.
728 int64_t OffsetFromBase;
729
730 MemOpLink(LSBaseSDNode *N, int64_t Offset)
731 : MemNode(N), OffsetFromBase(Offset) {}
732 };
733
734 // Classify the origin of a stored value.
735 enum class StoreSource { Unknown, Constant, Extract, Load };
736 StoreSource getStoreSource(SDValue StoreVal) {
737 switch (StoreVal.getOpcode()) {
738 case ISD::Constant:
739 case ISD::ConstantFP:
740 return StoreSource::Constant;
741 case ISD::BUILD_VECTOR:
742 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
743 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
744 return StoreSource::Constant;
745 return StoreSource::Unknown;
746 case ISD::EXTRACT_VECTOR_ELT:
747 case ISD::EXTRACT_SUBVECTOR:
748 return StoreSource::Extract;
749 case ISD::LOAD:
750 return StoreSource::Load;
751 default:
752 return StoreSource::Unknown;
753 }
754 }
755
756 /// This is a helper function for visitMUL to check the profitability
757 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
758 /// MulNode is the original multiply, AddNode is (add x, c1),
759 /// and ConstNode is c2.
760 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
761 SDValue ConstNode);
762
763 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
764 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
765 /// the type of the loaded value to be extended.
766 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
767 EVT LoadResultTy, EVT &ExtVT);
768
769 /// Helper function to calculate whether the given Load/Store can have its
770 /// width reduced to ExtVT.
771 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
772 EVT &MemVT, unsigned ShAmt = 0);
773
774 /// Used by BackwardsPropagateMask to find suitable loads.
775 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
776 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
777 ConstantSDNode *Mask, SDNode *&NodeToMask);
778 /// Attempt to propagate a given AND node back to load leaves so that they
779 /// can be combined into narrow loads.
780 bool BackwardsPropagateMask(SDNode *N);
781
782 /// Helper function for mergeConsecutiveStores which merges the component
783 /// store chains.
784 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
785 unsigned NumStores);
786
787 /// Helper function for mergeConsecutiveStores which checks if all the store
788 /// nodes have the same underlying object. We can still reuse the first
789 /// store's pointer info if all the stores are from the same object.
790 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
791
792 /// This is a helper function for mergeConsecutiveStores. When the source
793 /// elements of the consecutive stores are all constants or all extracted
794 /// vector elements, try to merge them into one larger store introducing
795 /// bitcasts if necessary. \return True if a merged store was created.
796 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
797 EVT MemVT, unsigned NumStores,
798 bool IsConstantSrc, bool UseVector,
799 bool UseTrunc);
800
801 /// This is a helper function for mergeConsecutiveStores. Stores that
802 /// potentially may be merged with St are placed in StoreNodes. On success,
803 /// returns a chain predecessor to all store candidates.
804 SDNode *getStoreMergeCandidates(StoreSDNode *St,
805 SmallVectorImpl<MemOpLink> &StoreNodes);
806
807 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
808 /// have indirect dependency through their operands. RootNode is the
809 /// predecessor to all stores calculated by getStoreMergeCandidates and is
810 /// used to prune the dependency check. \return True if safe to merge.
811 bool checkMergeStoreCandidatesForDependencies(
812 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
813 SDNode *RootNode);
814
815 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
816 /// chain has a call in it. \return True if a call is found.
817 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
818
819 /// This is a helper function for mergeConsecutiveStores. Given a list of
820 /// store candidates, find the first N that are consecutive in memory.
821 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
822 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
823 int64_t ElementSizeBytes) const;
824
825 /// This is a helper function for mergeConsecutiveStores. It is used for
826 /// store chains that are composed entirely of constant values.
827 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
828 unsigned NumConsecutiveStores,
829 EVT MemVT, SDNode *Root, bool AllowVectors);
830
831 /// This is a helper function for mergeConsecutiveStores. It is used for
832 /// store chains that are composed entirely of extracted vector elements.
833 /// When extracting multiple vector elements, try to store them in one
834 /// vector store rather than a sequence of scalar stores.
835 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
836 unsigned NumConsecutiveStores, EVT MemVT,
837 SDNode *Root);
838
839 /// This is a helper function for mergeConsecutiveStores. It is used for
840 /// store chains that are composed entirely of loaded values.
841 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
842 unsigned NumConsecutiveStores, EVT MemVT,
843 SDNode *Root, bool AllowVectors,
844 bool IsNonTemporalStore, bool IsNonTemporalLoad);
845
846 /// Merge consecutive store operations into a wide store.
847 /// This optimization uses wide integers or vectors when possible.
848 /// \return true if stores were merged.
849 bool mergeConsecutiveStores(StoreSDNode *St);
850
851 /// Try to transform a truncation where C is a constant:
852 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
853 ///
854 /// \p N needs to be a truncation and its first operand an AND. Other
855 /// requirements are checked by the function (e.g. that trunc is
856 /// single-use) and if missed an empty SDValue is returned.
857 SDValue distributeTruncateThroughAnd(SDNode *N);
858
859 /// Helper function to determine whether the target supports operation
860 /// given by \p Opcode for type \p VT, that is, whether the operation
861 /// is legal or custom before legalizing operations, and whether it is
862 /// legal (but not custom) after legalization.
863 bool hasOperation(unsigned Opcode, EVT VT) {
864 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
865 }
866
867 bool hasUMin(EVT VT) const {
868 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
869 return (LK.first == TargetLoweringBase::TypeLegal ||
870 LK.first == TargetLoweringBase::TypePromoteInteger) &&
871 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
872 }
873
874 public:
875 /// Runs the dag combiner on all nodes in the work list
876 void Run(CombineLevel AtLevel);
877
878 SelectionDAG &getDAG() const { return DAG; }
879
880 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
881 EVT getShiftAmountTy(EVT LHSTy) {
882 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
883 }
884
885 /// This method returns true if we are running before type legalization or
886 /// if the specified VT is legal.
887 bool isTypeLegal(const EVT &VT) {
888 if (!LegalTypes) return true;
889 return TLI.isTypeLegal(VT);
890 }
891
892 /// Convenience wrapper around TargetLowering::getSetCCResultType
893 EVT getSetCCResultType(EVT VT) const {
894 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
895 }
896
897 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
898 SDValue OrigLoad, SDValue ExtLoad,
899 ISD::NodeType ExtType);
900 };
901
902/// This class is a DAGUpdateListener that removes any deleted
903/// nodes from the worklist.
904class WorklistRemover : public SelectionDAG::DAGUpdateListener {
905 DAGCombiner &DC;
906
907public:
908 explicit WorklistRemover(DAGCombiner &dc)
909 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
910
911 void NodeDeleted(SDNode *N, SDNode *E) override {
912 DC.removeFromWorklist(N);
913 }
914};
915
916class WorklistInserter : public SelectionDAG::DAGUpdateListener {
917 DAGCombiner &DC;
918
919public:
920 explicit WorklistInserter(DAGCombiner &dc)
921 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
922
923 // FIXME: Ideally we could add N to the worklist, but this causes exponential
924 // compile time costs in large DAGs, e.g. Halide.
925 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
926};
927
928} // end anonymous namespace
929
930//===----------------------------------------------------------------------===//
931// TargetLowering::DAGCombinerInfo implementation
932//===----------------------------------------------------------------------===//
933
934void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
935 ((DAGCombiner*)DC)->AddToWorklist(N);
936}
937
938SDValue TargetLowering::DAGCombinerInfo::
939CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
940 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
941}
942
943SDValue TargetLowering::DAGCombinerInfo::
944CombineTo(SDNode *N, SDValue Res, bool AddTo) {
945 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
946}
947
948SDValue TargetLowering::DAGCombinerInfo::
949CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
950 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
951}
952
953bool TargetLowering::DAGCombinerInfo::
954recursivelyDeleteUnusedNodes(SDNode *N) {
955 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
956}
957
958void TargetLowering::DAGCombinerInfo::
959CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
960 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
961}
962
963//===----------------------------------------------------------------------===//
964// Helper Functions
965//===----------------------------------------------------------------------===//
966
967void DAGCombiner::deleteAndRecombine(SDNode *N) {
968 removeFromWorklist(N);
969
970 // If the operands of this node are only used by the node, they will now be
971 // dead. Make sure to re-visit them and recursively delete dead nodes.
972 for (const SDValue &Op : N->ops())
973 // For an operand generating multiple values, one of the values may
974 // become dead allowing further simplification (e.g. split index
975 // arithmetic from an indexed load).
976 if (Op->hasOneUse() || Op->getNumValues() > 1)
977 AddToWorklist(Op.getNode());
978
979 DAG.DeleteNode(N);
980}
981
982// APInts must be the same size for most operations; this helper
983// function zero extends the shorter of the pair so that they match.
984// We provide an Offset so that we can create bitwidths that won't overflow.
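// For example, an 8-bit LHS and a 16-bit RHS with Offset = 1 both become
// 17 bits wide, leaving one bit of headroom so a subsequent add cannot overflow.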
985static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
986 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
987 LHS = LHS.zext(Bits);
988 RHS = RHS.zext(Bits);
989}
990
991// Return true if this node is a setcc, or is a select_cc
992// that selects between the target values used for true and false, making it
993// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
994// the appropriate nodes based on the type of node we are checking. This
995// simplifies life a bit for the callers.
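// For example, (select_cc LHS, RHS, TrueVal, FalseVal, CC) where TrueVal and
// FalseVal are the target's canonical boolean true/false constants behaves
// exactly like (setcc LHS, RHS, CC), so callers can treat both forms uniformly.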
996bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
997 SDValue &CC, bool MatchStrict) const {
998 if (N.getOpcode() == ISD::SETCC) {
999 LHS = N.getOperand(0);
1000 RHS = N.getOperand(1);
1001 CC = N.getOperand(2);
1002 return true;
1003 }
1004
1005 if (MatchStrict &&
1006 (N.getOpcode() == ISD::STRICT_FSETCC ||
1007 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1008 LHS = N.getOperand(1);
1009 RHS = N.getOperand(2);
1010 CC = N.getOperand(3);
1011 return true;
1012 }
1013
1014 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1015 !TLI.isConstFalseVal(N.getOperand(3)))
1016 return false;
1017
1018 if (TLI.getBooleanContents(N.getValueType()) ==
1019 TargetLowering::UndefinedBooleanContent)
1020 return false;
1021
1022 LHS = N.getOperand(0);
1023 RHS = N.getOperand(1);
1024 CC = N.getOperand(4);
1025 return true;
1026}
1027
1028/// Return true if this is a SetCC-equivalent operation with only one use.
1029/// If this is true, it allows the users to invert the operation for free when
1030/// it is profitable to do so.
1031bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1032 SDValue N0, N1, N2;
1033 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1034 return true;
1035 return false;
1036}
1037
1038static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1039 if (!ScalarTy.isSimple())
1040 return false;
1041
1042 uint64_t MaskForTy = 0ULL;
1043 switch (ScalarTy.getSimpleVT().SimpleTy) {
1044 case MVT::i8:
1045 MaskForTy = 0xFFULL;
1046 break;
1047 case MVT::i16:
1048 MaskForTy = 0xFFFFULL;
1049 break;
1050 case MVT::i32:
1051 MaskForTy = 0xFFFFFFFFULL;
1052 break;
1053 default:
1054 return false;
1055 break;
1056 }
1057
1058 APInt Val;
1059 if (ISD::isConstantSplatVector(N, Val))
1060 return Val.getLimitedValue() == MaskForTy;
1061
1062 return false;
1063}
1064
1065// Determines if it is a constant integer or a splat/build vector of constant
1066// integers (and undefs).
1067// Do not permit build vector implicit truncation.
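// For example, a v4i32 BUILD_VECTOR whose operands are i64 constants is
// rejected, because using those operands would implicitly truncate them to i32.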
1068static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1070 return !(Const->isOpaque() && NoOpaques);
1071 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1072 return false;
1073 unsigned BitWidth = N.getScalarValueSizeInBits();
1074 for (const SDValue &Op : N->op_values()) {
1075 if (Op.isUndef())
1076 continue;
1077 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1078 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1079 (Const->isOpaque() && NoOpaques))
1080 return false;
1081 }
1082 return true;
1083}
1084
1085// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1086// with undefs.
1087static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1088 if (V.getOpcode() != ISD::BUILD_VECTOR)
1089 return false;
1090 return isConstantOrConstantVector(V, NoOpaques) ||
1091 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1092}
1093
1094// Determine if this is an indexed load with an opaque target constant index.
1095static bool canSplitIdx(LoadSDNode *LD) {
1096 return MaySplitLoadIndex &&
1097 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1098 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1099}
1100
1101bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1102 const SDLoc &DL,
1103 SDNode *N,
1104 SDValue N0,
1105 SDValue N1) {
1106 // Currently this only tries to ensure we don't undo the GEP splits done by
1107 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1108 // we check if the following transformation would be problematic:
1109 // (load/store (add, (add, x, offset1), offset2)) ->
1110 // (load/store (add, x, offset1+offset2)).
1111
1112 // (load/store (add, (add, x, y), offset2)) ->
1113 // (load/store (add, (add, x, offset2), y)).
1114
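 // For example, a target whose immediate addressing accepts offset2 but not
 // offset1+offset2 would lose the fold if the constants were combined.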
1115 if (!N0.isAnyAdd())
1116 return false;
1117
1118 // Check for vscale addressing modes.
1119 // (load/store (add/sub (add x, y), vscale))
1120 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1121 // (load/store (add/sub (add x, y), (mul vscale, C)))
1122 if ((N1.getOpcode() == ISD::VSCALE ||
1123 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1124 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1125 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1126 N1.getValueType().getFixedSizeInBits() <= 64) {
1127 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1128 ? N1.getConstantOperandVal(0)
1129 : (N1.getOperand(0).getConstantOperandVal(0) *
1130 (N1.getOpcode() == ISD::SHL
1131 ? (1LL << N1.getConstantOperandVal(1))
1132 : N1.getConstantOperandVal(1)));
1133 if (Opc == ISD::SUB)
1134 ScalableOffset = -ScalableOffset;
1135 if (all_of(N->users(), [&](SDNode *Node) {
1136 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1137 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.ScalableOffset = ScalableOffset;
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1145 AS);
1146 }
1147 return false;
1148 }))
1149 return true;
1150 }
1151
1152 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1153 return false;
1154
1155 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1156 if (!C2)
1157 return false;
1158
1159 const APInt &C2APIntVal = C2->getAPIntValue();
1160 if (C2APIntVal.getSignificantBits() > 64)
1161 return false;
1162
1163 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1164 if (N0.hasOneUse())
1165 return false;
1166
1167 const APInt &C1APIntVal = C1->getAPIntValue();
1168 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1169 if (CombinedValueIntVal.getSignificantBits() > 64)
1170 return false;
1171 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1172
1173 for (SDNode *Node : N->users()) {
1174 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1175 // Is x[offset2] already not a legal addressing mode? If so then
1176 // reassociating the constants breaks nothing (we test offset2 because
1177 // that's the one we hope to fold into the load or store).
1178 TargetLoweringBase::AddrMode AM;
1179 AM.HasBaseReg = true;
1180 AM.BaseOffs = C2APIntVal.getSExtValue();
1181 EVT VT = LoadStore->getMemoryVT();
1182 unsigned AS = LoadStore->getAddressSpace();
1183 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1184 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1185 continue;
1186
1187 // Would x[offset1+offset2] still be a legal addressing mode?
1188 AM.BaseOffs = CombinedValue;
1189 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1190 return true;
1191 }
1192 }
1193 } else {
1194 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1195 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1196 return false;
1197
1198 for (SDNode *Node : N->users()) {
1199 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1200 if (!LoadStore)
1201 return false;
1202
1203 // Is x[offset2] a legal addressing mode? If so then
1204 // reassociating the constants would break the addressing pattern.
1205 TargetLoweringBase::AddrMode AM;
1206 AM.HasBaseReg = true;
1207 AM.BaseOffs = C2APIntVal.getSExtValue();
1208 EVT VT = LoadStore->getMemoryVT();
1209 unsigned AS = LoadStore->getAddressSpace();
1210 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1211 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1212 return false;
1213 }
1214 return true;
1215 }
1216
1217 return false;
1218}
1219
1220/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1221/// \p N0 is the same kind of operation as \p Opc.
1222SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1223 SDValue N0, SDValue N1,
1224 SDNodeFlags Flags) {
1225 EVT VT = N0.getValueType();
1226
1227 if (N0.getOpcode() != Opc)
1228 return SDValue();
1229
1230 SDValue N00 = N0.getOperand(0);
1231 SDValue N01 = N0.getOperand(1);
1232
1233 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1234 SDNodeFlags NewFlags;
1235 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1236 Flags.hasNoUnsignedWrap())
1237 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1238
1239 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1240 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1241 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1242 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1243 N0->getFlags().hasDisjoint());
1244 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1245 }
1246 return SDValue();
1247 }
1248 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1249 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1250 // iff (op x, c1) has one use
1251 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1252 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1253 }
1254 }
1255
1256 // Check for repeated operand logic simplifications.
1257 if (Opc == ISD::AND || Opc == ISD::OR) {
1258 // (N00 & N01) & N00 --> N00 & N01
1259 // (N00 & N01) & N01 --> N00 & N01
1260 // (N00 | N01) | N00 --> N00 | N01
1261 // (N00 | N01) | N01 --> N00 | N01
1262 if (N1 == N00 || N1 == N01)
1263 return N0;
1264 }
1265 if (Opc == ISD::XOR) {
1266 // (N00 ^ N01) ^ N00 --> N01
1267 if (N1 == N00)
1268 return N01;
1269 // (N00 ^ N01) ^ N01 --> N00
1270 if (N1 == N01)
1271 return N00;
1272 }
1273
1274 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1275 if (N1 != N01) {
1276 // Reassociate if (op N00, N1) already exists
1277 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1278 // If Op (Op N00, N1), N01 already exists, we need to stop
1279 // reassociating to avoid an infinite loop.
1280 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1281 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1282 }
1283 }
1284
1285 if (N1 != N00) {
1286 // Reassociate if (op N01, N1) already exists
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1288 // If Op (Op N01, N1), N00 already exists, we need to stop
1289 // reassociating to avoid an infinite loop.
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1292 }
1293 }
1294
1295 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1296 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1297 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1298 // comparisons with the same predicate. This enables optimizations as the
1299 // following one:
1300 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1301 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1302 if (Opc == ISD::AND || Opc == ISD::OR) {
1303 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1304 N01->getOpcode() == ISD::SETCC) {
1305 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1306 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1307 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1308 if (CC1 == CC00 && CC1 != CC01) {
1309 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1310 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1311 }
1312 if (CC1 == CC01 && CC1 != CC00) {
1313 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1314 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1315 }
1316 }
1317 }
1318 }
1319
1320 return SDValue();
1321}
1322
1323/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1324/// same kind of operation as \p Opc.
1325SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1326 SDValue N1, SDNodeFlags Flags) {
1327 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1328
1329 // Floating-point reassociation is not allowed without loose FP math.
1330 if (N0.getValueType().isFloatingPoint() ||
1331 N1.getValueType().isFloatingPoint())
1332 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1333 return SDValue();
1334
1335 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1336 return Combined;
1337 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1338 return Combined;
1339 return SDValue();
1340}
1341
1342// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1343// Note that we only expect Flags to be passed from FP operations. For integer
1344// operations they need to be dropped.
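// For example: add (vecreduce_add x), (vecreduce_add y) -> vecreduce_add (add x, y).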
1345SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1346 const SDLoc &DL, EVT VT, SDValue N0,
1347 SDValue N1, SDNodeFlags Flags) {
1348 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1349 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1350 N0->hasOneUse() && N1->hasOneUse() &&
1351 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1352 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1353 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1354 return DAG.getNode(RedOpc, DL, VT,
1355 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1356 N0.getOperand(0), N1.getOperand(0)));
1357 }
1358
1359 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1360 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1361 // single node.
1362 SDValue A, B, C, D, RedA, RedB;
1363 if (sd_match(N0, m_OneUse(m_c_BinOp(
1364 Opc,
1365 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1366 m_Value(RedA)),
1367 m_Value(B)))) &&
1368 sd_match(N1, m_OneUse(m_c_BinOp(
1369 Opc,
1370 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1371 m_Value(RedB)),
1372 m_Value(D)))) &&
1373 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1374 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1375 A.getValueType() == C.getValueType() &&
1376 hasOperation(Opc, A.getValueType()) &&
1377 TLI.shouldReassociateReduction(RedOpc, VT)) {
1378 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1379 (!N0->getFlags().hasAllowReassociation() ||
1380 !N1->getFlags().hasAllowReassociation() ||
1381 !RedA->getFlags().hasAllowReassociation() ||
1382 !RedB->getFlags().hasAllowReassociation()))
1383 return SDValue();
1384 SelectionDAG::FlagInserter FlagsInserter(
1385 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1386 RedB->getFlags());
1387 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1388 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1389 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1390 return DAG.getNode(Opc, DL, VT, Red, Op2);
1391 }
1392 return SDValue();
1393}
1394
1395SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1396 bool AddTo) {
1397 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1398 ++NodesCombined;
1399 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1400 To[0].dump(&DAG);
1401 dbgs() << " and " << NumTo - 1 << " other values\n");
1402 for (unsigned i = 0, e = NumTo; i != e; ++i)
1403 assert((!To[i].getNode() ||
1404 N->getValueType(i) == To[i].getValueType()) &&
1405 "Cannot combine value to value of different type!");
1406
1407 WorklistRemover DeadNodes(*this);
1408 DAG.ReplaceAllUsesWith(N, To);
1409 if (AddTo) {
1410 // Push the new nodes and any users onto the worklist
1411 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1412 if (To[i].getNode())
1413 AddToWorklistWithUsers(To[i].getNode());
1414 }
1415 }
1416
1417 // Finally, if the node is now dead, remove it from the graph. The node
1418 // may not be dead if the replacement process recursively simplified to
1419 // something else needing this node.
1420 if (N->use_empty())
1421 deleteAndRecombine(N);
1422 return SDValue(N, 0);
1423}
1424
1425void DAGCombiner::
1426CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1427 // Replace the old value with the new one.
1428 ++NodesCombined;
1429 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1430 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1431
1432 // Replace all uses.
1433 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1434
1435 // Push the new node and any (possibly new) users onto the worklist.
1436 AddToWorklistWithUsers(TLO.New.getNode());
1437
1438 // Finally, if the node is now dead, remove it from the graph.
1439 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1440}
1441
1442/// Check the specified integer node value to see if it can be simplified or if
1443/// things it uses can be simplified by bit propagation. If so, return true.
1444bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1445 const APInt &DemandedElts,
1446 bool AssumeSingleUse) {
1447 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1448 KnownBits Known;
1449 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1450 AssumeSingleUse))
1451 return false;
1452
1453 // Revisit the node.
1454 AddToWorklist(Op.getNode());
1455
1456 CommitTargetLoweringOpt(TLO);
1457 return true;
1458}
1459
1460/// Check the specified vector node value to see if it can be simplified or
1461/// if things it uses can be simplified as it only uses some of the elements.
1462/// If so, return true.
1463bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1464 const APInt &DemandedElts,
1465 bool AssumeSingleUse) {
1466 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1467 APInt KnownUndef, KnownZero;
1468 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1469 TLO, 0, AssumeSingleUse))
1470 return false;
1471
1472 // Revisit the node.
1473 AddToWorklist(Op.getNode());
1474
1475 CommitTargetLoweringOpt(TLO);
1476 return true;
1477}
1478
1479void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1480 SDLoc DL(Load);
1481 EVT VT = Load->getValueType(0);
1482 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1483
1484 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1485 Trunc.dump(&DAG); dbgs() << '\n');
1486
1487 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1489
1490 AddToWorklist(Trunc.getNode());
1491 recursivelyDeleteUnusedNodes(Load);
1492}
1493
1494SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1495 Replace = false;
1496 SDLoc DL(Op);
1497 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1498 LoadSDNode *LD = cast<LoadSDNode>(Op);
1499 EVT MemVT = LD->getMemoryVT();
1500 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1501 : LD->getExtensionType();
1502 Replace = true;
1503 return DAG.getExtLoad(ExtType, DL, PVT,
1504 LD->getChain(), LD->getBasePtr(),
1505 MemVT, LD->getMemOperand());
1506 }
1507
1508 unsigned Opc = Op.getOpcode();
1509 switch (Opc) {
1510 default: break;
1511 case ISD::AssertSext:
1512 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1513 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1514 break;
1515 case ISD::AssertZext:
1516 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1517 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1518 break;
1519 case ISD::Constant: {
1520 unsigned ExtOpc =
1521 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1522 return DAG.getNode(ExtOpc, DL, PVT, Op);
1523 }
1524 }
1525
1526 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1527 return SDValue();
1528 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1529}
1530
1531SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1532 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1533 return SDValue();
1534 EVT OldVT = Op.getValueType();
1535 SDLoc DL(Op);
1536 bool Replace = false;
1537 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1538 if (!NewOp.getNode())
1539 return SDValue();
1540 AddToWorklist(NewOp.getNode());
1541
1542 if (Replace)
1543 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1544 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1545 DAG.getValueType(OldVT));
1546}
1547
1548SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1549 EVT OldVT = Op.getValueType();
1550 SDLoc DL(Op);
1551 bool Replace = false;
1552 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1553 if (!NewOp.getNode())
1554 return SDValue();
1555 AddToWorklist(NewOp.getNode());
1556
1557 if (Replace)
1558 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1559 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1560}
1561
1562/// Promote the specified integer binary operation if the target indicates it is
1563/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1564/// i32 since i16 instructions are longer.
1565SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1566 if (!LegalOperations)
1567 return SDValue();
1568
1569 EVT VT = Op.getValueType();
1570 if (VT.isVector() || !VT.isInteger())
1571 return SDValue();
1572
1573 // If operation type is 'undesirable', e.g. i16 on x86, consider
1574 // promoting it.
1575 unsigned Opc = Op.getOpcode();
1576 if (TLI.isTypeDesirableForOp(Opc, VT))
1577 return SDValue();
1578
1579 EVT PVT = VT;
1580 // Consult target whether it is a good idea to promote this operation and
1581 // what's the right type to promote it to.
1582 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1583 assert(PVT != VT && "Don't know what type to promote to!");
1584
1585 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1586
1587 bool Replace0 = false;
1588 SDValue N0 = Op.getOperand(0);
1589 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1590
1591 bool Replace1 = false;
1592 SDValue N1 = Op.getOperand(1);
1593 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1594 SDLoc DL(Op);
1595
1596 SDValue RV =
1597 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1598
1599 // We are always replacing N0/N1's use in N and only need additional
1600 // replacements if there are additional uses.
1601 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1602 // (SDValue) here because the node may reference multiple values
1603 // (for example, the chain value of a load node).
1604 Replace0 &= !N0->hasOneUse();
1605 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1606
1607 // Combine Op here so it is preserved past replacements.
1608 CombineTo(Op.getNode(), RV);
1609
1610 // If operands have a use ordering, make sure we deal with
1611 // predecessor first.
1612 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1613 std::swap(N0, N1);
1614 std::swap(NN0, NN1);
1615 }
1616
1617 if (Replace0) {
1618 AddToWorklist(NN0.getNode());
1619 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1620 }
1621 if (Replace1) {
1622 AddToWorklist(NN1.getNode());
1623 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1624 }
1625 return Op;
1626 }
1627 return SDValue();
1628}
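// Illustrative x86-style case: if i16 is reported as undesirable for ADD, a
// 16-bit addition is rewritten to operate at the promoted width and truncated
// back, roughly:
//   (add i16 a, b) --> (truncate i16 (add i32 (any_extend a), (any_extend b)))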
1629
1630/// Promote the specified integer shift operation if the target indicates it is
1631/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1632/// i32 since i16 instructions are longer.
1633SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1634 if (!LegalOperations)
1635 return SDValue();
1636
1637 EVT VT = Op.getValueType();
1638 if (VT.isVector() || !VT.isInteger())
1639 return SDValue();
1640
1641 // If operation type is 'undesirable', e.g. i16 on x86, consider
1642 // promoting it.
1643 unsigned Opc = Op.getOpcode();
1644 if (TLI.isTypeDesirableForOp(Opc, VT))
1645 return SDValue();
1646
1647 EVT PVT = VT;
1648 // Consult target whether it is a good idea to promote this operation and
1649 // what's the right type to promote it to.
1650 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1651 assert(PVT != VT && "Don't know what type to promote to!");
1652
1653 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1654
1655 bool Replace = false;
1656 SDValue N0 = Op.getOperand(0);
1657 if (Opc == ISD::SRA)
1658 N0 = SExtPromoteOperand(N0, PVT);
1659 else if (Opc == ISD::SRL)
1660 N0 = ZExtPromoteOperand(N0, PVT);
1661 else
1662 N0 = PromoteOperand(N0, PVT, Replace);
1663
1664 if (!N0.getNode())
1665 return SDValue();
1666
1667 SDLoc DL(Op);
1668 SDValue N1 = Op.getOperand(1);
1669 SDValue RV =
1670 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1671
1672 if (Replace)
1673 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1674
1675 // Deal with Op being deleted.
1676 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1677 return RV;
1678 }
1679 return SDValue();
1680}
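// Illustrative case: for SRL the promoted source must be zero-extended so that
// the bits shifted in from above the original width are zero, e.g.
//   (srl i16 a, 3) --> (truncate i16 (srl i32 (zero-extended a), 3))
// while SRA instead requires a sign extension of the promoted source, and SHL
// can use a plain any-extend because the extra high bits are discarded by the
// final truncate.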
1681
1682SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1683 if (!LegalOperations)
1684 return SDValue();
1685
1686 EVT VT = Op.getValueType();
1687 if (VT.isVector() || !VT.isInteger())
1688 return SDValue();
1689
1690 // If operation type is 'undesirable', e.g. i16 on x86, consider
1691 // promoting it.
1692 unsigned Opc = Op.getOpcode();
1693 if (TLI.isTypeDesirableForOp(Opc, VT))
1694 return SDValue();
1695
1696 EVT PVT = VT;
1697 // Consult target whether it is a good idea to promote this operation and
1698 // what's the right type to promote it to.
1699 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1700 assert(PVT != VT && "Don't know what type to promote to!");
1701 // fold (aext (aext x)) -> (aext x)
1702 // fold (aext (zext x)) -> (zext x)
1703 // fold (aext (sext x)) -> (sext x)
1704 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1705 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1706 }
1707 return SDValue();
1708}
1709
1710bool DAGCombiner::PromoteLoad(SDValue Op) {
1711 if (!LegalOperations)
1712 return false;
1713
1714 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1715 return false;
1716
1717 EVT VT = Op.getValueType();
1718 if (VT.isVector() || !VT.isInteger())
1719 return false;
1720
1721 // If operation type is 'undesirable', e.g. i16 on x86, consider
1722 // promoting it.
1723 unsigned Opc = Op.getOpcode();
1724 if (TLI.isTypeDesirableForOp(Opc, VT))
1725 return false;
1726
1727 EVT PVT = VT;
1728 // Consult target whether it is a good idea to promote this operation and
1729 // what's the right type to promote it to.
1730 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1731 assert(PVT != VT && "Don't know what type to promote to!");
1732
1733 SDLoc DL(Op);
1734 SDNode *N = Op.getNode();
1735 LoadSDNode *LD = cast<LoadSDNode>(N);
1736 EVT MemVT = LD->getMemoryVT();
1737 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1738 : LD->getExtensionType();
1739 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1740 LD->getChain(), LD->getBasePtr(),
1741 MemVT, LD->getMemOperand());
1742 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1743
1744 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1745 Result.dump(&DAG); dbgs() << '\n');
1746
1747 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1748 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1749
1750 AddToWorklist(Result.getNode());
1751 recursivelyDeleteUnusedNodes(N);
1752 return true;
1753 }
1754
1755 return false;
1756}
1757
1758/// Recursively delete a node which has no uses and any operands for
1759/// which it is the only use.
1760///
1761/// Note that this both deletes the nodes and removes them from the worklist.
1762/// It also adds any nodes that have had a user deleted to the worklist, as they
1763/// may now have only one use and be subject to other combines.
1764bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1765 if (!N->use_empty())
1766 return false;
1767
1768 SmallSetVector<SDNode *, 16> Nodes;
1769 Nodes.insert(N);
1770 do {
1771 N = Nodes.pop_back_val();
1772 if (!N)
1773 continue;
1774
1775 if (N->use_empty()) {
1776 for (const SDValue &ChildN : N->op_values())
1777 Nodes.insert(ChildN.getNode());
1778
1779 removeFromWorklist(N);
1780 DAG.DeleteNode(N);
1781 } else {
1782 AddToWorklist(N);
1783 }
1784 } while (!Nodes.empty());
1785 return true;
1786}
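// For example, deleting a dead (add (mul a, b), c) also deletes the MUL if the
// ADD was its only user, while a, b and c (which may still have other users)
// are merely added back to the worklist for revisiting.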
1787
1788//===----------------------------------------------------------------------===//
1789// Main DAG Combiner implementation
1790//===----------------------------------------------------------------------===//
1791
1792void DAGCombiner::Run(CombineLevel AtLevel) {
1793 // Set the instance variables, so that the various visit routines may use them.
1794 Level = AtLevel;
1795 LegalDAG = Level >= AfterLegalizeDAG;
1796 LegalOperations = Level >= AfterLegalizeVectorOps;
1797 LegalTypes = Level >= AfterLegalizeTypes;
1798
1799 WorklistInserter AddNodes(*this);
1800
1801 // Add all the dag nodes to the worklist.
1802 //
1803 // Note: Not all nodes are added to PruningList here. The only nodes that can
1804 // be deleted are those which have no uses, and all other nodes which would
1805 // otherwise be added to the worklist by the first call to
1806 // getNextWorklistEntry are already present in it.
1807 for (SDNode &Node : DAG.allnodes())
1808 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1809
1810 // Create a dummy node (which is not added to allnodes), that adds a reference
1811 // to the root node, preventing it from being deleted, and tracking any
1812 // changes of the root.
1813 HandleSDNode Dummy(DAG.getRoot());
1814
1815 // While we have a valid worklist entry node, try to combine it.
1816 while (SDNode *N = getNextWorklistEntry()) {
1817 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1818 // N is deleted from the DAG, since they too may now be dead or may have a
1819 // reduced number of uses, allowing other xforms.
1820 if (recursivelyDeleteUnusedNodes(N))
1821 continue;
1822
1823 WorklistRemover DeadNodes(*this);
1824
1825 // If this combine is running after legalizing the DAG, re-legalize any
1826 // nodes pulled off the worklist.
1827 if (LegalDAG) {
1828 SmallSetVector<SDNode *, 16> UpdatedNodes;
1829 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1830
1831 for (SDNode *LN : UpdatedNodes)
1832 AddToWorklistWithUsers(LN);
1833
1834 if (!NIsValid)
1835 continue;
1836 }
1837
1838 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1839
1840 // Add any operands of the new node which have not yet been combined to the
1841 // worklist as well. getNextWorklistEntry flags nodes that have been
1842 // combined before. Because the worklist uniques things already, this won't
1843 // repeatedly process the same operand.
1844 for (const SDValue &ChildN : N->op_values())
1845 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1846 /*SkipIfCombinedBefore=*/true);
1847
1848 SDValue RV = combine(N);
1849
1850 if (!RV.getNode())
1851 continue;
1852
1853 ++NodesCombined;
1854
1855 // Invalidate cached info.
1856 ChainsWithoutMergeableStores.clear();
1857
1858 // If we get back the same node we passed in, rather than a new node or
1859 // zero, we know that the node must have defined multiple values and
1860 // CombineTo was used. Since CombineTo takes care of the worklist
1861 // mechanics for us, we have no work to do in this case.
1862 if (RV.getNode() == N)
1863 continue;
1864
1865 assert(N->getOpcode() != ISD::DELETED_NODE &&
1866 RV.getOpcode() != ISD::DELETED_NODE &&
1867 "Node was deleted but visit returned new node!");
1868
1869 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1870
1871 if (N->getNumValues() == RV->getNumValues())
1872 DAG.ReplaceAllUsesWith(N, RV.getNode());
1873 else {
1874 assert(N->getValueType(0) == RV.getValueType() &&
1875 N->getNumValues() == 1 && "Type mismatch");
1876 DAG.ReplaceAllUsesWith(N, &RV);
1877 }
1878
1879 // Push the new node and any users onto the worklist. Omit this if the
1880 // new node is the EntryToken (e.g. if a store managed to get optimized
1881 // out), because re-visiting the EntryToken and its users will not uncover
1882 // any additional opportunities, but there may be a large number of such
1883 // users, potentially causing compile time explosion.
1884 if (RV.getOpcode() != ISD::EntryToken)
1885 AddToWorklistWithUsers(RV.getNode());
1886
1887 // Finally, if the node is now dead, remove it from the graph. The node
1888 // may not be dead if the replacement process recursively simplified to
1889 // something else needing this node. This will also take care of adding any
1890 // operands which have lost a user to the worklist.
1891 recursivelyDeleteUnusedNodes(N);
1892 }
1893
1894 // If the root changed (e.g. it was a dead load), update the root.
1895 DAG.setRoot(Dummy.getValue());
1896 DAG.RemoveDeadNodes();
1897}
1898
1899SDValue DAGCombiner::visit(SDNode *N) {
1900 // clang-format off
1901 switch (N->getOpcode()) {
1902 default: break;
1903 case ISD::TokenFactor: return visitTokenFactor(N);
1904 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1905 case ISD::ADD: return visitADD(N);
1906 case ISD::PTRADD: return visitPTRADD(N);
1907 case ISD::SUB: return visitSUB(N);
1908 case ISD::SADDSAT:
1909 case ISD::UADDSAT: return visitADDSAT(N);
1910 case ISD::SSUBSAT:
1911 case ISD::USUBSAT: return visitSUBSAT(N);
1912 case ISD::ADDC: return visitADDC(N);
1913 case ISD::SADDO:
1914 case ISD::UADDO: return visitADDO(N);
1915 case ISD::SUBC: return visitSUBC(N);
1916 case ISD::SSUBO:
1917 case ISD::USUBO: return visitSUBO(N);
1918 case ISD::ADDE: return visitADDE(N);
1919 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1920 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1921 case ISD::SUBE: return visitSUBE(N);
1922 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1923 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1924 case ISD::SMULFIX:
1925 case ISD::SMULFIXSAT:
1926 case ISD::UMULFIX:
1927 case ISD::UMULFIXSAT: return visitMULFIX(N);
1928 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1929 case ISD::SDIV: return visitSDIV(N);
1930 case ISD::UDIV: return visitUDIV(N);
1931 case ISD::SREM:
1932 case ISD::UREM: return visitREM(N);
1933 case ISD::MULHU: return visitMULHU(N);
1934 case ISD::MULHS: return visitMULHS(N);
1935 case ISD::AVGFLOORS:
1936 case ISD::AVGFLOORU:
1937 case ISD::AVGCEILS:
1938 case ISD::AVGCEILU: return visitAVG(N);
1939 case ISD::ABDS:
1940 case ISD::ABDU: return visitABD(N);
1941 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1942 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1943 case ISD::SMULO:
1944 case ISD::UMULO: return visitMULO(N);
1945 case ISD::SMIN:
1946 case ISD::SMAX:
1947 case ISD::UMIN:
1948 case ISD::UMAX: return visitIMINMAX(N);
1949 case ISD::AND: return visitAND(N);
1950 case ISD::OR: return visitOR(N);
1951 case ISD::XOR: return visitXOR(N);
1952 case ISD::SHL: return visitSHL(N);
1953 case ISD::SRA: return visitSRA(N);
1954 case ISD::SRL: return visitSRL(N);
1955 case ISD::ROTR:
1956 case ISD::ROTL: return visitRotate(N);
1957 case ISD::FSHL:
1958 case ISD::FSHR: return visitFunnelShift(N);
1959 case ISD::SSHLSAT:
1960 case ISD::USHLSAT: return visitSHLSAT(N);
1961 case ISD::ABS: return visitABS(N);
1962 case ISD::BSWAP: return visitBSWAP(N);
1963 case ISD::BITREVERSE: return visitBITREVERSE(N);
1964 case ISD::CTLZ: return visitCTLZ(N);
1965 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1966 case ISD::CTTZ: return visitCTTZ(N);
1967 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1968 case ISD::CTPOP: return visitCTPOP(N);
1969 case ISD::SELECT: return visitSELECT(N);
1970 case ISD::VSELECT: return visitVSELECT(N);
1971 case ISD::SELECT_CC: return visitSELECT_CC(N);
1972 case ISD::SETCC: return visitSETCC(N);
1973 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1974 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1975 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1976 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1977 case ISD::AssertSext:
1978 case ISD::AssertZext: return visitAssertExt(N);
1979 case ISD::AssertAlign: return visitAssertAlign(N);
1980 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1981 case ISD::SIGN_EXTEND_VECTOR_INREG:
1982 case ISD::ZERO_EXTEND_VECTOR_INREG:
1983 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1984 case ISD::TRUNCATE: return visitTRUNCATE(N);
1985 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1986 case ISD::BITCAST: return visitBITCAST(N);
1987 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1988 case ISD::FADD: return visitFADD(N);
1989 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1990 case ISD::FSUB: return visitFSUB(N);
1991 case ISD::FMUL: return visitFMUL(N);
1992 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1993 case ISD::FMAD: return visitFMAD(N);
1994 case ISD::FDIV: return visitFDIV(N);
1995 case ISD::FREM: return visitFREM(N);
1996 case ISD::FSQRT: return visitFSQRT(N);
1997 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1998 case ISD::FPOW: return visitFPOW(N);
1999 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2000 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2001 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2002 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2003 case ISD::LROUND:
2004 case ISD::LLROUND:
2005 case ISD::LRINT:
2006 case ISD::LLRINT: return visitXROUND(N);
2007 case ISD::FP_ROUND: return visitFP_ROUND(N);
2008 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2009 case ISD::FNEG: return visitFNEG(N);
2010 case ISD::FABS: return visitFABS(N);
2011 case ISD::FFLOOR: return visitFFLOOR(N);
2012 case ISD::FMINNUM:
2013 case ISD::FMAXNUM:
2014 case ISD::FMINIMUM:
2015 case ISD::FMAXIMUM:
2016 case ISD::FMINIMUMNUM:
2017 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2018 case ISD::FCEIL: return visitFCEIL(N);
2019 case ISD::FTRUNC: return visitFTRUNC(N);
2020 case ISD::FFREXP: return visitFFREXP(N);
2021 case ISD::BRCOND: return visitBRCOND(N);
2022 case ISD::BR_CC: return visitBR_CC(N);
2023 case ISD::LOAD: return visitLOAD(N);
2024 case ISD::STORE: return visitSTORE(N);
2025 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2026 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2027 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2028 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2029 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2030 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2031 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2032 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2033 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2034 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2035 case ISD::MGATHER: return visitMGATHER(N);
2036 case ISD::MLOAD: return visitMLOAD(N);
2037 case ISD::MSCATTER: return visitMSCATTER(N);
2038 case ISD::MSTORE: return visitMSTORE(N);
2039 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2040 case ISD::PARTIAL_REDUCE_SMLA:
2041 case ISD::PARTIAL_REDUCE_UMLA:
2042 case ISD::PARTIAL_REDUCE_SUMLA:
2043 return visitPARTIAL_REDUCE_MLA(N);
2044 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2045 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2046 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2047 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2048 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2049 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2050 case ISD::FREEZE: return visitFREEZE(N);
2051 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2052 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2053 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2054 case ISD::VECREDUCE_FADD:
2055 case ISD::VECREDUCE_FMUL:
2056 case ISD::VECREDUCE_ADD:
2057 case ISD::VECREDUCE_MUL:
2058 case ISD::VECREDUCE_AND:
2059 case ISD::VECREDUCE_OR:
2060 case ISD::VECREDUCE_XOR:
2061 case ISD::VECREDUCE_SMAX:
2062 case ISD::VECREDUCE_SMIN:
2063 case ISD::VECREDUCE_UMAX:
2064 case ISD::VECREDUCE_UMIN:
2065 case ISD::VECREDUCE_FMAX:
2066 case ISD::VECREDUCE_FMIN:
2067 case ISD::VECREDUCE_FMAXIMUM:
2068 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2069#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2070#include "llvm/IR/VPIntrinsics.def"
2071 return visitVPOp(N);
2072 }
2073 // clang-format on
2074 return SDValue();
2075}
2076
2077SDValue DAGCombiner::combine(SDNode *N) {
2078 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2079 return SDValue();
2080
2081 SDValue RV;
2082 if (!DisableGenericCombines)
2083 RV = visit(N);
2084
2085 // If nothing happened, try a target-specific DAG combine.
2086 if (!RV.getNode()) {
2087 assert(N->getOpcode() != ISD::DELETED_NODE &&
2088 "Node was deleted but visit returned NULL!");
2089
2090 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2091 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2092
2093 // Expose the DAG combiner to the target combiner impls.
2094 TargetLowering::DAGCombinerInfo
2095 DagCombineInfo(DAG, Level, false, this);
2096
2097 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2098 }
2099 }
2100
2101 // If nothing happened still, try promoting the operation.
2102 if (!RV.getNode()) {
2103 switch (N->getOpcode()) {
2104 default: break;
2105 case ISD::ADD:
2106 case ISD::SUB:
2107 case ISD::MUL:
2108 case ISD::AND:
2109 case ISD::OR:
2110 case ISD::XOR:
2111 RV = PromoteIntBinOp(SDValue(N, 0));
2112 break;
2113 case ISD::SHL:
2114 case ISD::SRA:
2115 case ISD::SRL:
2116 RV = PromoteIntShiftOp(SDValue(N, 0));
2117 break;
2118 case ISD::SIGN_EXTEND:
2119 case ISD::ZERO_EXTEND:
2120 case ISD::ANY_EXTEND:
2121 RV = PromoteExtend(SDValue(N, 0));
2122 break;
2123 case ISD::LOAD:
2124 if (PromoteLoad(SDValue(N, 0)))
2125 RV = SDValue(N, 0);
2126 break;
2127 }
2128 }
2129
2130 // If N is a commutative binary node, try to eliminate it if the commuted
2131 // version is already present in the DAG.
2132 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2133 SDValue N0 = N->getOperand(0);
2134 SDValue N1 = N->getOperand(1);
2135
2136 // Constant operands are canonicalized to RHS.
2137 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2138 SDValue Ops[] = {N1, N0};
2139 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2140 N->getFlags());
2141 if (CSENode)
2142 return SDValue(CSENode, 0);
2143 }
2144 }
2145
2146 return RV;
2147}
2148
2149/// Given a node, return its input chain if it has one, otherwise return a null
2150/// sd operand.
2151static SDValue getInputChainForNode(SDNode *N) {
2152 if (unsigned NumOps = N->getNumOperands()) {
2153 if (N->getOperand(0).getValueType() == MVT::Other)
2154 return N->getOperand(0);
2155 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2156 return N->getOperand(NumOps-1);
2157 for (unsigned i = 1; i < NumOps-1; ++i)
2158 if (N->getOperand(i).getValueType() == MVT::Other)
2159 return N->getOperand(i);
2160 }
2161 return SDValue();
2162}
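// For example, for a load or store node the chain is operand 0 and is what
// gets returned here; for a node with no MVT::Other operand a null SDValue is
// returned.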
2163
2164SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2165 SDValue Operand = N->getOperand(0);
2166 EVT VT = Operand.getValueType();
2167 SDLoc dl(N);
2168
2169 // Canonicalize undef to quiet NaN.
2170 if (Operand.isUndef()) {
2171 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2172 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2173 }
2174 return SDValue();
2175}
2176
2177SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2178 // If N has two operands, where one has an input chain equal to the other,
2179 // the 'other' chain is redundant.
2180 if (N->getNumOperands() == 2) {
2181 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2182 return N->getOperand(0);
2183 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2184 return N->getOperand(1);
2185 }
2186
2187 // Don't simplify token factors if optnone.
2188 if (OptLevel == CodeGenOptLevel::None)
2189 return SDValue();
2190
2191 // Don't simplify the token factor if the node itself has too many operands.
2192 if (N->getNumOperands() > TokenFactorInlineLimit)
2193 return SDValue();
2194
2195 // If the sole user is a token factor, we should make sure we have a
2196 // chance to merge them together. This prevents TF chains from inhibiting
2197 // optimizations.
2198 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2199 AddToWorklist(*(N->user_begin()));
2200
2201 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2202 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2203 SmallPtrSet<SDNode*, 16> SeenOps;
2204 bool Changed = false; // If we should replace this token factor.
2205
2206 // Start out with this token factor.
2207 TFs.push_back(N);
2208
2209 // Iterate through token factors. The TFs list grows when new token factors
2210 // are encountered.
2211 for (unsigned i = 0; i < TFs.size(); ++i) {
2212 // Limit number of nodes to inline, to avoid quadratic compile times.
2213 // We have to add the outstanding Token Factors to Ops, otherwise we might
2214 // drop Ops from the resulting Token Factors.
2215 if (Ops.size() > TokenFactorInlineLimit) {
2216 for (unsigned j = i; j < TFs.size(); j++)
2217 Ops.emplace_back(TFs[j], 0);
2218 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2219 // combiner worklist later.
2220 TFs.resize(i);
2221 break;
2222 }
2223
2224 SDNode *TF = TFs[i];
2225 // Check each of the operands.
2226 for (const SDValue &Op : TF->op_values()) {
2227 switch (Op.getOpcode()) {
2228 case ISD::EntryToken:
2229 // Entry tokens don't need to be added to the list. They are
2230 // redundant.
2231 Changed = true;
2232 break;
2233
2234 case ISD::TokenFactor:
2235 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2236 // Queue up for processing.
2237 TFs.push_back(Op.getNode());
2238 Changed = true;
2239 break;
2240 }
2241 [[fallthrough]];
2242
2243 default:
2244 // Only add if it isn't already in the list.
2245 if (SeenOps.insert(Op.getNode()).second)
2246 Ops.push_back(Op);
2247 else
2248 Changed = true;
2249 break;
2250 }
2251 }
2252 }
2253
2254 // Re-visit inlined Token Factors, to clean them up in case they have been
2255 // removed. Skip the first Token Factor, as this is the current node.
2256 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2257 AddToWorklist(TFs[i]);
2258
2259 // Remove Nodes that are chained to another node in the list. Do so
2260 // by walking up chains breadth-first, stopping when we've seen
2261 // another operand. In general we must climb to the EntryNode, but we can exit
2262 // early if we find all remaining work is associated with just one operand as
2263 // no further pruning is possible.
2264
2265 // List of nodes to search through and original Ops from which they originate.
2266 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2267 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2268 SmallPtrSet<SDNode *, 16> SeenChains;
2269 bool DidPruneOps = false;
2270
2271 unsigned NumLeftToConsider = 0;
2272 for (const SDValue &Op : Ops) {
2273 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2274 OpWorkCount.push_back(1);
2275 }
2276
2277 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2278 // If this is an Op, we can remove the op from the list. Re-mark any
2279 // search associated with it as coming from the current OpNumber.
2280 if (SeenOps.contains(Op)) {
2281 Changed = true;
2282 DidPruneOps = true;
2283 unsigned OrigOpNumber = 0;
2284 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2285 OrigOpNumber++;
2286 assert((OrigOpNumber != Ops.size()) &&
2287 "expected to find TokenFactor Operand");
2288 // Re-mark worklist from OrigOpNumber to OpNumber
2289 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2290 if (Worklist[i].second == OrigOpNumber) {
2291 Worklist[i].second = OpNumber;
2292 }
2293 }
2294 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2295 OpWorkCount[OrigOpNumber] = 0;
2296 NumLeftToConsider--;
2297 }
2298 // Add if it's a new chain
2299 if (SeenChains.insert(Op).second) {
2300 OpWorkCount[OpNumber]++;
2301 Worklist.push_back(std::make_pair(Op, OpNumber));
2302 }
2303 };
2304
2305 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2306 // We need to consider at least 2 Ops to prune.
2307 if (NumLeftToConsider <= 1)
2308 break;
2309 auto CurNode = Worklist[i].first;
2310 auto CurOpNumber = Worklist[i].second;
2311 assert((OpWorkCount[CurOpNumber] > 0) &&
2312 "Node should not appear in worklist");
2313 switch (CurNode->getOpcode()) {
2314 case ISD::EntryToken:
2315 // Hitting EntryToken is the only way for the search to terminate without
2316 // hitting another operand's search. Prevent this operand from being marked
2317 // as considered.
2319 NumLeftToConsider++;
2320 break;
2321 case ISD::TokenFactor:
2322 for (const SDValue &Op : CurNode->op_values())
2323 AddToWorklist(i, Op.getNode(), CurOpNumber);
2324 break;
2325 case ISD::LIFETIME_START:
2326 case ISD::LIFETIME_END:
2327 case ISD::CopyFromReg:
2328 case ISD::CopyToReg:
2329 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2330 break;
2331 default:
2332 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2333 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2334 break;
2335 }
2336 OpWorkCount[CurOpNumber]--;
2337 if (OpWorkCount[CurOpNumber] == 0)
2338 NumLeftToConsider--;
2339 }
2340
2341 // If we've changed things around then replace token factor.
2342 if (Changed) {
2343 SDValue Result;
2344 if (Ops.empty()) {
2345 // The entry token is the only possible outcome.
2346 Result = DAG.getEntryNode();
2347 } else {
2348 if (DidPruneOps) {
2349 SmallVector<SDValue, 8> PrunedOps;
2350 //
2351 for (const SDValue &Op : Ops) {
2352 if (SeenChains.count(Op.getNode()) == 0)
2353 PrunedOps.push_back(Op);
2354 }
2355 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2356 } else {
2357 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2358 }
2359 }
2360 return Result;
2361 }
2362 return SDValue();
2363}
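// A minimal sketch of the flattening performed above, assuming the inner token
// factor has a single use:
//   t2 = TokenFactor t0, t1
//   t4 = TokenFactor t2, t3      -->   t4 = TokenFactor t0, t1, t3
// If t1 is additionally reachable by walking up t3's chain, the pruning walk
// drops t1 from the new operand list as redundant.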
2364
2365/// MERGE_VALUES can always be eliminated.
2366SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2367 WorklistRemover DeadNodes(*this);
2368 // Replacing results may cause a different MERGE_VALUES to suddenly
2369 // be CSE'd with N, and carry its uses with it. Iterate until no
2370 // uses remain, to ensure that the node can be safely deleted.
2371 // First add the users of this node to the work list so that they
2372 // can be tried again once they have new operands.
2373 AddUsersToWorklist(N);
2374 do {
2375 // Do as a single replacement to avoid rewalking use lists.
2376 SmallVector<SDValue, 8> Ops(N->ops());
2377 DAG.ReplaceAllUsesWith(N, Ops.data());
2378 } while (!N->use_empty());
2379 deleteAndRecombine(N);
2380 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2381}
2382
2383/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2384/// ConstantSDNode pointer else nullptr.
2385static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2386 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2387 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2388}
2389
2390// isTruncateOf - If N is a truncate of some other value, return true, record
2391// the value being truncated in Op and which of Op's bits are zero/one in Known.
2392// This function computes KnownBits to avoid a duplicated call to
2393// computeKnownBits in the caller.
2394static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2395 KnownBits &Known) {
2396 if (N->getOpcode() == ISD::TRUNCATE) {
2397 Op = N->getOperand(0);
2398 Known = DAG.computeKnownBits(Op);
2399 if (N->getFlags().hasNoUnsignedWrap())
2400 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2401 return true;
2402 }
2403
2404 if (N.getValueType().getScalarType() != MVT::i1 ||
2405 !sd_match(
2406 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2407 return false;
2408
2409 Known = DAG.computeKnownBits(Op);
2410 return (Known.Zero | 1).isAllOnes();
2411}
2412
2413/// Return true if 'Use' is a load or a store that uses N as its base pointer
2414/// and that N may be folded in the load / store addressing mode.
2415static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2416 const TargetLowering &TLI) {
2417 EVT VT;
2418 unsigned AS;
2419
2420 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2421 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2422 return false;
2423 VT = LD->getMemoryVT();
2424 AS = LD->getAddressSpace();
2425 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2426 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2427 return false;
2428 VT = ST->getMemoryVT();
2429 AS = ST->getAddressSpace();
2430 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2431 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2432 return false;
2433 VT = LD->getMemoryVT();
2434 AS = LD->getAddressSpace();
2435 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2436 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2437 return false;
2438 VT = ST->getMemoryVT();
2439 AS = ST->getAddressSpace();
2440 } else {
2441 return false;
2442 }
2443
2444 TargetLowering::AddrMode AM;
2445 if (N->isAnyAdd()) {
2446 AM.HasBaseReg = true;
2447 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2448 if (Offset)
2449 // [reg +/- imm]
2450 AM.BaseOffs = Offset->getSExtValue();
2451 else
2452 // [reg +/- reg]
2453 AM.Scale = 1;
2454 } else if (N->getOpcode() == ISD::SUB) {
2455 AM.HasBaseReg = true;
2456 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2457 if (Offset)
2458 // [reg +/- imm]
2459 AM.BaseOffs = -Offset->getSExtValue();
2460 else
2461 // [reg +/- reg]
2462 AM.Scale = 1;
2463 } else {
2464 return false;
2465 }
2466
2467 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2468 VT.getTypeForEVT(*DAG.getContext()), AS);
2469}
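// For example, on a target with legal [reg + imm] addressing, an (add x, 16)
// that is only used as the base pointer of a load can be folded into the
// load's addressing mode, so combines that would break up such an add are
// not profitable.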
2470
2471/// This inverts a canonicalization in IR that replaces a variable select arm
2472/// with an identity constant. Codegen improves if we re-use the variable
2473/// operand rather than load a constant. This can also be converted into a
2474/// masked vector operation if the target supports it.
2475static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2476 bool ShouldCommuteOperands) {
2477 // Match a select as operand 1. The identity constant that we are looking for
2478 // is only valid as operand 1 of a non-commutative binop.
2479 SDValue N0 = N->getOperand(0);
2480 SDValue N1 = N->getOperand(1);
2481 if (ShouldCommuteOperands)
2482 std::swap(N0, N1);
2483
2484 unsigned SelOpcode = N1.getOpcode();
2485 if ((SelOpcode != ISD::VSELECT && SelOpcode != ISD::SELECT) ||
2486 !N1.hasOneUse())
2487 return SDValue();
2488
2489 // We can't hoist all instructions because of immediate UB (not speculatable).
2490 // For example div/rem by zero.
2491 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2492 return SDValue();
2493
2494 unsigned Opcode = N->getOpcode();
2495 EVT VT = N->getValueType(0);
2496 SDValue Cond = N1.getOperand(0);
2497 SDValue TVal = N1.getOperand(1);
2498 SDValue FVal = N1.getOperand(2);
2499 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2500
2501 // This transform increases uses of N0, so freeze it to be safe.
2502 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2503 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2504 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2505 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2506 FVal)) {
2507 SDValue F0 = DAG.getFreeze(N0);
2508 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510 }
2511 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2513 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2514 TVal)) {
2515 SDValue F0 = DAG.getFreeze(N0);
2516 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2517 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2518 }
2519
2520 return SDValue();
2521}
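// Illustrative case with the additive identity as the true arm:
//   (add x, (vselect cond, 0, y))
//     --> (vselect cond, freeze(x), (add freeze(x), y))
// so the select now chooses between the unchanged (frozen) variable operand
// and the real addition instead of materializing the identity constant.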
2522
2523SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2525 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2526 "Unexpected binary operator");
2527
2528 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2529 return Sel;
2530
2531 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2532 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2533 return Sel;
2534
2535 // Don't do this unless the old select is going away. We want to eliminate the
2536 // binary operator, not replace a binop with a select.
2537 // TODO: Handle ISD::SELECT_CC.
2538 unsigned SelOpNo = 0;
2539 SDValue Sel = BO->getOperand(0);
2540 auto BinOpcode = BO->getOpcode();
2541 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2542 SelOpNo = 1;
2543 Sel = BO->getOperand(1);
2544
2545 // Peek through trunc to shift amount type.
2546 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2547 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2548 // This is valid when the truncated bits of x are already zero.
2549 SDValue Op;
2550 KnownBits Known;
2551 if (isTruncateOf(DAG, Sel, Op, Known) &&
2552 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2553 Sel = Op;
2554 }
2555 }
2556
2557 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2558 return SDValue();
2559
2560 SDValue CT = Sel.getOperand(1);
2561 if (!isConstantOrConstantVector(CT, true) &&
2562 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2563 return SDValue();
2564
2565 SDValue CF = Sel.getOperand(2);
2566 if (!isConstantOrConstantVector(CF, true) &&
2567 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2568 return SDValue();
2569
2570 // Bail out if any constants are opaque because we can't constant fold those.
2571 // The exception is "and" and "or" with either 0 or -1 in which case we can
2572 // propagate non constant operands into select. I.e.:
2573 // and (select Cond, 0, -1), X --> select Cond, 0, X
2574 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2575 bool CanFoldNonConst =
2576 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2577 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2578 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2579
2580 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2581 if (!CanFoldNonConst &&
2582 !isConstantOrConstantVector(CBO, true) &&
2583 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2584 return SDValue();
2585
2586 SDLoc DL(Sel);
2587 SDValue NewCT, NewCF;
2588 EVT VT = BO->getValueType(0);
2589
2590 if (CanFoldNonConst) {
2591 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594 NewCT = CT;
2595 else
2596 NewCT = CBO;
2597
2598 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600 NewCF = CF;
2601 else
2602 NewCF = CBO;
2603 } else {
2604 // We have a select-of-constants followed by a binary operator with a
2605 // constant. Eliminate the binop by pulling the constant math into the
2606 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2607 // CBO, CF + CBO
2608 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610 if (!NewCT)
2611 return SDValue();
2612
2613 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615 if (!NewCF)
2616 return SDValue();
2617 }
2618
2619 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2620}
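// Worked example of the constant-folding path above:
//   (add (select cond, 7, 2), 10) --> (select cond, 17, 12)
// The binary operator disappears entirely because both select arms were
// constants that could be folded with the other operand.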
2621
2622static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2623 SelectionDAG &DAG) {
2624 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 "Expecting add or sub");
2626
2627 // Match a constant operand and a zext operand for the math instruction:
2628 // add Z, C
2629 // sub C, Z
2630 bool IsAdd = N->getOpcode() == ISD::ADD;
2631 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2632 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2633 auto *CN = dyn_cast<ConstantSDNode>(C);
2634 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2635 return SDValue();
2636
2637 // Match the zext operand as a setcc of a boolean.
2638 if (Z.getOperand(0).getValueType() != MVT::i1)
2639 return SDValue();
2640
2641 // Match the compare as: setcc (X & 1), 0, eq.
2642 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2643 m_SpecificCondCode(ISD::SETEQ))))
2644 return SDValue();
2645
2646 // We are adding/subtracting a constant and an inverted low bit. Turn that
2647 // into a subtract/add of the low bit with incremented/decremented constant:
2648 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2649 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2650 EVT VT = C.getValueType();
2651 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2652 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2653 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2654 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2655}
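// Worked example with C = 5:
//   (add (zext i1 (seteq (and X, 1), 0)), 5) --> (sub 6, (zext (and X, 1)))
// Both forms yield 6 when the low bit of X is clear and 5 when it is set, but
// the rewritten form avoids the inverted compare.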
2656
2657// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2658SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2659 SDValue N0 = N->getOperand(0);
2660 EVT VT = N0.getValueType();
2661 SDValue A, B;
2662
2663 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2664 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2665 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2666 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2667 }
2668 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2669 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2670 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2671 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2672 }
2673 return SDValue();
2674}
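// Worked example (unsigned i8, A = 6, B = 9):
//   (A | B) - ((A ^ B) >> 1) = 15 - 7 = 8 = ceil((6 + 9) / 2)
// so the whole expression is exactly avgceilu(A, B).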
2675
2676/// Try to fold a pointer arithmetic node.
2677/// This needs to be done separately from normal addition, because pointer
2678/// addition is not commutative.
2679SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2680 SDValue N0 = N->getOperand(0);
2681 SDValue N1 = N->getOperand(1);
2682 EVT PtrVT = N0.getValueType();
2683 EVT IntVT = N1.getValueType();
2684 SDLoc DL(N);
2685
2686 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2687 // combines here depend on this assumption.
2688 assert(PtrVT == IntVT &&
2689 "PTRADD with different operand types is not supported");
2690
2691 // fold (ptradd x, 0) -> x
2692 if (isNullConstant(N1))
2693 return N0;
2694
2695 // fold (ptradd 0, x) -> x
2696 if (PtrVT == IntVT && isNullConstant(N0))
2697 return N1;
2698
2699 if (N0.getOpcode() == ISD::PTRADD &&
2700 !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2701 SDValue X = N0.getOperand(0);
2702 SDValue Y = N0.getOperand(1);
2703 SDValue Z = N1;
2704 bool N0OneUse = N0.hasOneUse();
2705 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2706 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2707
2708 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2709 // * y is a constant and (ptradd x, y) has one use; or
2710 // * y and z are both constants.
2711 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2712 // If both additions in the original were NUW, the new ones are as well.
2713 SDNodeFlags Flags =
2714 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2715 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2716 AddToWorklist(Add.getNode());
2717 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2718 }
2719 }
2720
2721 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2722 // That is problematic for settings like AArch64's CPA, which checks that
2723 // intermediate results of pointer arithmetic remain in bounds. The target
2724 // therefore needs to opt-in to enable them.
2726 DAG.getMachineFunction().getFunction(), PtrVT))
2727 return SDValue();
2728
2729 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2730 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2731 // global address GA and constant c, such that c can be folded into GA.
2732 // TODO: Support constant vector splats.
2733 SDValue GAValue = N0.getOperand(0);
2734 if (const GlobalAddressSDNode *GA =
2735 dyn_cast<GlobalAddressSDNode>(GAValue)) {
2736 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2737 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2738 // If both additions in the original were NUW, reassociation preserves
2739 // that.
2740 SDNodeFlags Flags =
2741 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2742 SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2743 AddToWorklist(Inner.getNode());
2744 return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2745 }
2746 }
2747 }
2748
2749 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2750 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2751 // y is not, and (add y, z) is used only once.
2752 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2753 // z is not, and (add y, z) is used only once.
2754 // The goal is to move constant offsets to the outermost ptradd, to create
2755 // more opportunities to fold offsets into memory instructions.
2756 // Together with another combine above, this also implements
2757 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2758 SDValue X = N0;
2759 SDValue Y = N1.getOperand(0);
2760 SDValue Z = N1.getOperand(1);
2761 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2762 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2763
2764 // If both additions in the original were NUW, reassociation preserves that.
2765 SDNodeFlags ReassocFlags =
2766 (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2767
2768 if (ZIsConstant != YIsConstant) {
2769 if (YIsConstant)
2770 std::swap(Y, Z);
2771 SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2772 AddToWorklist(Inner.getNode());
2773 return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2774 }
2775 }
2776
2777 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2778 // that transformation can't block an offset folding at any use of the ptradd.
2779 // This should be done late, after legalization, so that it doesn't block
2780 // other ptradd combines that could enable more offset folding.
2781 if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2782 bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2783 return canFoldInAddressingMode(N, User, DAG, TLI);
2784 });
2785
2786 if (TransformCannotBreakAddrMode)
2787 return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2788 }
2789
2790 return SDValue();
2791}
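// Illustrative end-to-end reassociation (assuming the target allows the
// combines above), with constant offset c and variable index idx:
//   (ptradd (ptradd p, c), idx)
//     --> (ptradd p, (add c, idx))      [first reassociation above]
//     --> (ptradd (ptradd p, idx), c)   [constant moved to the outer ptradd]
// leaving c where it can later fold into a memory instruction's addressing
// mode.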
2792
2793/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2794/// a shift and add with a different constant.
2795static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2796 SelectionDAG &DAG) {
2797 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2798 "Expecting add or sub");
2799
2800 // We need a constant operand for the add/sub, and the other operand is a
2801 // logical shift right: add (srl), C or sub C, (srl).
2802 bool IsAdd = N->getOpcode() == ISD::ADD;
2803 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2804 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2805 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2806 ShiftOp.getOpcode() != ISD::SRL)
2807 return SDValue();
2808
2809 // The shift must be of a 'not' value.
2810 SDValue Not = ShiftOp.getOperand(0);
2811 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2812 return SDValue();
2813
2814 // The shift must be moving the sign bit to the least-significant-bit.
2815 EVT VT = ShiftOp.getValueType();
2816 SDValue ShAmt = ShiftOp.getOperand(1);
2817 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2818 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2819 return SDValue();
2820
2821 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2822 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2823 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2824 if (SDValue NewC = DAG.FoldConstantArithmetic(
2825 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2826 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2827 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2828 Not.getOperand(0), ShAmt);
2829 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2830 }
2831
2832 return SDValue();
2833}
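// Worked reasoning for the i32 add form: (srl (not X), 31) is 1 when X is
// non-negative and 0 when X is negative, i.e. it equals (sra X, 31) + 1 in
// two's complement. Hence
//   (add (srl (not X), 31), C) == (add (sra X, 31), C + 1)
// which is exactly the rewrite performed above.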
2834
2835static bool
2836areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2837 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2838 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2839}
2840
2841/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2842/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2843/// are no common bits set in the operands).
2844SDValue DAGCombiner::visitADDLike(SDNode *N) {
2845 SDValue N0 = N->getOperand(0);
2846 SDValue N1 = N->getOperand(1);
2847 EVT VT = N0.getValueType();
2848 SDLoc DL(N);
2849
2850 // fold (add x, undef) -> undef
2851 if (N0.isUndef())
2852 return N0;
2853 if (N1.isUndef())
2854 return N1;
2855
2856 // fold (add c1, c2) -> c1+c2
2857 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2858 return C;
2859
2860 // canonicalize constant to RHS
2861 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2862 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2863 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2864
2865 if (areBitwiseNotOfEachother(N0, N1))
2866 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2867
2868 // fold vector ops
2869 if (VT.isVector()) {
2870 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2871 return FoldedVOp;
2872
2873 // fold (add x, 0) -> x, vector edition
2874 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2875 return N0;
2876 }
2877
2878 // fold (add x, 0) -> x
2879 if (isNullConstant(N1))
2880 return N0;
2881
2882 if (N0.getOpcode() == ISD::SUB) {
2883 SDValue N00 = N0.getOperand(0);
2884 SDValue N01 = N0.getOperand(1);
2885
2886 // fold ((A-c1)+c2) -> (A+(c2-c1))
2887 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2888 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2889
2890 // fold ((c1-A)+c2) -> (c1+c2)-A
2891 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2892 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2893 }
2894
2895 // add (sext i1 X), 1 -> zext (not i1 X)
2896 // We don't transform this pattern:
2897 // add (zext i1 X), -1 -> sext (not i1 X)
2898 // because most (?) targets generate better code for the zext form.
2899 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2900 isOneOrOneSplat(N1)) {
2901 SDValue X = N0.getOperand(0);
2902 if ((!LegalOperations ||
2903 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2904 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2905 X.getScalarValueSizeInBits() == 1) {
2906 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2907 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2908 }
2909 }
2910
2911 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2912 // iff (or x, c0) is equivalent to (add x, c0).
2913 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2914 // iff (xor x, c0) is equivalent to (add x, c0).
2915 if (DAG.isADDLike(N0)) {
2916 SDValue N01 = N0.getOperand(1);
2917 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2918 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2919 }
2920
2921 if (SDValue NewSel = foldBinOpIntoSelect(N))
2922 return NewSel;
2923
2924 // reassociate add
2925 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2926 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2927 return RADD;
2928
2929 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2930 // equivalent to (add x, c).
2931 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2932 // equivalent to (add x, c).
2933 // Do this optimization only when adding c does not introduce instructions
2934 // for adding carries.
2935 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2936 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2937 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2938 // If N0's type does not split or is a sign mask, it does not introduce
2939 // add carry.
2940 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2941 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2942 TyActn == TargetLoweringBase::TypePromoteInteger ||
2943 isMinSignedConstant(N0.getOperand(1));
2944 if (NoAddCarry)
2945 return DAG.getNode(
2946 ISD::ADD, DL, VT,
2947 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2948 N0.getOperand(1));
2949 }
2950 return SDValue();
2951 };
2952 if (SDValue Add = ReassociateAddOr(N0, N1))
2953 return Add;
2954 if (SDValue Add = ReassociateAddOr(N1, N0))
2955 return Add;
2956
2957 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2958 if (SDValue SD =
2959 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2960 return SD;
2961 }
2962
2963 SDValue A, B, C, D;
2964
2965 // fold ((0-A) + B) -> B-A
2966 if (sd_match(N0, m_Neg(m_Value(A))))
2967 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2968
2969 // fold (A + (0-B)) -> A-B
2970 if (sd_match(N1, m_Neg(m_Value(B))))
2971 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2972
2973 // fold (A+(B-A)) -> B
2974 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2975 return B;
2976
2977 // fold ((B-A)+A) -> B
2978 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2979 return B;
2980
2981 // fold ((A-B)+(C-A)) -> (C-B)
2982 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2983 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2984 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2985
2986 // fold ((A-B)+(B-C)) -> (A-C)
2987 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2988 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2989 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2990
2991 // fold (A+(B-(A+C))) to (B-C)
2992 // fold (A+(B-(C+A))) to (B-C)
2993 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2994 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2995
2996 // fold (A+((B-A)+or-C)) to (B+or-C)
2997 if (sd_match(N1,
2998 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2999 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3000 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3001
3002 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3003 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3004 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3005 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
3006 return DAG.getNode(ISD::SUB, DL, VT,
3007 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3008 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3009
3010 // fold (add (umax X, C), -C) --> (usubsat X, C)
3011 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
3012 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3013 return (!Max && !Op) ||
3014 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3015 };
3016 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3017 /*AllowUndefs*/ true))
3018 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3019 N0.getOperand(1));
3020 }
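// Worked example of the fold above (i8, C = 10): (add (umax X, 10), -10)
// computes X - 10 when X >= 10 and 0 otherwise, which is exactly
// (usubsat X, 10); the add of -C is the modular subtraction of C.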
3021
3022 if (SimplifyDemandedBits(SDValue(N, 0)))
3023 return SDValue(N, 0);
3024
3025 if (isOneOrOneSplat(N1)) {
3026 // fold (add (xor a, -1), 1) -> (sub 0, a)
3027 if (isBitwiseNot(N0))
3028 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3029 N0.getOperand(0));
3030
3031 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3032 if (N0.getOpcode() == ISD::ADD) {
3033 SDValue A, Xor;
3034
3035 if (isBitwiseNot(N0.getOperand(0))) {
3036 A = N0.getOperand(1);
3037 Xor = N0.getOperand(0);
3038 } else if (isBitwiseNot(N0.getOperand(1))) {
3039 A = N0.getOperand(0);
3040 Xor = N0.getOperand(1);
3041 }
3042
3043 if (Xor)
3044 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3045 }
3046
3047 // Look for:
3048 // add (add x, y), 1
3049 // And if the target does not like this form then turn into:
3050 // sub y, (xor x, -1)
3051 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3052 N0.hasOneUse() &&
3053 // Limit this to after legalization if the add has wrap flags
3054 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3055 !N->getFlags().hasNoSignedWrap()))) {
3056 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3057 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3058 }
3059 }
3060
3061 // (x - y) + -1 -> add (xor y, -1), x
3062 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3063 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3064 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3065 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3066 }
3067
3068 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3069 // This can help if the inner add has multiple uses.
3070 APInt CM, CA;
3071 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3072 if (VT.getScalarSizeInBits() <= 64) {
3073 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3074 m_ConstInt(CM)))) &&
3075 TLI.isLegalAddImmediate(
3076 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3077 SDNodeFlags Flags;
3078 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3079 // are _also_ nsw, the outputs can be too.
3080 if (N->getFlags().hasNoUnsignedWrap() &&
3081 N0->getFlags().hasNoUnsignedWrap() &&
3082 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3083 Flags.setNoUnsignedWrap(true);
3084 if (N->getFlags().hasNoSignedWrap() &&
3085 N0->getFlags().hasNoSignedWrap() &&
3086 N0.getOperand(0)->getFlags().hasNoSignedWrap())
3087 Flags.setNoSignedWrap(true);
3088 }
3089 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3090 DAG.getConstant(CM, DL, VT), Flags);
3091 return DAG.getNode(
3092 ISD::ADD, DL, VT, Mul,
3093 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3094 }
3095 // Also look in case there is an intermediate add.
3096 if (sd_match(N0, m_OneUse(m_Add(
3098 m_ConstInt(CM))),
3099 m_Value(B)))) &&
3101 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3102 SDNodeFlags Flags;
3103 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3104 // are _also_ nsw the outputs can be too.
3105 SDValue OMul =
3106 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3107 if (N->getFlags().hasNoUnsignedWrap() &&
3108 N0->getFlags().hasNoUnsignedWrap() &&
3109 OMul->getFlags().hasNoUnsignedWrap() &&
3110 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3111 Flags.setNoUnsignedWrap(true);
3112 if (N->getFlags().hasNoSignedWrap() &&
3113 N0->getFlags().hasNoSignedWrap() &&
3114 OMul->getFlags().hasNoSignedWrap() &&
3115 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3116 Flags.setNoSignedWrap(true);
3117 }
3118 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3119 DAG.getConstant(CM, DL, VT), Flags);
3120 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3121 return DAG.getNode(
3122 ISD::ADD, DL, VT, Add,
3123 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3124 }
3125 }
3126 }
3127
3128 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3129 return Combined;
3130
3131 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3132 return Combined;
3133
3134 return SDValue();
3135}
3136
3137// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
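// Why this works: A + B == 2*(A & B) + (A ^ B) (shared bits carry, differing bits
// add), so (A & B) + ((A ^ B) >> 1) is floor((A + B) / 2) computed without the
// intermediate sum ever overflowing.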
3138SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3139 SDValue N0 = N->getOperand(0);
3140 EVT VT = N0.getValueType();
3141 SDValue A, B;
3142
3143 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3145 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3146 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3147 }
3148 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3150 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3151 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3152 }
3153
3154 return SDValue();
3155}
3156
3157SDValue DAGCombiner::visitADD(SDNode *N) {
3158 SDValue N0 = N->getOperand(0);
3159 SDValue N1 = N->getOperand(1);
3160 EVT VT = N0.getValueType();
3161 SDLoc DL(N);
3162
3163 if (SDValue Combined = visitADDLike(N))
3164 return Combined;
3165
3166 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3167 return V;
3168
3169 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3170 return V;
3171
3172 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3173 return V;
3174
3175 // Try to match AVGFLOOR fixedwidth pattern
3176 if (SDValue V = foldAddToAvg(N, DL))
3177 return V;
3178
3179 // fold (a+b) -> (a|b) iff a and b share no bits.
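// Example: if N0 == (X << 4) and N1 == 7, the operands cannot have a set bit in
// common, so no carry can occur and the add is equivalent to an or.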
3180 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3181 DAG.haveNoCommonBitsSet(N0, N1))
3182 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3183
3184 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3185 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3186 const APInt &C0 = N0->getConstantOperandAPInt(0);
3187 const APInt &C1 = N1->getConstantOperandAPInt(0);
3188 return DAG.getVScale(DL, VT, C0 + C1);
3189 }
3190
3191 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3192 if (N0.getOpcode() == ISD::ADD &&
3193 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3194 N1.getOpcode() == ISD::VSCALE) {
3195 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3196 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3197 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3198 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3199 }
3200
3201 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3202 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3203 N1.getOpcode() == ISD::STEP_VECTOR) {
3204 const APInt &C0 = N0->getConstantOperandAPInt(0);
3205 const APInt &C1 = N1->getConstantOperandAPInt(0);
3206 APInt NewStep = C0 + C1;
3207 return DAG.getStepVector(DL, VT, NewStep);
3208 }
3209
3210 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3211 if (N0.getOpcode() == ISD::ADD &&
3213 N1.getOpcode() == ISD::STEP_VECTOR) {
3214 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3215 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3216 APInt NewStep = SV0 + SV1;
3217 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3218 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3219 }
3220
3221 return SDValue();
3222}
3223
3224SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3225 unsigned Opcode = N->getOpcode();
3226 SDValue N0 = N->getOperand(0);
3227 SDValue N1 = N->getOperand(1);
3228 EVT VT = N0.getValueType();
3229 bool IsSigned = Opcode == ISD::SADDSAT;
3230 SDLoc DL(N);
3231
3232 // fold (add_sat x, undef) -> -1
3233 if (N0.isUndef() || N1.isUndef())
3234 return DAG.getAllOnesConstant(DL, VT);
3235
3236 // fold (add_sat c1, c2) -> c3
3237 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3238 return C;
3239
3240 // canonicalize constant to RHS
3243 return DAG.getNode(Opcode, DL, VT, N1, N0);
3244
3245 // fold vector ops
3246 if (VT.isVector()) {
3247 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3248 return FoldedVOp;
3249
3250 // fold (add_sat x, 0) -> x, vector edition
3252 return N0;
3253 }
3254
3255 // fold (add_sat x, 0) -> x
3256 if (isNullConstant(N1))
3257 return N0;
3258
3259 // If it cannot overflow, transform into an add.
3260 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3261 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3262
3263 return SDValue();
3264}
3265
3266 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3267 bool ForceCarryReconstruction = false) {
3268 bool Masked = false;
3269
3270 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3271 while (true) {
3272 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3273 V = V.getOperand(0);
3274 continue;
3275 }
3276
3277 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3278 if (ForceCarryReconstruction)
3279 return V;
3280
3281 Masked = true;
3282 V = V.getOperand(0);
3283 continue;
3284 }
3285
3286 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3287 return V;
3288
3289 break;
3290 }
3291
3292 // If this is not a carry, return.
3293 if (V.getResNo() != 1)
3294 return SDValue();
3295
3296 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3297 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3298 return SDValue();
3299
3300 EVT VT = V->getValueType(0);
3301 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3302 return SDValue();
3303
3304 // If the result is masked, then no matter what kind of bool it is we can
3305 // return. If it isn't, then we need to make sure the bool type is either 0 or
3306 // 1 and not other values.
3307 if (Masked ||
3308 TLI.getBooleanContents(V.getValueType()) ==
3309 TargetLowering::ZeroOrOneBooleanContent)
3310 return V;
3311
3312 return SDValue();
3313}
3314
3315/// Given the operands of an add/sub operation, see if the 2nd operand is a
3316/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3317/// the opcode and bypass the mask operation.
3318static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3319 SelectionDAG &DAG, const SDLoc &DL) {
3320 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3321 N1 = N1.getOperand(0);
3322
3323 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3324 return SDValue();
3325
3326 EVT VT = N0.getValueType();
3327 SDValue N10 = N1.getOperand(0);
3328 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3329 N10 = N10.getOperand(0);
3330
3331 if (N10.getValueType() != VT)
3332 return SDValue();
3333
3334 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3335 return SDValue();
3336
3337 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3338 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3339 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3340}
3341
3342/// Helper for doing combines based on N0 and N1 being added to each other.
3343SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3344 SDNode *LocReference) {
3345 EVT VT = N0.getValueType();
3346 SDLoc DL(LocReference);
3347
3348 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3349 SDValue Y, N;
3350 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3351 return DAG.getNode(ISD::SUB, DL, VT, N0,
3352 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3353
3354 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3355 return V;
3356
3357 // Look for:
3358 // add (add x, 1), y
3359 // And if the target does not like this form then turn into:
3360 // sub y, (xor x, -1)
3361 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3362 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3363 // Limit this to after legalization if the add has wrap flags
3364 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3365 !N0->getFlags().hasNoSignedWrap()))) {
3366 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3367 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3368 }
3369
3370 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3371 // Hoist one-use subtraction by non-opaque constant:
3372 // (x - C) + y -> (x + y) - C
3373 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3374 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3375 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3376 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3377 }
3378 // Hoist one-use subtraction from non-opaque constant:
3379 // (C - x) + y -> (y - x) + C
3380 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3381 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3382 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3383 }
3384 }
3385
3386 // add (mul x, C), x -> mul x, C+1
3387 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3388 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3389 N0.hasOneUse()) {
3390 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3391 DAG.getConstant(1, DL, VT));
3392 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3393 }
3394
3395 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3396 // rather than 'add 0/-1' (the zext should get folded).
3397 // add (sext i1 Y), X --> sub X, (zext i1 Y)
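// Rationale: (sext i1 Y) is 0 or -1, so X + (sext i1 Y) is X or X - 1, which is
// exactly X - (zext i1 Y).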
3398 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3399 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3401 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3402 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3403 }
3404
3405 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3406 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3407 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3408 if (TN->getVT() == MVT::i1) {
3409 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3410 DAG.getConstant(1, DL, VT));
3411 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3412 }
3413 }
3414
3415 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3416 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3417 N1.getResNo() == 0)
3418 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3419 N0, N1.getOperand(0), N1.getOperand(2));
3420
3421 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3423 if (SDValue Carry = getAsCarry(TLI, N1))
3424 return DAG.getNode(ISD::UADDO_CARRY, DL,
3425 DAG.getVTList(VT, Carry.getValueType()), N0,
3426 DAG.getConstant(0, DL, VT), Carry);
3427
3428 return SDValue();
3429}
3430
3431SDValue DAGCombiner::visitADDC(SDNode *N) {
3432 SDValue N0 = N->getOperand(0);
3433 SDValue N1 = N->getOperand(1);
3434 EVT VT = N0.getValueType();
3435 SDLoc DL(N);
3436
3437 // If the flag result is dead, turn this into an ADD.
3438 if (!N->hasAnyUseOfValue(1))
3439 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3440 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3441
3442 // canonicalize constant to RHS.
3443 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3444 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3445 if (N0C && !N1C)
3446 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3447
3448 // fold (addc x, 0) -> x + no carry out
3449 if (isNullConstant(N1))
3450 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3451 DL, MVT::Glue));
3452
3453 // If it cannot overflow, transform into an add.
3455 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3456 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3457
3458 return SDValue();
3459}
3460
3461/**
3462 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3463 * then the flip also occurs if computing the inverse is the same cost.
3464 * This function returns an empty SDValue in case it cannot flip the boolean
3465 * without increasing the cost of the computation. If you want to flip a boolean
3466 * no matter what, use DAG.getLogicalNOT.
3467 */
3468 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3469 const TargetLowering &TLI,
3470 bool Force) {
3471 if (Force && isa<ConstantSDNode>(V))
3472 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3473
3474 if (V.getOpcode() != ISD::XOR)
3475 return SDValue();
3476
3477 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3478 return V.getOperand(0);
3479 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3480 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3481 return SDValue();
3482}
3483
3484SDValue DAGCombiner::visitADDO(SDNode *N) {
3485 SDValue N0 = N->getOperand(0);
3486 SDValue N1 = N->getOperand(1);
3487 EVT VT = N0.getValueType();
3488 bool IsSigned = (ISD::SADDO == N->getOpcode());
3489
3490 EVT CarryVT = N->getValueType(1);
3491 SDLoc DL(N);
3492
3493 // If the flag result is dead, turn this into an ADD.
3494 if (!N->hasAnyUseOfValue(1))
3495 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3496 DAG.getUNDEF(CarryVT));
3497
3498 // canonicalize constant to RHS.
3501 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3502
3503 // fold (addo x, 0) -> x + no carry out
3504 if (isNullOrNullSplat(N1))
3505 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3506
3507 // If it cannot overflow, transform into an add.
3508 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3509 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3510 DAG.getConstant(0, DL, CarryVT));
3511
3512 if (IsSigned) {
3513 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3514 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3515 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3516 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3517 } else {
3518 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3519 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3520 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3521 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3522 return CombineTo(
3523 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3524 }
3525
3526 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3527 return Combined;
3528
3529 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3530 return Combined;
3531 }
3532
3533 return SDValue();
3534}
3535
3536SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3537 EVT VT = N0.getValueType();
3538 if (VT.isVector())
3539 return SDValue();
3540
3541 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3542 // If Y + 1 cannot overflow.
3543 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3544 SDValue Y = N1.getOperand(0);
3545 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3547 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3548 N1.getOperand(2));
3549 }
3550
3551 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3553 if (SDValue Carry = getAsCarry(TLI, N1))
3554 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3555 DAG.getConstant(0, SDLoc(N), VT), Carry);
3556
3557 return SDValue();
3558}
3559
3560SDValue DAGCombiner::visitADDE(SDNode *N) {
3561 SDValue N0 = N->getOperand(0);
3562 SDValue N1 = N->getOperand(1);
3563 SDValue CarryIn = N->getOperand(2);
3564
3565 // canonicalize constant to RHS
3566 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3567 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3568 if (N0C && !N1C)
3569 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3570 N1, N0, CarryIn);
3571
3572 // fold (adde x, y, false) -> (addc x, y)
3573 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3574 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3575
3576 return SDValue();
3577}
3578
3579SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3580 SDValue N0 = N->getOperand(0);
3581 SDValue N1 = N->getOperand(1);
3582 SDValue CarryIn = N->getOperand(2);
3583 SDLoc DL(N);
3584
3585 // canonicalize constant to RHS
3586 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3587 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3588 if (N0C && !N1C)
3589 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3590
3591 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3592 if (isNullConstant(CarryIn)) {
3593 if (!LegalOperations ||
3594 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3595 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3596 }
3597
3598 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3599 if (isNullConstant(N0) && isNullConstant(N1)) {
3600 EVT VT = N0.getValueType();
3601 EVT CarryVT = CarryIn.getValueType();
3602 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3603 AddToWorklist(CarryExt.getNode());
3604 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3605 DAG.getConstant(1, DL, VT)),
3606 DAG.getConstant(0, DL, CarryVT));
3607 }
3608
3609 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3610 return Combined;
3611
3612 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3613 return Combined;
3614
3615 // We want to avoid useless duplication.
3616 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3617 // not a binary operation, it is not really possible to leverage this
3618 // existing mechanism for it. However, if more operations require the same
3619 // deduplication logic, then it may be worth generalizing.
3620 SDValue Ops[] = {N1, N0, CarryIn};
3621 SDNode *CSENode =
3622 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3623 if (CSENode)
3624 return SDValue(CSENode, 0);
3625
3626 return SDValue();
3627}
3628
3629/**
3630 * If we are facing some sort of diamond carry propagation pattern try to
3631 * break it up to generate something like:
3632 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3633 *
3634 * The end result is usually an increase in the number of operations required, but because the
3635 * carry is now linearized, other transforms can kick in and optimize the DAG.
3636 *
3637 * Patterns typically look something like
3638 *                (uaddo A, B)
3639 *                 /        \
3640 *              Carry       Sum
3641 *                |           \
3642 *                |    (uaddo_carry *, 0, Z)
3643 *                |         /
3644 *                 \     Carry
3645 *                  |    /
3646 *          (uaddo_carry X, *, *)
3647 *
3648 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3649 * produce a combine with a single path for carry propagation.
3650 */
3651 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3652 SelectionDAG &DAG, SDValue X,
3653 SDValue Carry0, SDValue Carry1,
3654 SDNode *N) {
3655 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3656 return SDValue();
3657 if (Carry1.getOpcode() != ISD::UADDO)
3658 return SDValue();
3659
3660 SDValue Z;
3661
3662 /**
3663 * First look for a suitable Z. It will present itself in the form of
3664 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3665 */
3666 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3667 isNullConstant(Carry0.getOperand(1))) {
3668 Z = Carry0.getOperand(2);
3669 } else if (Carry0.getOpcode() == ISD::UADDO &&
3670 isOneConstant(Carry0.getOperand(1))) {
3671 EVT VT = Carry0->getValueType(1);
3672 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3673 } else {
3674 // We couldn't find a suitable Z.
3675 return SDValue();
3676 }
3677
3678
3679 auto cancelDiamond = [&](SDValue A,SDValue B) {
3680 SDLoc DL(N);
3681 SDValue NewY =
3682 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3683 Combiner.AddToWorklist(NewY.getNode());
3684 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3685 DAG.getConstant(0, DL, X.getValueType()),
3686 NewY.getValue(1));
3687 };
3688
3689 /**
3690 * (uaddo A, B)
3691 * |
3692 * Sum
3693 * |
3694 * (uaddo_carry *, 0, Z)
3695 */
3696 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3697 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3698 }
3699
3700 /**
3701 * (uaddo_carry A, 0, Z)
3702 * |
3703 * Sum
3704 * |
3705 * (uaddo *, B)
3706 */
3707 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3708 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3709 }
3710
3711 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3712 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3713 }
3714
3715 return SDValue();
3716}
3717
3718// If we are facing some sort of diamond carry/borrow in/out pattern try to
3719// match patterns like:
3720//
3721// (uaddo A, B) CarryIn
3722// | \ |
3723// | \ |
3724// PartialSum PartialCarryOutX /
3725// | | /
3726// | ____|____________/
3727// | / |
3728// (uaddo *, *) \________
3729// | \ \
3730// | \ |
3731// | PartialCarryOutY |
3732// | \ |
3733// | \ /
3734// AddCarrySum | ______/
3735// | /
3736// CarryOut = (or *, *)
3737//
3738// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3739//
3740// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3741//
3742// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3743// with a single path for carry/borrow out propagation.
3744 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3745 SDValue N0, SDValue N1, SDNode *N) {
3746 SDValue Carry0 = getAsCarry(TLI, N0);
3747 if (!Carry0)
3748 return SDValue();
3749 SDValue Carry1 = getAsCarry(TLI, N1);
3750 if (!Carry1)
3751 return SDValue();
3752
3753 unsigned Opcode = Carry0.getOpcode();
3754 if (Opcode != Carry1.getOpcode())
3755 return SDValue();
3756 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3757 return SDValue();
3758 // Guarantee identical type of CarryOut
3759 EVT CarryOutType = N->getValueType(0);
3760 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3761 CarryOutType != Carry1.getValue(1).getValueType())
3762 return SDValue();
3763
3764 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3765 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3766 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3767 std::swap(Carry0, Carry1);
3768
3769 // Check if nodes are connected in expected way.
3770 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3771 Carry1.getOperand(1) != Carry0.getValue(0))
3772 return SDValue();
3773
3774 // The carry in value must be on the righthand side for subtraction.
3775 unsigned CarryInOperandNum =
3776 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3777 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3778 return SDValue();
3779 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3780
3781 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3782 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3783 return SDValue();
3784
3785 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3786 CarryIn = getAsCarry(TLI, CarryIn, true);
3787 if (!CarryIn)
3788 return SDValue();
3789
3790 SDLoc DL(N);
3791 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3792 Carry1->getValueType(0));
3793 SDValue Merged =
3794 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3795 Carry0.getOperand(1), CarryIn);
3796
3797 // Please note that because we have proven that the result of the UADDO/USUBO
3798 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3799 // therefore prove that if the first UADDO/USUBO overflows, the second
3800 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3801 // maximum value.
3802 //
3803 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3804 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3805 //
3806 // This is important because it means that OR and XOR can be used to merge
3807 // carry flags; and that AND can return a constant zero.
3808 //
3809 // TODO: match other operations that can merge flags (ADD, etc)
3810 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3811 if (N->getOpcode() == ISD::AND)
3812 return DAG.getConstant(0, DL, CarryOutType);
3813 return Merged.getValue(1);
3814}
3815
3816SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3817 SDValue CarryIn, SDNode *N) {
3818 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3819 // carry.
3820 if (isBitwiseNot(N0))
3821 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3822 SDLoc DL(N);
3823 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3824 N0.getOperand(0), NotC);
3825 return CombineTo(
3826 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3827 }
3828
3829 // Iff the flag result is dead:
3830 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3831 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3832 // or the dependency between the instructions.
3833 if ((N0.getOpcode() == ISD::ADD ||
3834 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3835 N0.getValue(1) != CarryIn)) &&
3836 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3837 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3838 N0.getOperand(0), N0.getOperand(1), CarryIn);
3839
3840 /**
3841 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3842 * a diamond carry propagation, in which case we try to transform the DAG
3843 * to ensure linear carry propagation if that is possible.
3844 */
3845 if (auto Y = getAsCarry(TLI, N1)) {
3846 // Because both are carries, Y and Z can be swapped.
3847 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3848 return R;
3849 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3850 return R;
3851 }
3852
3853 return SDValue();
3854}
3855
3856SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3857 SDValue CarryIn, SDNode *N) {
3858 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3859 if (isBitwiseNot(N0)) {
3860 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3861 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3862 N0.getOperand(0), NotC);
3863 }
3864
3865 return SDValue();
3866}
3867
3868SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3869 SDValue N0 = N->getOperand(0);
3870 SDValue N1 = N->getOperand(1);
3871 SDValue CarryIn = N->getOperand(2);
3872 SDLoc DL(N);
3873
3874 // canonicalize constant to RHS
3875 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3876 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3877 if (N0C && !N1C)
3878 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3879
3880 // fold (saddo_carry x, y, false) -> (saddo x, y)
3881 if (isNullConstant(CarryIn)) {
3882 if (!LegalOperations ||
3883 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3884 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3885 }
3886
3887 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3888 return Combined;
3889
3890 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3891 return Combined;
3892
3893 return SDValue();
3894}
3895
3896// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3897// clamp/truncation if necessary.
3898 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3899 SDValue RHS, SelectionDAG &DAG,
3900 const SDLoc &DL) {
3901 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3902 "Illegal truncation");
3903
3904 if (DstVT == SrcVT)
3905 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3906
3907 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3908 // clamping RHS.
3910 DstVT.getScalarSizeInBits());
3911 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3912 return SDValue();
3913
3914 SDValue SatLimit =
3916 DstVT.getScalarSizeInBits()),
3917 DL, SrcVT);
3918 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3919 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3920 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3921 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3922}
3923
3924// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3925// usubsat(a,b), optionally as a truncated type.
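// For reference: umax(a,b) - b is a - b when a >= b and 0 otherwise, and the
// same holds for a - umin(a,b), so both are equivalent to usubsat(a,b).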
3926SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3927 if (N->getOpcode() != ISD::SUB ||
3928 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3929 return SDValue();
3930
3931 EVT SubVT = N->getValueType(0);
3932 SDValue Op0 = N->getOperand(0);
3933 SDValue Op1 = N->getOperand(1);
3934
3935 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3936 // that may be converted to usubsat(a,b).
3937 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3938 SDValue MaxLHS = Op0.getOperand(0);
3939 SDValue MaxRHS = Op0.getOperand(1);
3940 if (MaxLHS == Op1)
3941 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3942 if (MaxRHS == Op1)
3943 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3944 }
3945
3946 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3947 SDValue MinLHS = Op1.getOperand(0);
3948 SDValue MinRHS = Op1.getOperand(1);
3949 if (MinLHS == Op0)
3950 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3951 if (MinRHS == Op0)
3952 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3953 }
3954
3955 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3956 if (Op1.getOpcode() == ISD::TRUNCATE &&
3957 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3958 Op1.getOperand(0).hasOneUse()) {
3959 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3960 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3961 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3962 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3963 DAG, DL);
3964 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3965 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3966 DAG, DL);
3967 }
3968
3969 return SDValue();
3970}
3971
3972// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3973 // counting leading ones. Broadly, it replaces the subtraction with a left
3974// shift.
3975//
3976// * DAG Legalisation Pattern:
3977//
3978// (sub (ctlz (zeroextend (not Src)))
3979// BitWidthDiff)
3980//
3981// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3982// -->
3983//
3984// (ctlz_zero_undef (not (shl (anyextend Src)
3985// BitWidthDiff)))
3986//
3987// * Type Legalisation Pattern:
3988//
3989// (sub (ctlz (and (xor Src XorMask)
3990// AndMask))
3991// BitWidthDiff)
3992//
3993// if AndMask has only trailing ones
3994// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3995// and XorMask has more trailing ones than AndMask
3996// -->
3997//
3998// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
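// Worked example of the first pattern: counting the leading ones of an i8 Src
// promoted to i32 (BitWidthDiff = 24):
//   ctlz(zext i32 (not Src)) - 24  -->  ctlz_zero_undef(not ((anyext i32 Src) << 24))
// After the shift, Src occupies the top 8 bits and the low 24 bits of the NOT are
// all ones, so the operand is never zero and ctlz_zero_undef is safe.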
3999template <class MatchContextClass>
4001 const SDLoc DL(N);
4002 SDValue N0 = N->getOperand(0);
4003 EVT VT = N0.getValueType();
4004 unsigned BitWidth = VT.getScalarSizeInBits();
4005
4006 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
4007
4008 APInt AndMask;
4009 APInt XorMask;
4010 APInt BitWidthDiff;
4011
4012 SDValue CtlzOp;
4013 SDValue Src;
4014
4015 if (!sd_context_match(
4016 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
4017 return SDValue();
4018
4019 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
4020 // DAG Legalisation Pattern:
4021 // (sub (ctlz (zero_extend (not Op))) BitWidthDiff)
4022 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
4023 return SDValue();
4024
4025 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
4026 } else if (sd_context_match(CtlzOp, Matcher,
4027 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
4028 m_ConstInt(AndMask)))) {
4029 // Type Legalisation Pattern:
4030 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
4031 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
4032 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
4033 return SDValue();
4034 } else
4035 return SDValue();
4036
4037 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4038 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4039 SDValue Not =
4040 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4041
4042 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4043}
4044
4045// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
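// Rationale: x - (x / y) * y == x % y, and the DIVREM node already produces the
// remainder as its second result, so the whole subtraction can be replaced by
// result 1 of that node.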
4046 static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4047 const SDLoc &DL) {
4048 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4049 SDValue Sub0 = N->getOperand(0);
4050 SDValue Sub1 = N->getOperand(1);
4051
4052 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4053 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4054 DivRem.getOpcode() == ISD::UDIVREM) &&
4055 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4056 DivRem.getOperand(1) == MaybeY) {
4057 return SDValue(DivRem.getNode(), 1);
4058 }
4059 return SDValue();
4060 };
4061
4062 if (Sub1.getOpcode() == ISD::MUL) {
4063 // (sub x, (mul divrem(x,y)[0], y))
4064 SDValue Mul0 = Sub1.getOperand(0);
4065 SDValue Mul1 = Sub1.getOperand(1);
4066
4067 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4068 return Res;
4069
4070 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4071 return Res;
4072
4073 } else if (Sub1.getOpcode() == ISD::SHL) {
4074 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4075 SDValue Shl0 = Sub1.getOperand(0);
4076 SDValue Shl1 = Sub1.getOperand(1);
4077 // Check if Shl0 is divrem(x, Y)[0]
4078 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4079 Shl0.getOpcode() == ISD::UDIVREM) &&
4080 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4081
4082 SDValue Divisor = Shl0.getOperand(1);
4083
4084 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4085 ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4086 if (!DivC || !ShC)
4087 return SDValue();
4088
4089 if (DivC->getAPIntValue().isPowerOf2() &&
4090 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4091 return SDValue(Shl0.getNode(), 1);
4092 }
4093 }
4094 return SDValue();
4095}
4096
4097 // Since it may not be valid to emit a fold to zero for vector initializers,
4098// check if we can before folding.
4099static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4100 SelectionDAG &DAG, bool LegalOperations) {
4101 if (!VT.isVector())
4102 return DAG.getConstant(0, DL, VT);
4103 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4104 return DAG.getConstant(0, DL, VT);
4105 return SDValue();
4106}
4107
4108SDValue DAGCombiner::visitSUB(SDNode *N) {
4109 SDValue N0 = N->getOperand(0);
4110 SDValue N1 = N->getOperand(1);
4111 EVT VT = N0.getValueType();
4112 unsigned BitWidth = VT.getScalarSizeInBits();
4113 SDLoc DL(N);
4114
4116 return V;
4117
4118 // fold (sub x, x) -> 0
4119 if (N0 == N1)
4120 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4121
4122 // fold (sub c1, c2) -> c3
4123 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4124 return C;
4125
4126 // fold vector ops
4127 if (VT.isVector()) {
4128 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4129 return FoldedVOp;
4130
4131 // fold (sub x, 0) -> x, vector edition
4133 return N0;
4134 }
4135
4136 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4137 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4138 if (N1.hasOneUse() && hasUMin(VT)) {
4139 SDValue Y;
4140 auto MS0 = m_Specific(N0);
4141 auto MVY = m_Value(Y);
4142 auto MZ = m_Zero();
4143 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4144 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4145
4146 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4147 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4148 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4149 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4150
4151 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4152 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4153 }
4154
4155 if (SDValue NewSel = foldBinOpIntoSelect(N))
4156 return NewSel;
4157
4158 // fold (sub x, c) -> (add x, -c)
4159 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4160 return DAG.getNode(ISD::ADD, DL, VT, N0,
4161 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4162
4163 if (isNullOrNullSplat(N0)) {
4164 // Right-shifting everything out but the sign bit followed by negation is
4165 // the same as flipping arithmetic/logical shift type without the negation:
4166 // -(X >>u 31) -> (X >>s 31)
4167 // -(X >>s 31) -> (X >>u 31)
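// Illustration for i32: (X >>u 31) is 0 or 1, so negating it gives 0 or -1,
// which is exactly (X >>s 31); the converse direction works the same way.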
4168 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4169 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4170 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4171 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4172 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4173 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4174 }
4175 }
4176
4177 // 0 - X --> 0 if the sub is NUW.
4178 if (N->getFlags().hasNoUnsignedWrap())
4179 return N0;
4180
4181 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
4182 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4183 // N1 must be 0 because negating the minimum signed value is undefined.
4184 if (N->getFlags().hasNoSignedWrap())
4185 return N0;
4186
4187 // 0 - X --> X if X is 0 or the minimum signed value.
4188 return N1;
4189 }
4190
4191 // Convert 0 - abs(x).
4192 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4193 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
4194 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4195 return Result;
4196
4197 // Similar to the previous rule, but this time targeting an expanded abs.
4198 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4199 // as well as
4200 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4201 // Note that these two are applicable to both signed and unsigned min/max.
4202 SDValue X;
4203 SDValue S0;
4204 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4205 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4206 m_UMax(m_Value(X), NegPat),
4207 m_SMin(m_Value(X), NegPat),
4208 m_UMin(m_Value(X), NegPat))))) {
4209 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4210 if (hasOperation(NewOpc, VT))
4211 return DAG.getNode(NewOpc, DL, VT, X, S0);
4212 }
4213
4214 // Fold neg(splat(neg(x))) -> splat(x)
4215 if (VT.isVector()) {
4216 SDValue N1S = DAG.getSplatValue(N1, true);
4217 if (N1S && N1S.getOpcode() == ISD::SUB &&
4218 isNullConstant(N1S.getOperand(0)))
4219 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4220 }
4221
4222 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4223 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4224 isOneOrOneSplat(N1->getOperand(1))) {
4225 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4226 if (VT.isVector())
4227 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4231 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4232 DAG.getValueType(ExtVT));
4233 }
4234 }
4235 }
4236
4237 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4238 if (isAllOnesOrAllOnesSplat(N0))
4239 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4240
4241 // fold (A - (0-B)) -> A+B
4242 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4243 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4244
4245 // fold A-(A-B) -> B
4246 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4247 return N1.getOperand(1);
4248
4249 // fold (A+B)-A -> B
4250 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4251 return N0.getOperand(1);
4252
4253 // fold (A+B)-B -> A
4254 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4255 return N0.getOperand(0);
4256
4257 // fold (A+C1)-C2 -> A+(C1-C2)
4258 if (N0.getOpcode() == ISD::ADD) {
4259 SDValue N01 = N0.getOperand(1);
4260 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4261 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4262 }
4263
4264 // fold C2-(A+C1) -> (C2-C1)-A
4265 if (N1.getOpcode() == ISD::ADD) {
4266 SDValue N11 = N1.getOperand(1);
4267 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4268 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4269 }
4270
4271 // fold (A-C1)-C2 -> A-(C1+C2)
4272 if (N0.getOpcode() == ISD::SUB) {
4273 SDValue N01 = N0.getOperand(1);
4274 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4275 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4276 }
4277
4278 // fold (c1-A)-c2 -> (c1-c2)-A
4279 if (N0.getOpcode() == ISD::SUB) {
4280 SDValue N00 = N0.getOperand(0);
4281 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4282 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4283 }
4284
4285 SDValue A, B, C;
4286
4287 // fold ((A+(B+C))-B) -> A+C
4288 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4289 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4290
4291 // fold ((A+(B-C))-B) -> A-C
4292 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4293 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4294
4295 // fold ((A-(B-C))-C) -> A-B
4296 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4297 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4298
4299 // fold (A-(B-C)) -> A+(C-B)
4300 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4301 return DAG.getNode(ISD::ADD, DL, VT, N0,
4302 DAG.getNode(ISD::SUB, DL, VT, C, B));
4303
4304 // A - (A & B) -> A & (~B)
4305 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4306 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4307 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4308
4309 // fold (A - (-B * C)) -> (A + (B * C))
4310 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4311 return DAG.getNode(ISD::ADD, DL, VT, N0,
4312 DAG.getNode(ISD::MUL, DL, VT, B, C));
4313
4314 // If either operand of a sub is undef, the result is undef
4315 if (N0.isUndef())
4316 return N0;
4317 if (N1.isUndef())
4318 return N1;
4319
4320 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4321 return V;
4322
4323 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4324 return V;
4325
4326 // Try to match AVGCEIL fixedwidth pattern
4327 if (SDValue V = foldSubToAvg(N, DL))
4328 return V;
4329
4330 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4331 return V;
4332
4333 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4334 return V;
4335
4336 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4337 return V;
4338
4339 // (A - B) - 1 -> add (xor B, -1), A
4340 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4341 m_One(/*AllowUndefs=*/true))))
4342 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4343
4344 // Look for:
4345 // sub y, (xor x, -1)
4346 // And if the target does not like this form then turn into:
4347 // add (add x, y), 1
4348 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4349 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4350 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4351 }
4352
4353 // Hoist one-use addition by non-opaque constant:
4354 // (x + C) - y -> (x - y) + C
4355 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4356 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4357 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4358 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4359 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4360 }
4361 // y - (x + C) -> (y - x) - C
4362 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4363 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4364 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4365 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4366 }
4367 // (x - C) - y -> (x - y) - C
4368 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4369 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4370 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4371 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4372 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4373 }
4374 // (C - x) - y -> C - (x + y)
4375 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4376 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4377 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4378 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4379 }
4380
4381 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4382 // rather than 'sub 0/1' (the sext should get folded).
4383 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4384 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4385 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4386 TLI.getBooleanContents(VT) ==
4388 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4389 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4390 }
4391
4392 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4393 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4394 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4395 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4396 return DAG.getNode(ISD::ABS, DL, VT, A);
4397
4398 // If the relocation model supports it, consider symbol offsets.
4399 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4400 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4401 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4402 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4403 if (GA->getGlobal() == GB->getGlobal())
4404 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4405 DL, VT);
4406 }
4407
4408 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4409 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4410 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4411 if (TN->getVT() == MVT::i1) {
4412 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4413 DAG.getConstant(1, DL, VT));
4414 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4415 }
4416 }
4417
4418 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4419 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4420 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4421 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4422 }
4423
4424 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4425 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4426 APInt NewStep = -N1.getConstantOperandAPInt(0);
4427 return DAG.getNode(ISD::ADD, DL, VT, N0,
4428 DAG.getStepVector(DL, VT, NewStep));
4429 }
4430
4431 // Prefer an add for more folding potential and possibly better codegen:
4432 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4433 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4434 SDValue ShAmt = N1.getOperand(1);
4435 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4436 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4437 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4438 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4439 }
4440 }
4441
4442 // As with the previous fold, prefer add for more folding potential.
4443 // Subtracting SMIN/0 is the same as adding SMIN/0:
4444 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4445 if (N1.getOpcode() == ISD::SHL) {
4446 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4447 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4448 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4449 }
4450
4451 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4452 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4453 N0.getResNo() == 0 && N0.hasOneUse())
4454 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4455 N0.getOperand(0), N1, N0.getOperand(2));
4456
4458 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4459 if (SDValue Carry = getAsCarry(TLI, N0)) {
4460 SDValue X = N1;
4461 SDValue Zero = DAG.getConstant(0, DL, VT);
4462 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4463 return DAG.getNode(ISD::UADDO_CARRY, DL,
4464 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4465 Carry);
4466 }
4467 }
4468
4469 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4470 // sub C0, X --> xor X, C0
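// Example: with C0 == 0b1100 and X known to have set bits only within 0b1100,
// the subtraction never borrows and simply clears bits, e.g. 0b1100 - 0b0100 ==
// 0b1000 == 0b1100 ^ 0b0100. The check below establishes this from the
// known-zero bits of X.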
4471 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4472 if (!C0->isOpaque()) {
4473 const APInt &C0Val = C0->getAPIntValue();
4474 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4475 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4476 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4477 }
4478 }
4479
4480 // smax(a,b) - smin(a,b) --> abds(a,b)
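// Rationale: smax(a,b) - smin(a,b) is the absolute difference |a - b| evaluated
// with signed comparisons, which is what ISD::ABDS represents; the unsigned
// min/max variants below map to ISD::ABDU in the same way.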
4481 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4482 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4483 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4484 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4485
4486 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4487 if (hasOperation(ISD::ABDS, VT) &&
4488 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4489 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4490 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4491
4492 // umax(a,b) - umin(a,b) --> abdu(a,b)
4493 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4494 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4495 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4496 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4497
4498 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4499 if (hasOperation(ISD::ABDU, VT) &&
4500 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4501 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4502 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4503
4504 return SDValue();
4505}
4506
4507SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4508 unsigned Opcode = N->getOpcode();
4509 SDValue N0 = N->getOperand(0);
4510 SDValue N1 = N->getOperand(1);
4511 EVT VT = N0.getValueType();
4512 bool IsSigned = Opcode == ISD::SSUBSAT;
4513 SDLoc DL(N);
4514
4515 // fold (sub_sat x, undef) -> 0
4516 if (N0.isUndef() || N1.isUndef())
4517 return DAG.getConstant(0, DL, VT);
4518
4519 // fold (sub_sat x, x) -> 0
4520 if (N0 == N1)
4521 return DAG.getConstant(0, DL, VT);
4522
4523 // fold (sub_sat c1, c2) -> c3
4524 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4525 return C;
4526
4527 // fold vector ops
4528 if (VT.isVector()) {
4529 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4530 return FoldedVOp;
4531
4532 // fold (sub_sat x, 0) -> x, vector edition
4534 return N0;
4535 }
4536
4537 // fold (sub_sat x, 0) -> x
4538 if (isNullConstant(N1))
4539 return N0;
4540
4541 // If it cannot overflow, transform into a sub.
4542 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4543 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4544
4545 return SDValue();
4546}
4547
4548SDValue DAGCombiner::visitSUBC(SDNode *N) {
4549 SDValue N0 = N->getOperand(0);
4550 SDValue N1 = N->getOperand(1);
4551 EVT VT = N0.getValueType();
4552 SDLoc DL(N);
4553
4554 // If the flag result is dead, turn this into an SUB.
4555 if (!N->hasAnyUseOfValue(1))
4556 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4557 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4558
4559 // fold (subc x, x) -> 0 + no borrow
4560 if (N0 == N1)
4561 return CombineTo(N, DAG.getConstant(0, DL, VT),
4562 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4563
4564 // fold (subc x, 0) -> x + no borrow
4565 if (isNullConstant(N1))
4566 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4567
4568 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4569 if (isAllOnesConstant(N0))
4570 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4571 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4572
4573 return SDValue();
4574}
4575
4576SDValue DAGCombiner::visitSUBO(SDNode *N) {
4577 SDValue N0 = N->getOperand(0);
4578 SDValue N1 = N->getOperand(1);
4579 EVT VT = N0.getValueType();
4580 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4581
4582 EVT CarryVT = N->getValueType(1);
4583 SDLoc DL(N);
4584
4585 // If the flag result is dead, turn this into an SUB.
4586 if (!N->hasAnyUseOfValue(1))
4587 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4588 DAG.getUNDEF(CarryVT));
4589
4590 // fold (subo x, x) -> 0 + no borrow
4591 if (N0 == N1)
4592 return CombineTo(N, DAG.getConstant(0, DL, VT),
4593 DAG.getConstant(0, DL, CarryVT));
4594
4595 // fold (subo x, c) -> (addo x, -c)
4596 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4597 if (IsSigned && !N1C->isMinSignedValue())
4598 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4599 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4600
4601 // fold (subo x, 0) -> x + no borrow
4602 if (isNullOrNullSplat(N1))
4603 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4604
4605 // If it cannot overflow, transform into a sub.
4606 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4607 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4608 DAG.getConstant(0, DL, CarryVT));
4609
4610 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4611 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4612 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4613 DAG.getConstant(0, DL, CarryVT));
4614
4615 return SDValue();
4616}
4617
4618SDValue DAGCombiner::visitSUBE(SDNode *N) {
4619 SDValue N0 = N->getOperand(0);
4620 SDValue N1 = N->getOperand(1);
4621 SDValue CarryIn = N->getOperand(2);
4622
4623 // fold (sube x, y, false) -> (subc x, y)
4624 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4625 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4626
4627 return SDValue();
4628}
4629
4630SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4631 SDValue N0 = N->getOperand(0);
4632 SDValue N1 = N->getOperand(1);
4633 SDValue CarryIn = N->getOperand(2);
4634
4635 // fold (usubo_carry x, y, false) -> (usubo x, y)
4636 if (isNullConstant(CarryIn)) {
4637 if (!LegalOperations ||
4638 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4639 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4640 }
4641
4642 return SDValue();
4643}
4644
4645SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4646 SDValue N0 = N->getOperand(0);
4647 SDValue N1 = N->getOperand(1);
4648 SDValue CarryIn = N->getOperand(2);
4649
4650 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4651 if (isNullConstant(CarryIn)) {
4652 if (!LegalOperations ||
4653 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4654 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4655 }
4656
4657 return SDValue();
4658}
4659
4660// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4661// UMULFIXSAT here.
4662SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4663 SDValue N0 = N->getOperand(0);
4664 SDValue N1 = N->getOperand(1);
4665 SDValue Scale = N->getOperand(2);
4666 EVT VT = N0.getValueType();
4667
4668 // fold (mulfix x, undef, scale) -> 0
4669 if (N0.isUndef() || N1.isUndef())
4670 return DAG.getConstant(0, SDLoc(N), VT);
4671
4672 // Canonicalize constant to RHS (vector doesn't have to splat)
4675 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4676
4677 // fold (mulfix x, 0, scale) -> 0
4678 if (isNullConstant(N1))
4679 return DAG.getConstant(0, SDLoc(N), VT);
4680
4681 return SDValue();
4682}
4683
4684template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4685 SDValue N0 = N->getOperand(0);
4686 SDValue N1 = N->getOperand(1);
4687 EVT VT = N0.getValueType();
4688 unsigned BitWidth = VT.getScalarSizeInBits();
4689 SDLoc DL(N);
4690 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4691 MatchContextClass Matcher(DAG, TLI, N);
4692
4693 // fold (mul x, undef) -> 0
4694 if (N0.isUndef() || N1.isUndef())
4695 return DAG.getConstant(0, DL, VT);
4696
4697 // fold (mul c1, c2) -> c1*c2
4698 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4699 return C;
4700
4701 // canonicalize constant to RHS (vector doesn't have to splat)
4702 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4703 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4704 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4705
4706 bool N1IsConst = false;
4707 bool N1IsOpaqueConst = false;
4708 APInt ConstValue1;
4709
4710 // fold vector ops
4711 if (VT.isVector()) {
4712 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4713 if (!UseVP)
4714 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4715 return FoldedVOp;
4716
4717 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4718 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4719 "Splat APInt should be element width");
4720 } else {
4721 N1IsConst = isa<ConstantSDNode>(N1);
4722 if (N1IsConst) {
4723 ConstValue1 = N1->getAsAPIntVal();
4724 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4725 }
4726 }
4727
4728 // fold (mul x, 0) -> 0
4729 if (N1IsConst && ConstValue1.isZero())
4730 return N1;
4731
4732 // fold (mul x, 1) -> x
4733 if (N1IsConst && ConstValue1.isOne())
4734 return N0;
4735
4736 if (!UseVP)
4737 if (SDValue NewSel = foldBinOpIntoSelect(N))
4738 return NewSel;
4739
4740 // fold (mul x, -1) -> 0-x
4741 if (N1IsConst && ConstValue1.isAllOnes())
4742 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4743
4744 // fold (mul x, (1 << c)) -> x << c
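// e.g. (mul x, 16) --> (shl x, 4)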
4745 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4746 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4747 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4748 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4749 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4750 SDNodeFlags Flags;
4751 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4752 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4753 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4754 }
4755 }
4756
4757 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4758 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4759 unsigned Log2Val = (-ConstValue1).logBase2();
4760
4761 // FIXME: If the input is something that is easily negated (e.g. a
4762 // single-use add), we should put the negate there.
4763 return Matcher.getNode(
4764 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4765 Matcher.getNode(ISD::SHL, DL, VT, N0,
4766 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4767 }
4768
4769 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4770 // hi result is in use in case we hit this mid-legalization.
4771 if (!UseVP) {
4772 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4773 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4774 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4775 // TODO: Can we match commutable operands with getNodeIfExists?
4776 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4777 if (LoHi->hasAnyUseOfValue(1))
4778 return SDValue(LoHi, 0);
4779 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4780 if (LoHi->hasAnyUseOfValue(1))
4781 return SDValue(LoHi, 0);
4782 }
4783 }
4784 }
4785
4786 // Try to transform:
4787 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4788 // mul x, (2^N + 1) --> add (shl x, N), x
4789 // mul x, (2^N - 1) --> sub (shl x, N), x
4790 // Examples: x * 33 --> (x << 5) + x
4791 // x * 15 --> (x << 4) - x
4792 // x * -33 --> -((x << 5) + x)
4793 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4794 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4795 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4796 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4797 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4798 // x * 0xf800 --> (x << 16) - (x << 11)
4799 // x * -0x8800 --> -((x << 15) + (x << 11))
4800 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4801 if (!UseVP && N1IsConst &&
4802 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4803 // TODO: We could handle more general decomposition of any constant by
4804 // having the target set a limit on number of ops and making a
4805 // callback to determine that sequence (similar to sqrt expansion).
4806 unsigned MathOp = ISD::DELETED_NODE;
4807 APInt MulC = ConstValue1.abs();
4808 // The constant `2` should be treated as (2^0 + 1).
4809 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4810 MulC.lshrInPlace(TZeros);
4811 if ((MulC - 1).isPowerOf2())
4812 MathOp = ISD::ADD;
4813 else if ((MulC + 1).isPowerOf2())
4814 MathOp = ISD::SUB;
4815
4816 if (MathOp != ISD::DELETED_NODE) {
4817 unsigned ShAmt =
4818 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4819 ShAmt += TZeros;
4820 assert(ShAmt < BitWidth &&
4821 "multiply-by-constant generated out of bounds shift");
4822 SDValue Shl =
4823 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4824 SDValue R =
4825 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4826 DAG.getNode(ISD::SHL, DL, VT, N0,
4827 DAG.getConstant(TZeros, DL, VT)))
4828 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4829 if (ConstValue1.isNegative())
4830 R = DAG.getNegative(R, DL, VT);
4831 return R;
4832 }
4833 }
4834
4835 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
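// e.g. (mul (shl x, 2), 5) --> (mul x, 20)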
4836 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4837 SDValue N01 = N0.getOperand(1);
4838 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4839 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4840 }
4841
4842 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4843 // use.
4844 {
4845 SDValue Sh, Y;
4846
4847 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4848 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4849 isConstantOrConstantVector(N0.getOperand(1))) {
4850 Sh = N0; Y = N1;
4851 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4852 isConstantOrConstantVector(N1.getOperand(1))) {
4853 Sh = N1; Y = N0;
4854 }
4855
4856 if (Sh.getNode()) {
4857 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4858 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4859 }
4860 }
4861
4862 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
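// e.g. (mul (add x, 3), 5) --> (add (mul x, 5), 15)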
4863 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4864 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
4865 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true) &&
4866 isMulAddWithConstProfitable(N, N0, N1))
4867 return Matcher.getNode(
4868 ISD::ADD, DL, VT,
4869 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4870 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4871
4872 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
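// e.g. (mul (vscale * 4), 6) --> (vscale * 24)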
4873 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4874 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4875 const APInt &C0 = N0.getConstantOperandAPInt(0);
4876 const APInt &C1 = NC1->getAPIntValue();
4877 return DAG.getVScale(DL, VT, C0 * C1);
4878 }
4879
4880 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
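// e.g. (mul step_vector(3), splat(2)) --> step_vector(6), i.e. <0, 6, 12, ...>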
4881 APInt MulVal;
4882 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4883 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4884 const APInt &C0 = N0.getConstantOperandAPInt(0);
4885 APInt NewStep = C0 * MulVal;
4886 return DAG.getStepVector(DL, VT, NewStep);
4887 }
4888
4889 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
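// (or Y, 1) evaluates to -1 when X is negative and +1 otherwise, so the
// multiply produces |X|.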
4890 SDValue X;
4891 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4892 sd_context_match(
4893 N, Matcher,
4894 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4895 m_Deferred(X)))) {
4896 return Matcher.getNode(ISD::ABS, DL, VT, X);
4897 }
4898
4899 // Fold (mul x, 0/undef) -> 0 and
4900 // (mul x, 1) -> x
4901 // into and(x, mask).
4902 // We can replace vectors with '0' and '1' factors with a clearing mask.
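// e.g. (mul x:v4i32, <0, 1, 0, 1>) --> (and x, <0, -1, 0, -1>)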
4903 if (VT.isFixedLengthVector()) {
4904 unsigned NumElts = VT.getVectorNumElements();
4905 SmallBitVector ClearMask;
4906 ClearMask.reserve(NumElts);
4907 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4908 if (!V || V->isZero()) {
4909 ClearMask.push_back(true);
4910 return true;
4911 }
4912 ClearMask.push_back(false);
4913 return V->isOne();
4914 };
4915 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4916 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4917 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4918 EVT LegalSVT = N1.getOperand(0).getValueType();
4919 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4920 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4921 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4922 for (unsigned I = 0; I != NumElts; ++I)
4923 if (ClearMask[I])
4924 Mask[I] = Zero;
4925 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4926 }
4927 }
4928
4929 // reassociate mul
4930 // TODO: Change reassociateOps to support vp ops.
4931 if (!UseVP)
4932 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4933 return RMUL;
4934
4935 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4936 // TODO: Change reassociateReduction to support vp ops.
4937 if (!UseVP)
4938 if (SDValue SD =
4939 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4940 return SD;
4941
4942 // Simplify the operands using demanded-bits information.
4943 if (SimplifyDemandedBits(SDValue(N, 0)))
4944 return SDValue(N, 0);
4945
4946 return SDValue();
4947}
4948
4949 /// Return true if divmod libcall is available.
4950 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4951 const TargetLowering &TLI) {
4952 RTLIB::Libcall LC;
4953 EVT NodeType = Node->getValueType(0);
4954 if (!NodeType.isSimple())
4955 return false;
4956 switch (NodeType.getSimpleVT().SimpleTy) {
4957 default: return false; // No libcall for vector types.
4958 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4959 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4960 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4961 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4962 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4963 }
4964
4965 return TLI.getLibcallName(LC) != nullptr;
4966}
4967
4968/// Issue divrem if both quotient and remainder are needed.
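/// e.g. if a function computes both (sdiv x, y) and (srem x, y), the two nodes
/// are rewritten to use the two results of a single (sdivrem x, y) node.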
4969SDValue DAGCombiner::useDivRem(SDNode *Node) {
4970 if (Node->use_empty())
4971 return SDValue(); // This is a dead node, leave it alone.
4972
4973 unsigned Opcode = Node->getOpcode();
4974 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4975 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4976
4977 // DivMod libcalls can still handle types that are not legal.
4978 EVT VT = Node->getValueType(0);
4979 if (VT.isVector() || !VT.isInteger())
4980 return SDValue();
4981
4982 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4983 return SDValue();
4984
4985 // If DIVREM is going to get expanded into a libcall,
4986 // but there is no libcall available, then don't combine.
4987 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4988 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4989 return SDValue();
4990
4991 // If div is legal, it's better to do the normal expansion
4992 unsigned OtherOpcode = 0;
4993 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4994 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4995 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4996 return SDValue();
4997 } else {
4998 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4999 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5000 return SDValue();
5001 }
5002
5003 SDValue Op0 = Node->getOperand(0);
5004 SDValue Op1 = Node->getOperand(1);
5005 SDValue combined;
5006 for (SDNode *User : Op0->users()) {
5007 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5008 User->use_empty())
5009 continue;
5010 // Convert the other matching node(s), too;
5011 // otherwise, the DIVREM may get target-legalized into something
5012 // target-specific that we won't be able to recognize.
5013 unsigned UserOpc = User->getOpcode();
5014 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5015 User->getOperand(0) == Op0 &&
5016 User->getOperand(1) == Op1) {
5017 if (!combined) {
5018 if (UserOpc == OtherOpcode) {
5019 SDVTList VTs = DAG.getVTList(VT, VT);
5020 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5021 } else if (UserOpc == DivRemOpc) {
5022 combined = SDValue(User, 0);
5023 } else {
5024 assert(UserOpc == Opcode);
5025 continue;
5026 }
5027 }
5028 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5029 CombineTo(User, combined);
5030 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5031 CombineTo(User, combined.getValue(1));
5032 }
5033 }
5034 return combined;
5035}
5036
5037 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5038 SDValue N0 = N->getOperand(0);
5039 SDValue N1 = N->getOperand(1);
5040 EVT VT = N->getValueType(0);
5041 SDLoc DL(N);
5042
5043 unsigned Opc = N->getOpcode();
5044 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5045 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5046
5047 // X / undef -> undef
5048 // X % undef -> undef
5049 // X / 0 -> undef
5050 // X % 0 -> undef
5051 // NOTE: This includes vectors where any divisor element is zero/undef.
5052 if (DAG.isUndef(Opc, {N0, N1}))
5053 return DAG.getUNDEF(VT);
5054
5055 // undef / X -> 0
5056 // undef % X -> 0
5057 if (N0.isUndef())
5058 return DAG.getConstant(0, DL, VT);
5059
5060 // 0 / X -> 0
5061 // 0 % X -> 0
5062 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5063 if (N0C && N0C->isZero())
5064 return N0;
5065
5066 // X / X -> 1
5067 // X % X -> 0
5068 if (N0 == N1)
5069 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5070
5071 // X / 1 -> X
5072 // X % 1 -> 0
5073 // If this is a boolean op (single-bit element type), we can't have
5074 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5075 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5076 // it's a 1.
5077 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
5078 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5079
5080 return SDValue();
5081}
5082
5083SDValue DAGCombiner::visitSDIV(SDNode *N) {
5084 SDValue N0 = N->getOperand(0);
5085 SDValue N1 = N->getOperand(1);
5086 EVT VT = N->getValueType(0);
5087 EVT CCVT = getSetCCResultType(VT);
5088 SDLoc DL(N);
5089
5090 // fold (sdiv c1, c2) -> c1/c2
5091 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5092 return C;
5093
5094 // fold vector ops
5095 if (VT.isVector())
5096 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5097 return FoldedVOp;
5098
5099 // fold (sdiv X, -1) -> 0-X
5100 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5101 if (N1C && N1C->isAllOnes())
5102 return DAG.getNegative(N0, DL, VT);
5103
5104 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5105 if (N1C && N1C->isMinSignedValue())
5106 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5107 DAG.getConstant(1, DL, VT),
5108 DAG.getConstant(0, DL, VT));
5109
5110 if (SDValue V = simplifyDivRem(N, DAG))
5111 return V;
5112
5113 if (SDValue NewSel = foldBinOpIntoSelect(N))
5114 return NewSel;
5115
5116 // If we know the sign bits of both operands are zero, strength reduce to a
5117 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5118 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5119 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5120
5121 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5122 // If the corresponding remainder node exists, update its users with
5123 // (Dividend - (Quotient * Divisor)).
5124 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5125 { N0, N1 })) {
5126 // If the sdiv has the exact flag we shouldn't propagate it to the
5127 // remainder node.
5128 if (!N->getFlags().hasExact()) {
5129 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5130 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5131 AddToWorklist(Mul.getNode());
5132 AddToWorklist(Sub.getNode());
5133 CombineTo(RemNode, Sub);
5134 }
5135 }
5136 return V;
5137 }
5138
5139 // sdiv, srem -> sdivrem
5140 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5141 // true. Otherwise, we break the simplification logic in visitREM().
5142 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5143 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5144 if (SDValue DivRem = useDivRem(N))
5145 return DivRem;
5146
5147 return SDValue();
5148}
5149
5150static bool isDivisorPowerOfTwo(SDValue Divisor) {
5151 // Helper for determining whether a value is a power-2 constant scalar or a
5152 // vector of such elements.
5153 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5154 if (C->isZero() || C->isOpaque())
5155 return false;
5156 if (C->getAPIntValue().isPowerOf2())
5157 return true;
5158 if (C->getAPIntValue().isNegatedPowerOf2())
5159 return true;
5160 return false;
5161 };
5162
5163 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5164}
5165
5166SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5167 SDLoc DL(N);
5168 EVT VT = N->getValueType(0);
5169 EVT CCVT = getSetCCResultType(VT);
5170 unsigned BitWidth = VT.getScalarSizeInBits();
5171
5172 // fold (sdiv X, pow2) -> simple ops after legalize
5173 // FIXME: We check for the exact bit here because the generic lowering gives
5174 // better results in that case. The target-specific lowering should learn how
5175 // to handle exact sdivs efficiently.
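// e.g. for i32 (sdiv x, 8): Sign = (sra x, 31), Srl = (srl Sign, 29) adds 7
// only when x is negative, and the result is (sra (add x, Srl), 3); the
// selects below handle divisors of 1/-1 and negate for negative divisors.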
5176 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5177 // Target-specific implementation of sdiv x, pow2.
5178 if (SDValue Res = BuildSDIVPow2(N))
5179 return Res;
5180
5181 // Create constants that are functions of the shift amount value.
5182 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5183 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5184 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5185 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5186 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5187 if (!isConstantOrConstantVector(Inexact))
5188 return SDValue();
5189
5190 // Splat the sign bit into the register
5191 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5192 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5193 AddToWorklist(Sign.getNode());
5194
5195 // Add (N0 < 0) ? abs2 - 1 : 0;
5196 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5197 AddToWorklist(Srl.getNode());
5198 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5199 AddToWorklist(Add.getNode());
5200 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5201 AddToWorklist(Sra.getNode());
5202
5203 // Special case: (sdiv X, 1) -> X
5204 // Special Case: (sdiv X, -1) -> 0-X
5205 SDValue One = DAG.getConstant(1, DL, VT);
5206 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5207 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5208 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5209 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5210 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5211
5212 // If dividing by a positive value, we're done. Otherwise, the result must
5213 // be negated.
5214 SDValue Zero = DAG.getConstant(0, DL, VT);
5215 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5216
5217 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5218 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5219 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5220 return Res;
5221 }
5222
5223 // If integer divide is expensive and we satisfy the requirements, emit an
5224 // alternate sequence. Targets may check function attributes for size/speed
5225 // trade-offs.
5226 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5227 if (isConstantOrConstantVector(N1) &&
5228 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5229 if (SDValue Op = BuildSDIV(N))
5230 return Op;
5231
5232 return SDValue();
5233}
5234
5235SDValue DAGCombiner::visitUDIV(SDNode *N) {
5236 SDValue N0 = N->getOperand(0);
5237 SDValue N1 = N->getOperand(1);
5238 EVT VT = N->getValueType(0);
5239 EVT CCVT = getSetCCResultType(VT);
5240 SDLoc DL(N);
5241
5242 // fold (udiv c1, c2) -> c1/c2
5243 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5244 return C;
5245
5246 // fold vector ops
5247 if (VT.isVector())
5248 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5249 return FoldedVOp;
5250
5251 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5252 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5253 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5254 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5255 DAG.getConstant(1, DL, VT),
5256 DAG.getConstant(0, DL, VT));
5257 }
5258
5259 if (SDValue V = simplifyDivRem(N, DAG))
5260 return V;
5261
5262 if (SDValue NewSel = foldBinOpIntoSelect(N))
5263 return NewSel;
5264
5265 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5266 // If the corresponding remainder node exists, update its users with
5267 // (Dividend - (Quotient * Divisor)).
5268 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5269 { N0, N1 })) {
5270 // If the udiv has the exact flag we shouldn't propagate it to the
5271 // remainder node.
5272 if (!N->getFlags().hasExact()) {
5273 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5274 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5275 AddToWorklist(Mul.getNode());
5276 AddToWorklist(Sub.getNode());
5277 CombineTo(RemNode, Sub);
5278 }
5279 }
5280 return V;
5281 }
5282
5283 // udiv, urem -> udivrem
5284 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5285 // true. Otherwise, we break the simplification logic in visitREM().
5286 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5287 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5288 if (SDValue DivRem = useDivRem(N))
5289 return DivRem;
5290
5291 // Simplify the operands using demanded-bits information.
5292 // We don't have demanded bits support for UDIV so this just enables constant
5293 // folding based on known bits.
5294 if (SimplifyDemandedBits(SDValue(N, 0)))
5295 return SDValue(N, 0);
5296
5297 return SDValue();
5298}
5299
5300SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5301 SDLoc DL(N);
5302 EVT VT = N->getValueType(0);
5303
5304 // fold (udiv x, (1 << c)) -> x >>u c
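// e.g. (udiv x, 16) --> (srl x, 4)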
5305 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5306 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5307 AddToWorklist(LogBase2.getNode());
5308
5309 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5310 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5311 AddToWorklist(Trunc.getNode());
5312 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5313 }
5314 }
5315
5316 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
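// e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))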
5317 if (N1.getOpcode() == ISD::SHL) {
5318 SDValue N10 = N1.getOperand(0);
5319 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5320 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5321 AddToWorklist(LogBase2.getNode());
5322
5323 EVT ADDVT = N1.getOperand(1).getValueType();
5324 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5325 AddToWorklist(Trunc.getNode());
5326 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5327 AddToWorklist(Add.getNode());
5328 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5329 }
5330 }
5331 }
5332
5333 // fold (udiv x, c) -> alternate
5334 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5335 if (isConstantOrConstantVector(N1) &&
5336 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5337 if (SDValue Op = BuildUDIV(N))
5338 return Op;
5339
5340 return SDValue();
5341}
5342
5343SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5344 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5345 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5346 // Target-specific implementation of srem x, pow2.
5347 if (SDValue Res = BuildSREMPow2(N))
5348 return Res;
5349 }
5350 return SDValue();
5351}
5352
5353// handles ISD::SREM and ISD::UREM
5354SDValue DAGCombiner::visitREM(SDNode *N) {
5355 unsigned Opcode = N->getOpcode();
5356 SDValue N0 = N->getOperand(0);
5357 SDValue N1 = N->getOperand(1);
5358 EVT VT = N->getValueType(0);
5359 EVT CCVT = getSetCCResultType(VT);
5360
5361 bool isSigned = (Opcode == ISD::SREM);
5362 SDLoc DL(N);
5363
5364 // fold (rem c1, c2) -> c1%c2
5365 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5366 return C;
5367
5368 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5369 // Freeze the numerator to avoid a miscompile with an undefined value.
5370 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5371 CCVT.isVector() == VT.isVector()) {
5372 SDValue F0 = DAG.getFreeze(N0);
5373 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5374 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5375 }
5376
5377 if (SDValue V = simplifyDivRem(N, DAG))
5378 return V;
5379
5380 if (SDValue NewSel = foldBinOpIntoSelect(N))
5381 return NewSel;
5382
5383 if (isSigned) {
5384 // If we know the sign bits of both operands are zero, strength reduce to a
5385 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5386 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5387 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5388 } else {
5389 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5390 // fold (urem x, pow2) -> (and x, pow2-1)
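// e.g. (urem x, 16) --> (and x, 15)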
5391 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5392 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5393 AddToWorklist(Add.getNode());
5394 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5395 }
5396 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5397 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5398 // TODO: We should sink the following into isKnownToBePowerOfTwo
5399 // using a OrZero parameter analogous to our handling in ValueTracking.
5400 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5401 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5402 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5403 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5404 AddToWorklist(Add.getNode());
5405 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5406 }
5407 }
5408
5409 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5410
5411 // If X/C can be simplified by the division-by-constant logic, lower
5412 // X%C to the equivalent of X-X/C*C.
5413 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5414 // speculative DIV must not cause a DIVREM conversion. We guard against this
5415 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5416 // combine will not return a DIVREM. Regardless, checking cheapness here
5417 // makes sense since the simplification results in fatter code.
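// e.g. (srem x, 7) becomes (sub x, (mul (sdiv x, 7), 7)), where the sdiv is
// itself expanded by the division-by-constant logic.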
5418 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5419 if (isSigned) {
5420 // check if we can build faster implementation for srem
5421 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5422 return OptimizedRem;
5423 }
5424
5425 SDValue OptimizedDiv =
5426 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5427 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5428 // If the equivalent Div node also exists, update its users.
5429 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5430 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5431 { N0, N1 }))
5432 CombineTo(DivNode, OptimizedDiv);
5433 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5434 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5435 AddToWorklist(OptimizedDiv.getNode());
5436 AddToWorklist(Mul.getNode());
5437 return Sub;
5438 }
5439 }
5440
5441 // sdiv, srem -> sdivrem; udiv, urem -> udivrem
5442 if (SDValue DivRem = useDivRem(N))
5443 return DivRem.getValue(1);
5444
5445 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5446 // iff urem(BCst, Op1Cst) == 0
5447 SDValue A;
5448 APInt Op1Cst, BCst;
5449 if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5450 m_ConstInt(Op1Cst))) &&
5451 BCst.urem(Op1Cst).isZero()) {
5452 return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5453 }
5454
5455 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5456 // iff srem(BCst, Op1Cst) == 0 && Op1Cst != 1
5457 if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5458 m_ConstInt(Op1Cst))) &&
5459 BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5460 return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5461 }
5462
5463 return SDValue();
5464}
5465
5466SDValue DAGCombiner::visitMULHS(SDNode *N) {
5467 SDValue N0 = N->getOperand(0);
5468 SDValue N1 = N->getOperand(1);
5469 EVT VT = N->getValueType(0);
5470 SDLoc DL(N);
5471
5472 // fold (mulhs c1, c2)
5473 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5474 return C;
5475
5476 // canonicalize constant to RHS.
5477 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5478 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5479 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5480
5481 if (VT.isVector()) {
5482 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5483 return FoldedVOp;
5484
5485 // fold (mulhs x, 0) -> 0
5486 // do not return N1, because an undef node may exist.
5487 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5488 return DAG.getConstant(0, DL, VT);
5489 }
5490
5491 // fold (mulhs x, 0) -> 0
5492 if (isNullConstant(N1))
5493 return N1;
5494
5495 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5496 if (isOneConstant(N1))
5497 return DAG.getNode(
5498 ISD::SRA, DL, VT, N0,
5499 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5500
5501 // fold (mulhs x, undef) -> 0
5502 if (N0.isUndef() || N1.isUndef())
5503 return DAG.getConstant(0, DL, VT);
5504
5505 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5506 // plus a shift.
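// e.g. for i16 with a legal i32 multiply:
// (mulhs x, y) --> (trunc (srl (mul (sext x), (sext y)), 16))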
5507 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5508 !VT.isVector()) {
5509 MVT Simple = VT.getSimpleVT();
5510 unsigned SimpleSize = Simple.getSizeInBits();
5511 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5512 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5513 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5514 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5515 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5516 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5517 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5518 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5519 }
5520 }
5521
5522 return SDValue();
5523}
5524
5525SDValue DAGCombiner::visitMULHU(SDNode *N) {
5526 SDValue N0 = N->getOperand(0);
5527 SDValue N1 = N->getOperand(1);
5528 EVT VT = N->getValueType(0);
5529 SDLoc DL(N);
5530
5531 // fold (mulhu c1, c2)
5532 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5533 return C;
5534
5535 // canonicalize constant to RHS.
5536 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5537 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5538 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5539
5540 if (VT.isVector()) {
5541 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5542 return FoldedVOp;
5543
5544 // fold (mulhu x, 0) -> 0
5545 // do not return N1, because an undef node may exist.
5546 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5547 return DAG.getConstant(0, DL, VT);
5548 }
5549
5550 // fold (mulhu x, 0) -> 0
5551 if (isNullConstant(N1))
5552 return N1;
5553
5554 // fold (mulhu x, 1) -> 0
5555 if (isOneConstant(N1))
5556 return DAG.getConstant(0, DL, VT);
5557
5558 // fold (mulhu x, undef) -> 0
5559 if (N0.isUndef() || N1.isUndef())
5560 return DAG.getConstant(0, DL, VT);
5561
5562 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
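// e.g. for i32: (mulhu x, 16) --> (srl x, 28)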
5563 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5564 hasOperation(ISD::SRL, VT)) {
5565 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5566 unsigned NumEltBits = VT.getScalarSizeInBits();
5567 SDValue SRLAmt = DAG.getNode(
5568 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5569 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5570 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5571 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5572 }
5573 }
5574
5575 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5576 // plus a shift.
5577 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5578 !VT.isVector()) {
5579 MVT Simple = VT.getSimpleVT();
5580 unsigned SimpleSize = Simple.getSizeInBits();
5581 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5582 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5583 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5584 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5585 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5586 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5587 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5588 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5589 }
5590 }
5591
5592 // Simplify the operands using demanded-bits information.
5593 // We don't have demanded bits support for MULHU so this just enables constant
5594 // folding based on known bits.
5595 if (SimplifyDemandedBits(SDValue(N, 0)))
5596 return SDValue(N, 0);
5597
5598 return SDValue();
5599}
5600
5601SDValue DAGCombiner::visitAVG(SDNode *N) {
5602 unsigned Opcode = N->getOpcode();
5603 SDValue N0 = N->getOperand(0);
5604 SDValue N1 = N->getOperand(1);
5605 EVT VT = N->getValueType(0);
5606 SDLoc DL(N);
5607 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5608
5609 // fold (avg c1, c2)
5610 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5611 return C;
5612
5613 // canonicalize constant to RHS.
5614 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5615 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5616 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5617
5618 if (VT.isVector())
5619 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5620 return FoldedVOp;
5621
5622 // fold (avg x, undef) -> x
5623 if (N0.isUndef())
5624 return N1;
5625 if (N1.isUndef())
5626 return N0;
5627
5628 // fold (avg x, x) --> x
5629 if (N0 == N1 && Level >= AfterLegalizeTypes)
5630 return N0;
5631
5632 // fold (avgfloor x, 0) -> x >> 1
5633 SDValue X, Y;
5634 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5635 return DAG.getNode(ISD::SRA, DL, VT, X,
5636 DAG.getShiftAmountConstant(1, VT, DL));
5637 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5638 return DAG.getNode(ISD::SRL, DL, VT, X,
5639 DAG.getShiftAmountConstant(1, VT, DL));
5640
5641 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5642 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5643 if (!IsSigned &&
5644 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5645 X.getValueType() == Y.getValueType() &&
5646 hasOperation(Opcode, X.getValueType())) {
5647 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5648 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5649 }
5650 if (IsSigned &&
5651 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5652 X.getValueType() == Y.getValueType() &&
5653 hasOperation(Opcode, X.getValueType())) {
5654 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5655 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5656 }
5657
5658 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5659 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5660 // Check if avgflooru isn't legal/custom but avgceilu is.
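// e.g. avgflooru(x, y) == avgceilu(x, y - 1) when y != 0, because
// floor((x + y) / 2) == ceil((x + (y - 1)) / 2).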
5661 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5662 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5663 if (DAG.isKnownNeverZero(N1))
5664 return DAG.getNode(
5665 ISD::AVGCEILU, DL, VT, N0,
5666 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5667 if (DAG.isKnownNeverZero(N0))
5668 return DAG.getNode(
5669 ISD::AVGCEILU, DL, VT, N1,
5670 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5671 }
5672
5673 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5674 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5675 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5676 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5677 SDValue Add;
5678 if (sd_match(N,
5679 m_c_BinOp(Opcode,
5680 m_AllOf(m_Value(Add), m_Add(m_Value(X), m_Value(Y))),
5681 m_One())) ||
5682 sd_match(N, m_c_BinOp(Opcode,
5683 m_AllOf(m_Value(Add), m_Add(m_Value(X), m_One())),
5684 m_Value(Y)))) {
5685
5686 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5687 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5688
5689 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5690 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5691 }
5692 }
5693
5694 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5695 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5696 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5697 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5698 }
5699
5700 return SDValue();
5701}
5702
5703SDValue DAGCombiner::visitABD(SDNode *N) {
5704 unsigned Opcode = N->getOpcode();
5705 SDValue N0 = N->getOperand(0);
5706 SDValue N1 = N->getOperand(1);
5707 EVT VT = N->getValueType(0);
5708 SDLoc DL(N);
5709
5710 // fold (abd c1, c2)
5711 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5712 return C;
5713
5714 // canonicalize constant to RHS.
5715 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5716 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5717 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5718
5719 if (VT.isVector())
5720 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5721 return FoldedVOp;
5722
5723 // fold (abd x, undef) -> 0
5724 if (N0.isUndef() || N1.isUndef())
5725 return DAG.getConstant(0, DL, VT);
5726
5727 // fold (abd x, x) -> 0
5728 if (N0 == N1)
5729 return DAG.getConstant(0, DL, VT);
5730
5731 SDValue X;
5732
5733 // fold (abds x, 0) -> abs x
5734 if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5735 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5736 return DAG.getNode(ISD::ABS, DL, VT, X);
5737
5738 // fold (abdu x, 0) -> x
5739 if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5740 return X;
5741
5742 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5743 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5744 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5745 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5746
5747 return SDValue();
5748}
5749
5750/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5751/// give the opcodes for the two computations that are being performed. Return
5752 /// the simplified value if a simplification was made.
5753SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5754 unsigned HiOp) {
5755 // If the high half is not needed, just compute the low half.
5756 bool HiExists = N->hasAnyUseOfValue(1);
5757 if (!HiExists && (!LegalOperations ||
5758 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5759 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5760 return CombineTo(N, Res, Res);
5761 }
5762
5763 // If the low half is not needed, just compute the high half.
5764 bool LoExists = N->hasAnyUseOfValue(0);
5765 if (!LoExists && (!LegalOperations ||
5766 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5767 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5768 return CombineTo(N, Res, Res);
5769 }
5770
5771 // If both halves are used, return as it is.
5772 if (LoExists && HiExists)
5773 return SDValue();
5774
5775 // If the two computed results can be simplified separately, separate them.
5776 if (LoExists) {
5777 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5778 AddToWorklist(Lo.getNode());
5779 SDValue LoOpt = combine(Lo.getNode());
5780 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5781 (!LegalOperations ||
5782 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5783 return CombineTo(N, LoOpt, LoOpt);
5784 }
5785
5786 if (HiExists) {
5787 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5788 AddToWorklist(Hi.getNode());
5789 SDValue HiOpt = combine(Hi.getNode());
5790 if (HiOpt.getNode() && HiOpt != Hi &&
5791 (!LegalOperations ||
5792 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5793 return CombineTo(N, HiOpt, HiOpt);
5794 }
5795
5796 return SDValue();
5797}
5798
5799SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5800 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5801 return Res;
5802
5803 SDValue N0 = N->getOperand(0);
5804 SDValue N1 = N->getOperand(1);
5805 EVT VT = N->getValueType(0);
5806 SDLoc DL(N);
5807
5808 // Constant fold.
5809 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5810 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5811
5812 // canonicalize constant to RHS (vector doesn't have to splat)
5813 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5814 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5815 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5816
5817 // If the type twice as wide is legal, transform the smul_lohi to a wider
5818 // multiply plus a shift.
5819 if (VT.isSimple() && !VT.isVector()) {
5820 MVT Simple = VT.getSimpleVT();
5821 unsigned SimpleSize = Simple.getSizeInBits();
5822 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5823 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5824 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5825 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5826 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5827 // Compute the high half of the result.
5828 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5829 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5830 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5831 // Compute the low half of the result.
5832 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5833 return CombineTo(N, Lo, Hi);
5834 }
5835 }
5836
5837 return SDValue();
5838}
5839
5840SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5841 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5842 return Res;
5843
5844 SDValue N0 = N->getOperand(0);
5845 SDValue N1 = N->getOperand(1);
5846 EVT VT = N->getValueType(0);
5847 SDLoc DL(N);
5848
5849 // Constant fold.
5850 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5851 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5852
5853 // canonicalize constant to RHS (vector doesn't have to splat)
5854 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5855 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5856 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5857
5858 // (umul_lohi N0, 0) -> (0, 0)
5859 if (isNullConstant(N1)) {
5860 SDValue Zero = DAG.getConstant(0, DL, VT);
5861 return CombineTo(N, Zero, Zero);
5862 }
5863
5864 // (umul_lohi N0, 1) -> (N0, 0)
5865 if (isOneConstant(N1)) {
5866 SDValue Zero = DAG.getConstant(0, DL, VT);
5867 return CombineTo(N, N0, Zero);
5868 }
5869
5870 // If the type twice as wide is legal, transform the umul_lohi to a wider
5871 // multiply plus a shift.
5872 if (VT.isSimple() && !VT.isVector()) {
5873 MVT Simple = VT.getSimpleVT();
5874 unsigned SimpleSize = Simple.getSizeInBits();
5875 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5876 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5877 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5878 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5879 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5880 // Compute the high half of the result.
5881 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5882 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5883 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5884 // Compute the low half of the result.
5885 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5886 return CombineTo(N, Lo, Hi);
5887 }
5888 }
5889
5890 return SDValue();
5891}
5892
5893SDValue DAGCombiner::visitMULO(SDNode *N) {
5894 SDValue N0 = N->getOperand(0);
5895 SDValue N1 = N->getOperand(1);
5896 EVT VT = N0.getValueType();
5897 bool IsSigned = (ISD::SMULO == N->getOpcode());
5898
5899 EVT CarryVT = N->getValueType(1);
5900 SDLoc DL(N);
5901
5902 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5903 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5904
5905 // fold operation with constant operands.
5906 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5907 // multiple results.
5908 if (N0C && N1C) {
5909 bool Overflow;
5910 APInt Result =
5911 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5912 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5913 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5914 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5915 }
5916
5917 // canonicalize constant to RHS.
5918 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5919 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5920 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5921
5922 // fold (mulo x, 0) -> 0 + no carry out
5923 if (isNullOrNullSplat(N1))
5924 return CombineTo(N, DAG.getConstant(0, DL, VT),
5925 DAG.getConstant(0, DL, CarryVT));
5926
5927 // (mulo x, 2) -> (addo x, x)
5928 // FIXME: This needs a freeze.
5929 if (N1C && N1C->getAPIntValue() == 2 &&
5930 (!IsSigned || VT.getScalarSizeInBits() > 2))
5931 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5932 N->getVTList(), N0, N0);
5933
5934 // A 1 bit SMULO overflows if both inputs are 1.
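// (In i1 the only signed values are 0 and -1; (-1) * (-1) = +1 is not
// representable, so the multiply overflows exactly when both bits are set.)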
5935 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5936 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5937 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5938 DAG.getConstant(0, DL, VT), ISD::SETNE);
5939 return CombineTo(N, And, Cmp);
5940 }
5941
5942 // If it cannot overflow, transform into a mul.
5943 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5944 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5945 DAG.getConstant(0, DL, CarryVT));
5946 return SDValue();
5947}
5948
5949// Function to calculate whether the Min/Max pair of SDNodes (potentially
5950// swapped around) make a signed saturate pattern, clamping to between a signed
5951 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5952// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5953// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5954// same as SimplifySelectCC. N0<N1 ? N2 : N3.
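// e.g. smin(smax(x, -128), 127) clamps x to the signed i8 range (BW = 8,
// Unsigned = false); smin(smax(x, 0), 255) clamps to the unsigned i8 range
// (BW = 8, Unsigned = true).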
5955 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5956 SDValue N3, ISD::CondCode CC, unsigned &BW,
5957 bool &Unsigned, SelectionDAG &DAG) {
5958 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5959 ISD::CondCode CC) {
5960 // The compare and select operand should be the same or the select operands
5961 // should be truncated versions of the comparison.
5962 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5963 return 0;
5964 // The constants need to be the same or a truncated version of each other.
5965 ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5966 ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5967 if (!N1C || !N3C)
5968 return 0;
5969 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5970 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5971 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5972 return 0;
5973 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5974 };
5975
5976 // Check the initial value is a SMIN/SMAX equivalent.
5977 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5978 if (!Opcode0)
5979 return SDValue();
5980
5981 // We could only need one range check, if the fptosi could never produce
5982 // the upper value.
5983 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5984 if (isNullOrNullSplat(N3)) {
5985 EVT IntVT = N0.getValueType().getScalarType();
5986 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5987 if (FPVT.isSimple()) {
5988 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5989 const fltSemantics &Semantics = InputTy->getFltSemantics();
5990 uint32_t MinBitWidth =
5991 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5992 if (IntVT.getSizeInBits() >= MinBitWidth) {
5993 Unsigned = true;
5994 BW = PowerOf2Ceil(MinBitWidth);
5995 return N0;
5996 }
5997 }
5998 }
5999 }
6000
6001 SDValue N00, N01, N02, N03;
6002 ISD::CondCode N0CC;
6003 switch (N0.getOpcode()) {
6004 case ISD::SMIN:
6005 case ISD::SMAX:
6006 N00 = N02 = N0.getOperand(0);
6007 N01 = N03 = N0.getOperand(1);
6008 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6009 break;
6010 case ISD::SELECT_CC:
6011 N00 = N0.getOperand(0);
6012 N01 = N0.getOperand(1);
6013 N02 = N0.getOperand(2);
6014 N03 = N0.getOperand(3);
6015 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
6016 break;
6017 case ISD::SELECT:
6018 case ISD::VSELECT:
6019 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6020 return SDValue();
6021 N00 = N0.getOperand(0).getOperand(0);
6022 N01 = N0.getOperand(0).getOperand(1);
6023 N02 = N0.getOperand(1);
6024 N03 = N0.getOperand(2);
6025 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6026 break;
6027 default:
6028 return SDValue();
6029 }
6030
6031 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6032 if (!Opcode1 || Opcode0 == Opcode1)
6033 return SDValue();
6034
6035 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6036 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6037 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6038 return SDValue();
6039
6040 const APInt &MinC = MinCOp->getAPIntValue();
6041 const APInt &MaxC = MaxCOp->getAPIntValue();
6042 APInt MinCPlus1 = MinC + 1;
6043 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6044 BW = MinCPlus1.exactLogBase2() + 1;
6045 Unsigned = false;
6046 return N02;
6047 }
6048
6049 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6050 BW = MinCPlus1.exactLogBase2();
6051 Unsigned = true;
6052 return N02;
6053 }
6054
6055 return SDValue();
6056}
6057
6058 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6059 SDValue N3, ISD::CondCode CC,
6060 SelectionDAG &DAG) {
6061 unsigned BW;
6062 bool Unsigned;
6063 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6064 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6065 return SDValue();
6066 EVT FPVT = Fp.getOperand(0).getValueType();
6067 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6068 if (FPVT.isVector())
6069 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6070 FPVT.getVectorElementCount());
6071 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6072 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6073 return SDValue();
6074 SDLoc DL(Fp);
6075 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6076 DAG.getValueType(NewVT.getScalarType()));
6077 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6078}
6079
6080 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6081 SDValue N3, ISD::CondCode CC,
6082 SelectionDAG &DAG) {
6083 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6084 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6085 // be truncated versions of the setcc (N0/N1).
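// e.g. with i32 values, umin(fp_to_uint(x), 255) becomes an 8-bit
// fp_to_uint_sat of x that is zero-extended back to i32.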
6086 if ((N0 != N2 &&
6087 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6088 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6089 return SDValue();
6090 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6091 ConstantSDNode *N3C = isConstOrConstSplat(N3);
6092 if (!N1C || !N3C)
6093 return SDValue();
6094 const APInt &C1 = N1C->getAPIntValue();
6095 const APInt &C3 = N3C->getAPIntValue();
6096 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6097 C1 != C3.zext(C1.getBitWidth()))
6098 return SDValue();
6099
6100 unsigned BW = (C1 + 1).exactLogBase2();
6101 EVT FPVT = N0.getOperand(0).getValueType();
6102 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6103 if (FPVT.isVector())
6104 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6105 FPVT.getVectorElementCount());
6106 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
6107 FPVT, NewVT))
6108 return SDValue();
6109
6110 SDValue Sat =
6111 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6112 DAG.getValueType(NewVT.getScalarType()));
6113 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6114}
6115
6116SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6117 SDValue N0 = N->getOperand(0);
6118 SDValue N1 = N->getOperand(1);
6119 EVT VT = N0.getValueType();
6120 unsigned Opcode = N->getOpcode();
6121 SDLoc DL(N);
6122
6123 // fold operation with constant operands.
6124 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6125 return C;
6126
6127 // If the operands are the same, this is a no-op.
6128 if (N0 == N1)
6129 return N0;
6130
6131 // Fold operation with vscale operands.
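// e.g. umax(vscale * 4, vscale * 8) --> vscale * 8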
6132 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6133 uint64_t C0 = N0->getConstantOperandVal(0);
6134 uint64_t C1 = N1->getConstantOperandVal(0);
6135 if (Opcode == ISD::UMAX)
6136 return C0 > C1 ? N0 : N1;
6137 else if (Opcode == ISD::UMIN)
6138 return C0 > C1 ? N1 : N0;
6139 }
6140
6141 // canonicalize constant to RHS
6142 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6143 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6144 return DAG.getNode(Opcode, DL, VT, N1, N0);
6145
6146 // fold vector ops
6147 if (VT.isVector())
6148 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6149 return FoldedVOp;
6150
6151 // reassociate minmax
6152 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6153 return RMINMAX;
6154
6155 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6156 // Only do this if:
6157 // 1. The current op isn't legal and the flipped is.
6158 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6159 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6160 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6161 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6162 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6163 unsigned AltOpcode;
6164 switch (Opcode) {
6165 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6166 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6167 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6168 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6169 default: llvm_unreachable("Unknown MINMAX opcode");
6170 }
6171 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6172 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6173 }
6174
6175 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6176 if (SDValue S = PerformMinMaxFpToSatCombine(
6177 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6178 return S;
6179 if (Opcode == ISD::UMIN)
6180 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6181 return S;
6182
6183 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6184 auto ReductionOpcode = [](unsigned Opcode) {
6185 switch (Opcode) {
6186 case ISD::SMIN:
6187 return ISD::VECREDUCE_SMIN;
6188 case ISD::SMAX:
6189 return ISD::VECREDUCE_SMAX;
6190 case ISD::UMIN:
6191 return ISD::VECREDUCE_UMIN;
6192 case ISD::UMAX:
6193 return ISD::VECREDUCE_UMAX;
6194 default:
6195 llvm_unreachable("Unexpected opcode");
6196 }
6197 };
6198 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6199 SDLoc(N), VT, N0, N1))
6200 return SD;
6201
6202 // Simplify the operands using demanded-bits information.
6203 if (SimplifyDemandedBits(SDValue(N, 0)))
6204 return SDValue(N, 0);
6205
6206 return SDValue();
6207}
6208
6209/// If this is a bitwise logic instruction and both operands have the same
6210/// opcode, try to sink the other opcode after the logic instruction.
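/// e.g. (or (zext x), (zext y)) --> (zext (or x, y))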
6211SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6212 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6213 EVT VT = N0.getValueType();
6214 unsigned LogicOpcode = N->getOpcode();
6215 unsigned HandOpcode = N0.getOpcode();
6216 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6217 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6218
6219 // Bail early if none of these transforms apply.
6220 if (N0.getNumOperands() == 0)
6221 return SDValue();
6222
6223 // FIXME: We should check number of uses of the operands to not increase
6224 // the instruction count for all transforms.
6225
6226 // Handle size-changing casts (or sign_extend_inreg).
6227 SDValue X = N0.getOperand(0);
6228 SDValue Y = N1.getOperand(0);
6229 EVT XVT = X.getValueType();
6230 SDLoc DL(N);
6231 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6232 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6233 N0.getOperand(1) == N1.getOperand(1))) {
6234 // If both operands have other uses, this transform would create extra
6235 // instructions without eliminating anything.
6236 if (!N0.hasOneUse() && !N1.hasOneUse())
6237 return SDValue();
6238 // We need matching integer source types.
6239 if (XVT != Y.getValueType())
6240 return SDValue();
6241 // Don't create an illegal op during or after legalization. Don't ever
6242 // create an unsupported vector op.
6243 if ((VT.isVector() || LegalOperations) &&
6244 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6245 return SDValue();
6246 // Avoid infinite looping with PromoteIntBinOp.
6247 // TODO: Should we apply desirable/legal constraints to all opcodes?
6248 if ((HandOpcode == ISD::ANY_EXTEND ||
6249 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6250 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6251 return SDValue();
6252 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6253 SDNodeFlags LogicFlags;
6254 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6255 ISD::isExtOpcode(HandOpcode));
6256 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6257 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6258 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6259 return DAG.getNode(HandOpcode, DL, VT, Logic);
6260 }
6261
6262 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6263 if (HandOpcode == ISD::TRUNCATE) {
6264 // If both operands have other uses, this transform would create extra
6265 // instructions without eliminating anything.
6266 if (!N0.hasOneUse() && !N1.hasOneUse())
6267 return SDValue();
6268 // We need matching source types.
6269 if (XVT != Y.getValueType())
6270 return SDValue();
6271 // Don't create an illegal op during or after legalization.
6272 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6273 return SDValue();
6274 // Be extra careful sinking truncate. If it's free, there's no benefit in
6275 // widening a binop. Also, don't create a logic op on an illegal type.
6276 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6277 return SDValue();
6278 if (!TLI.isTypeLegal(XVT))
6279 return SDValue();
6280 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6281 return DAG.getNode(HandOpcode, DL, VT, Logic);
6282 }
6283
6284 // For binops SHL/SRL/SRA/AND:
6285 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
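// e.g. (and (srl a, 4), (srl b, 4)) --> (srl (and a, b), 4)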
6286 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6287 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6288 N0.getOperand(1) == N1.getOperand(1)) {
6289 // If either operand has other uses, this transform is not an improvement.
6290 if (!N0.hasOneUse() || !N1.hasOneUse())
6291 return SDValue();
6292 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6293 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6294 }
6295
6296 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6297 if (HandOpcode == ISD::BSWAP) {
6298 // If either operand has other uses, this transform is not an improvement.
6299 if (!N0.hasOneUse() || !N1.hasOneUse())
6300 return SDValue();
6301 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6302 return DAG.getNode(HandOpcode, DL, VT, Logic);
6303 }
6304
6305 // For funnel shifts FSHL/FSHR:
6306 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6307 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6308 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6309 N0.getOperand(2) == N1.getOperand(2)) {
6310 if (!N0.hasOneUse() || !N1.hasOneUse())
6311 return SDValue();
6312 SDValue X1 = N0.getOperand(1);
6313 SDValue Y1 = N1.getOperand(1);
6314 SDValue S = N0.getOperand(2);
6315 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6316 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6317 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6318 }
6319
6320 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6321 // Only perform this optimization up until type legalization, before
6322 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6323 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6324 // we don't want to undo this promotion.
6325 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6326 // on scalars.
6327 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6328 Level <= AfterLegalizeTypes) {
6329 // Input types must be integer and the same.
6330 if (XVT.isInteger() && XVT == Y.getValueType() &&
6331 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6332 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6333 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6334 return DAG.getNode(HandOpcode, DL, VT, Logic);
6335 }
6336 }
6337
6338 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6339 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6340 // If both shuffles use the same mask, and both shuffle within a single
6341 // vector, then it is worthwhile to move the swizzle after the operation.
6342 // The type-legalizer generates this pattern when loading illegal
6343 // vector types from memory. In many cases this allows additional shuffle
6344 // optimizations.
6345 // There are other cases where moving the shuffle after the xor/and/or
6346 // is profitable even if shuffles don't perform a swizzle.
6347 // If both shuffles use the same mask, and both shuffles have the same first
6348 // or second operand, then it might still be profitable to move the shuffle
6349 // after the xor/and/or operation.
6350 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6351 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6352 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6353 assert(X.getValueType() == Y.getValueType() &&
6354 "Inputs to shuffles are not the same type");
6355
6356 // Check that both shuffles use the same mask. The masks are known to be of
6357 // the same length because the result vector type is the same.
6358 // Check also that shuffles have only one use to avoid introducing extra
6359 // instructions.
6360 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6361 !SVN0->getMask().equals(SVN1->getMask()))
6362 return SDValue();
6363
6364 // Don't try to fold this node if it requires introducing a
6365 // build vector of all zeros that might be illegal at this stage.
6366 SDValue ShOp = N0.getOperand(1);
6367 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6368 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6369
6370 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6371 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6372 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6373 N0.getOperand(0), N1.getOperand(0));
6374 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6375 }
6376
6377 // Don't try to fold this node if it requires introducing a
6378 // build vector of all zeros that might be illegal at this stage.
6379 ShOp = N0.getOperand(0);
6380 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6381 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6382
6383 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6384 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6385 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6386 N1.getOperand(1));
6387 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6388 }
6389 }
6390
6391 return SDValue();
6392}
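// Illustrative example of the cast case handled above (assuming an i8 AND is
// legal, or we are before legalization):
//   (and (zext i8 X to i32), (zext i8 Y to i32))
//     --> (zext (and i8 X, Y) to i32)
// The logic op runs on the narrower source type and one extend disappears,
// matching the logic_op (hand_op X), (hand_op Y) pattern above.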
6393
6394/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6395SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6396 const SDLoc &DL) {
6397 SDValue LL, LR, RL, RR, N0CC, N1CC;
6398 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6399 !isSetCCEquivalent(N1, RL, RR, N1CC))
6400 return SDValue();
6401
6402 assert(N0.getValueType() == N1.getValueType() &&
6403 "Unexpected operand types for bitwise logic op");
6404 assert(LL.getValueType() == LR.getValueType() &&
6405 RL.getValueType() == RR.getValueType() &&
6406 "Unexpected operand types for setcc");
6407
6408 // If we're here post-legalization or the logic op type is not i1, the logic
6409 // op type must match a setcc result type. Also, all folds require new
6410 // operations on the left and right operands, so those types must match.
6411 EVT VT = N0.getValueType();
6412 EVT OpVT = LL.getValueType();
6413 if (LegalOperations || VT.getScalarType() != MVT::i1)
6414 if (VT != getSetCCResultType(OpVT))
6415 return SDValue();
6416 if (OpVT != RL.getValueType())
6417 return SDValue();
6418
6419 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6420 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6421 bool IsInteger = OpVT.isInteger();
6422 if (LR == RR && CC0 == CC1 && IsInteger) {
6423 bool IsZero = isNullOrNullSplat(LR);
6424 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6425
6426 // All bits clear?
6427 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6428 // All sign bits clear?
6429 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6430 // Any bits set?
6431 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6432 // Any sign bits set?
6433 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6434
6435 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6436 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6437 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6438 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6439 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6440 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6441 AddToWorklist(Or.getNode());
6442 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6443 }
6444
6445 // All bits set?
6446 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6447 // All sign bits set?
6448 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6449 // Any bits clear?
6450 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6451 // Any sign bits clear?
6452 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6453
6454 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6455 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6456 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6457 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
6458 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6459 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6460 AddToWorklist(And.getNode());
6461 return DAG.getSetCC(DL, VT, And, LR, CC1);
6462 }
6463 }
6464
6465 // TODO: What is the 'or' equivalent of this fold?
6466 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6467 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6468 IsInteger && CC0 == ISD::SETNE &&
6469 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6470 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6471 SDValue One = DAG.getConstant(1, DL, OpVT);
6472 SDValue Two = DAG.getConstant(2, DL, OpVT);
6473 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6474 AddToWorklist(Add.getNode());
6475 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6476 }
6477
6478 // Try more general transforms if the predicates match and the only user of
6479 // the compares is the 'and' or 'or'.
6480 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6481 N0.hasOneUse() && N1.hasOneUse()) {
6482 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6483 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6484 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6485 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6486 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6487 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6488 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6489 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6490 }
6491
6492 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6493 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6494 // Match a shared variable operand and 2 non-opaque constant operands.
6495 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6496 // The difference of the constants must be a single bit.
6497 const APInt &CMax =
6498 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6499 const APInt &CMin =
6500 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6501 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6502 };
6503 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6504 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6505 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6506 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6507 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6508 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6509 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6510 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6511 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6512 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6513 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6514 }
6515 }
6516 }
6517
6518 // Canonicalize equivalent operands to LL == RL.
6519 if (LL == RR && LR == RL) {
6520 CC1 = ISD::getSetCCSwappedOperands(CC1);
6521 std::swap(RL, RR);
6522 }
6523
6524 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6525 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6526 if (LL == RL && LR == RR) {
6527 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6528 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6529 if (NewCC != ISD::SETCC_INVALID &&
6530 (!LegalOperations ||
6531 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6532 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6533 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6534 }
6535
6536 return SDValue();
6537}
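// Worked example for the AndEqZero case above (illustrative): for i32 X and Y,
//   (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
// since X and Y are both zero exactly when their OR is zero, so two compares
// plus a logic op become one OR plus one compare.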
6538
6539static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6540 SelectionDAG &DAG) {
6541 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6542}
6543
6544static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6545 SelectionDAG &DAG) {
6546 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6547}
6548
6549// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6550static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6551 ISD::CondCode CC, unsigned OrAndOpcode,
6552 SelectionDAG &DAG,
6553 bool isFMAXNUMFMINNUM_IEEE,
6554 bool isFMAXNUMFMINNUM) {
6555 // The optimization cannot be applied for all the predicates because
6556 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6557 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6558 // applied at all if one of the operands is a signaling NaN.
6559
6560 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6561 // are non NaN values.
6562 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6563 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6564 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6565 isFMAXNUMFMINNUM_IEEE
6566 ? ISD::FMINNUM_IEEE
6567 : ISD::DELETED_NODE;
6568 }
6569
6570 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6571 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6572 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6573 isFMAXNUMFMINNUM_IEEE
6574 ? ISD::FMAXNUM_IEEE
6575 : ISD::DELETED_NODE;
6576 }
6577
6578 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6579 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6580 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6581 // that there are not any sNaNs, then the optimization is not valid
6582 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6583 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6584 // we can prove that we do not have any sNaNs, then we can do the
6585 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6586 // cases.
6587 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6588 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6589 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6590 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6591 isFMAXNUMFMINNUM_IEEE
6592 ? ISD::FMINNUM_IEEE
6593 : ISD::DELETED_NODE;
6594 }
6595
6596 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6597 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6598 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6599 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6600 isFMAXNUMFMINNUM_IEEE
6601 ? ISD::FMAXNUM_IEEE
6602 : ISD::DELETED_NODE;
6603 }
6604
6605 return ISD::DELETED_NODE;
6606}
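// Illustrative use of the helper above: for
//   (or (setolt A, C), (setolt B, C))
// it returns ISD::FMINNUM when FMINNUM/FMAXNUM are legal or custom, because
// for non-NaN values min(A, B) < C holds exactly when A < C or B < C; if only
// the IEEE variants are available it additionally requires proving that
// neither operand is a signaling NaN.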
6607
6608 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6609 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6610 assert(
6611 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6612 "Invalid Op to combine SETCC with");
6613
6614 // TODO: Search past casts/truncates.
6615 SDValue LHS = LogicOp->getOperand(0);
6616 SDValue RHS = LogicOp->getOperand(1);
6617 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6618 !LHS->hasOneUse() || !RHS->hasOneUse())
6619 return SDValue();
6620
6621 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6622 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6623 LogicOp, LHS.getNode(), RHS.getNode());
6624
6625 SDValue LHS0 = LHS->getOperand(0);
6626 SDValue RHS0 = RHS->getOperand(0);
6627 SDValue LHS1 = LHS->getOperand(1);
6628 SDValue RHS1 = RHS->getOperand(1);
6629 // TODO: We don't actually need a splat here, for vectors we just need the
6630 // invariants to hold for each element.
6631 auto *LHS1C = isConstOrConstSplat(LHS1);
6632 auto *RHS1C = isConstOrConstSplat(RHS1);
6633 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6634 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6635 EVT VT = LogicOp->getValueType(0);
6636 EVT OpVT = LHS0.getValueType();
6637 SDLoc DL(LogicOp);
6638
6639 // Check if the operands of an and/or operation are comparisons and if they
6640 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6641 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6642 // sequence will be replaced with min-cmp sequence:
6643 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6644 // and and-cmp-cmp will be replaced with max-cmp sequence:
6645 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6646 // The optimization does not work for `==` or `!=` .
6647 // The two comparisons should have either the same predicate or the
6648 // predicate of one of the comparisons is the opposite of the other one.
6649 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6650 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6651 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6652 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6653 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6654 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6655 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6656 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6657 (OpVT.isFloatingPoint() &&
6658 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6660 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6661 CCL != ISD::SETTRUE &&
6662 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6663
6664 SDValue CommonValue, Operand1, Operand2;
6665 ISD::CondCode CC = ISD::SETCC_INVALID;
6666 if (CCL == CCR) {
6667 if (LHS0 == RHS0) {
6668 CommonValue = LHS0;
6669 Operand1 = LHS1;
6670 Operand2 = RHS1;
6671 CC = CCL;
6672 } else if (LHS1 == RHS1) {
6673 CommonValue = LHS1;
6674 Operand1 = LHS0;
6675 Operand2 = RHS0;
6676 CC = CCL;
6677 }
6678 } else {
6679 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6680 if (LHS0 == RHS1) {
6681 CommonValue = LHS0;
6682 Operand1 = LHS1;
6683 Operand2 = RHS0;
6684 CC = CCR;
6685 } else if (RHS0 == LHS1) {
6686 CommonValue = LHS1;
6687 Operand1 = LHS0;
6688 Operand2 = RHS1;
6689 CC = CCL;
6690 }
6691 }
6692
6693 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6694 // handle it using OR/AND.
6695 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6696 CC = ISD::SETCC_INVALID;
6697 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6698 CC = ISD::SETCC_INVALID;
6699
6700 if (CC != ISD::SETCC_INVALID) {
6701 unsigned NewOpcode = ISD::DELETED_NODE;
6702 bool IsSigned = isSignedIntSetCC(CC);
6703 if (OpVT.isInteger()) {
6704 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6705 CC == ISD::SETLT || CC == ISD::SETULT);
6706 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6707 if (IsLess == IsOr)
6708 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6709 else
6710 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6711 } else if (OpVT.isFloatingPoint())
6712 NewOpcode =
6713 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6714 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6715
6716 if (NewOpcode != ISD::DELETED_NODE) {
6717 SDValue MinMaxValue =
6718 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6719 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6720 }
6721 }
6722 }
6723
6724 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6725 LHS0.getValueType() == RHS0.getValueType() &&
6726 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6727 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6728 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6729
6730 if (TargetPreference == AndOrSETCCFoldKind::None)
6731 return SDValue();
6732
6733 if (CCL == CCR &&
6734 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6735 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6736 const APInt &APLhs = LHS1C->getAPIntValue();
6737 const APInt &APRhs = RHS1C->getAPIntValue();
6738
6739 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6740 // case this is just a compare).
6741 if (APLhs == (-APRhs) &&
6742 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6743 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6744 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6745 // (icmp eq A, C) | (icmp eq A, -C)
6746 // -> (icmp eq Abs(A), C)
6747 // (icmp ne A, C) & (icmp ne A, -C)
6748 // -> (icmp ne Abs(A), C)
6749 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6750 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6751 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6752 } else if (TargetPreference &
6753 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6754
6755 // AndOrSETCCFoldKind::AddAnd:
6756 // A == C0 | A == C1
6757 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6758 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6759 // A != C0 & A != C1
6760 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6761 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6762
6763 // AndOrSETCCFoldKind::NotAnd:
6764 // A == C0 | A == C1
6765 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6766 // -> ~A & smin(C0, C1) == 0
6767 // A != C0 & A != C1
6768 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6769 // -> ~A & smin(C0, C1) != 0
6770
6771 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6772 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6773 APInt Dif = MaxC - MinC;
6774 if (!Dif.isZero() && Dif.isPowerOf2()) {
6775 if (MaxC.isAllOnes() &&
6776 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6777 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6778 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6779 DAG.getConstant(MinC, DL, OpVT));
6780 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6781 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6782 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6783
6784 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6785 DAG.getConstant(-MinC, DL, OpVT));
6786 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6787 DAG.getConstant(~Dif, DL, OpVT));
6788 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6789 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6790 }
6791 }
6792 }
6793 }
6794
6795 return SDValue();
6796}
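// Worked example for the integer min/max path above (illustrative, assuming
// UMIN/UMAX/SMIN/SMAX are legal):
//   (or (setult A, C), (setult B, C)) --> (setult (umin A, B), C)
// because the unsigned minimum of A and B is below C exactly when at least one
// of them is; the AND form of the same pattern uses UMAX instead.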
6797
6798// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6799// We canonicalize to the `select` form in the middle end, but the `and` form
6800 // gets better codegen and all tested targets (arm, x86, riscv) seem to be okay with it.
6801 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6802 const SDLoc &DL, SelectionDAG &DAG) {
6803 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6804 if (!isNullConstant(F))
6805 return SDValue();
6806
6807 EVT CondVT = Cond.getValueType();
6808 if (TLI.getBooleanContents(CondVT) !=
6809 TargetLowering::ZeroOrOneBooleanContent)
6810 return SDValue();
6811
6812 if (T.getOpcode() != ISD::AND)
6813 return SDValue();
6814
6815 if (!isOneConstant(T.getOperand(1)))
6816 return SDValue();
6817
6818 EVT OpVT = T.getValueType();
6819
6820 SDValue CondMask =
6821 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6822 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6823}
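// Illustrative example for the select-to-and combine above (assuming the
// condition uses zero-or-one boolean contents): with i1 C and i32 X,
//   (select C, (and X, 1), 0) --> (and (zext C to i32), X)
// The zero-extended boolean is 0 or 1, so the AND keeps the low bit of X only
// when C is true, which is exactly what the select computed.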
6824
6825/// This contains all DAGCombine rules which reduce two values combined by
6826/// an And operation to a single value. This makes them reusable in the context
6827/// of visitSELECT(). Rules involving constants are not included as
6828/// visitSELECT() already handles those cases.
6829SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6830 EVT VT = N1.getValueType();
6831 SDLoc DL(N);
6832
6833 // fold (and x, undef) -> 0
6834 if (N0.isUndef() || N1.isUndef())
6835 return DAG.getConstant(0, DL, VT);
6836
6837 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6838 return V;
6839
6840 // Canonicalize:
6841 // and(x, add) -> and(add, x)
6842 if (N1.getOpcode() == ISD::ADD)
6843 std::swap(N0, N1);
6844
6845 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6846 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6847 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6848 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6849 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6850 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6851 // immediate for an add, but it is legal if its top c2 bits are set,
6852 // transform the ADD so the immediate doesn't need to be materialized
6853 // in a register.
6854 APInt ADDC = ADDI->getAPIntValue();
6855 APInt SRLC = SRLI->getAPIntValue();
6856 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6857 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6858 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6859 SRLC.getZExtValue());
6860 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6861 ADDC |= Mask;
6862 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6863 SDLoc DL0(N0);
6864 SDValue NewAdd =
6865 DAG.getNode(ISD::ADD, DL0, VT,
6866 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6867 CombineTo(N0.getNode(), NewAdd);
6868 // Return N so it doesn't get rechecked!
6869 return SDValue(N, 0);
6870 }
6871 }
6872 }
6873 }
6874 }
6875 }
6876
6877 return SDValue();
6878}
6879
6880bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6881 EVT LoadResultTy, EVT &ExtVT) {
6882 if (!AndC->getAPIntValue().isMask())
6883 return false;
6884
6885 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6886
6887 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6888 EVT LoadedVT = LoadN->getMemoryVT();
6889
6890 if (ExtVT == LoadedVT &&
6891 (!LegalOperations ||
6892 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6893 // ZEXTLOAD will match without needing to change the size of the value being
6894 // loaded.
6895 return true;
6896 }
6897
6898 // Do not change the width of a volatile or atomic load.
6899 if (!LoadN->isSimple())
6900 return false;
6901
6902 // Do not generate loads of non-round integer types since these can
6903 // be expensive (and would be wrong if the type is not byte sized).
6904 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6905 return false;
6906
6907 if (LegalOperations &&
6908 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6909 return false;
6910
6911 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6912 return false;
6913
6914 return true;
6915}
6916
6917bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6918 ISD::LoadExtType ExtType, EVT &MemVT,
6919 unsigned ShAmt) {
6920 if (!LDST)
6921 return false;
6922
6923 // Only allow byte offsets.
6924 if (ShAmt % 8)
6925 return false;
6926 const unsigned ByteShAmt = ShAmt / 8;
6927
6928 // Do not generate loads of non-round integer types since these can
6929 // be expensive (and would be wrong if the type is not byte sized).
6930 if (!MemVT.isRound())
6931 return false;
6932
6933 // Don't change the width of a volatile or atomic load.
6934 if (!LDST->isSimple())
6935 return false;
6936
6937 EVT LdStMemVT = LDST->getMemoryVT();
6938
6939 // Bail out when changing the scalable property, since we can't be sure that
6940 // we're actually narrowing here.
6941 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6942 return false;
6943
6944 // Verify that we are actually reducing a load width here.
6945 if (LdStMemVT.bitsLT(MemVT))
6946 return false;
6947
6948 // Ensure that this isn't going to produce an unsupported memory access.
6949 if (ShAmt) {
6950 const Align LDSTAlign = LDST->getAlign();
6951 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6952 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6953 LDST->getAddressSpace(), NarrowAlign,
6954 LDST->getMemOperand()->getFlags()))
6955 return false;
6956 }
6957
6958 // It's not possible to generate a constant of extended or untyped type.
6959 EVT PtrType = LDST->getBasePtr().getValueType();
6960 if (PtrType == MVT::Untyped || PtrType.isExtended())
6961 return false;
6962
6963 if (isa<LoadSDNode>(LDST)) {
6964 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6965 // Don't transform one with multiple uses, this would require adding a new
6966 // load.
6967 if (!SDValue(Load, 0).hasOneUse())
6968 return false;
6969
6970 if (LegalOperations &&
6971 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6972 return false;
6973
6974 // For the transform to be legal, the load must produce only two values
6975 // (the value loaded and the chain). Don't transform a pre-increment
6976 // load, for example, which produces an extra value. Otherwise the
6977 // transformation is not equivalent, and the downstream logic to replace
6978 // uses gets things wrong.
6979 if (Load->getNumValues() > 2)
6980 return false;
6981
6982 // If the load that we're shrinking is an extload and we're not just
6983 // discarding the extension we can't simply shrink the load. Bail.
6984 // TODO: It would be possible to merge the extensions in some cases.
6985 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6986 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6987 return false;
6988
6989 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
6990 return false;
6991 } else {
6992 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6993 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6994 // Can't write outside the original store
6995 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6996 return false;
6997
6998 if (LegalOperations &&
6999 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
7000 return false;
7001 }
7002 return true;
7003}
7004
7005bool DAGCombiner::SearchForAndLoads(SDNode *N,
7006 SmallVectorImpl<LoadSDNode*> &Loads,
7007 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
7008 ConstantSDNode *Mask,
7009 SDNode *&NodeToMask) {
7010 // Recursively search for the operands, looking for loads which can be
7011 // narrowed.
7012 for (SDValue Op : N->op_values()) {
7013 if (Op.getValueType().isVector())
7014 return false;
7015
7016 // Some constants may need fixing up later if they are too large.
7017 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7018 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7019 "Expected bitwise logic operation");
7020 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
7021 NodesWithConsts.insert(N);
7022 continue;
7023 }
7024
7025 if (!Op.hasOneUse())
7026 return false;
7027
7028 switch(Op.getOpcode()) {
7029 case ISD::LOAD: {
7030 auto *Load = cast<LoadSDNode>(Op);
7031 EVT ExtVT;
7032 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
7033 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
7034
7035 // ZEXTLOAD is already small enough.
7036 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7037 ExtVT.bitsGE(Load->getMemoryVT()))
7038 continue;
7039
7040 // Use LE to convert equal sized loads to zext.
7041 if (ExtVT.bitsLE(Load->getMemoryVT()))
7042 Loads.push_back(Load);
7043
7044 continue;
7045 }
7046 return false;
7047 }
7048 case ISD::ZERO_EXTEND:
7049 case ISD::AssertZext: {
7050 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7051 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
7052 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7053 cast<VTSDNode>(Op.getOperand(1))->getVT() :
7054 Op.getOperand(0).getValueType();
7055
7056 // We can accept extending nodes if the mask is wider than, or equal in
7057 // width to, the original type.
7058 if (ExtVT.bitsGE(VT))
7059 continue;
7060 break;
7061 }
7062 case ISD::OR:
7063 case ISD::XOR:
7064 case ISD::AND:
7065 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7066 NodeToMask))
7067 return false;
7068 continue;
7069 }
7070
7071 // Allow one node which will be masked along with any loads found.
7072 if (NodeToMask)
7073 return false;
7074
7075 // Also ensure that the node to be masked only produces one data result.
7076 NodeToMask = Op.getNode();
7077 if (NodeToMask->getNumValues() > 1) {
7078 bool HasValue = false;
7079 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7080 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7081 if (VT != MVT::Glue && VT != MVT::Other) {
7082 if (HasValue) {
7083 NodeToMask = nullptr;
7084 return false;
7085 }
7086 HasValue = true;
7087 }
7088 }
7089 assert(HasValue && "Node to be masked has no data result?");
7090 }
7091 }
7092 return true;
7093}
7094
7095bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7096 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7097 if (!Mask)
7098 return false;
7099
7100 if (!Mask->getAPIntValue().isMask())
7101 return false;
7102
7103 // No need to do anything if the and directly uses a load.
7104 if (isa<LoadSDNode>(N->getOperand(0)))
7105 return false;
7106
7107 SmallVector<LoadSDNode*, 8> Loads;
7108 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7109 SDNode *FixupNode = nullptr;
7110 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7111 if (Loads.empty())
7112 return false;
7113
7114 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7115 SDValue MaskOp = N->getOperand(1);
7116
7117 // If it exists, fixup the single node we allow in the tree that needs
7118 // masking.
7119 if (FixupNode) {
7120 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7121 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7122 FixupNode->getValueType(0),
7123 SDValue(FixupNode, 0), MaskOp);
7124 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7125 if (And.getOpcode() == ISD::AND)
7126 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7127 }
7128
7129 // Narrow any constants that need it.
7130 for (auto *LogicN : NodesWithConsts) {
7131 SDValue Op0 = LogicN->getOperand(0);
7132 SDValue Op1 = LogicN->getOperand(1);
7133
7134 // We only need to fix AND if both inputs are constants. And we only need
7135 // to fix one of the constants.
7136 if (LogicN->getOpcode() == ISD::AND &&
7137 (!isa<ConstantSDNode>(Op0) || !isa<ConstantSDNode>(Op1)))
7138 continue;
7139
7140 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7141 Op0 =
7142 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7143
7144 if (isa<ConstantSDNode>(Op1))
7145 Op1 =
7146 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7147
7148 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7149 std::swap(Op0, Op1);
7150
7151 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7152 }
7153
7154 // Create narrow loads.
7155 for (auto *Load : Loads) {
7156 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7157 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7158 SDValue(Load, 0), MaskOp);
7159 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7160 if (And.getOpcode() == ISD::AND)
7161 And = SDValue(
7162 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7163 SDValue NewLoad = reduceLoadWidth(And.getNode());
7164 assert(NewLoad &&
7165 "Shouldn't be masking the load if it can't be narrowed");
7166 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7167 }
7168 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7169 return true;
7170 }
7171 return false;
7172}
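// Illustrative sketch of the propagation above (assuming i8 zero-extending
// loads are legal for the target): for
//   (and (or (load i32 A), (load i32 B)), 255)
// both loads are found by SearchForAndLoads, the mask is applied to each one,
// and reduceLoadWidth then shrinks each to a (zextload i8); the outer AND is
// finally replaced by the OR of the narrowed loads.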
7173
7174// Unfold
7175// x & (-1 'logical shift' y)
7176// To
7177// (x 'opposite logical shift' y) 'logical shift' y
7178// if it is better for performance.
7179SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7180 assert(N->getOpcode() == ISD::AND);
7181
7182 SDValue N0 = N->getOperand(0);
7183 SDValue N1 = N->getOperand(1);
7184
7185 // Do we actually prefer shifts over mask?
7186 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
7187 return SDValue();
7188
7189 // Try to match (-1 '[outer] logical shift' y)
7190 unsigned OuterShift;
7191 unsigned InnerShift; // The opposite direction to the OuterShift.
7192 SDValue Y; // Shift amount.
7193 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7194 if (!M.hasOneUse())
7195 return false;
7196 OuterShift = M->getOpcode();
7197 if (OuterShift == ISD::SHL)
7198 InnerShift = ISD::SRL;
7199 else if (OuterShift == ISD::SRL)
7200 InnerShift = ISD::SHL;
7201 else
7202 return false;
7203 if (!isAllOnesConstant(M->getOperand(0)))
7204 return false;
7205 Y = M->getOperand(1);
7206 return true;
7207 };
7208
7209 SDValue X;
7210 if (matchMask(N1))
7211 X = N0;
7212 else if (matchMask(N0))
7213 X = N1;
7214 else
7215 return SDValue();
7216
7217 SDLoc DL(N);
7218 EVT VT = N->getValueType(0);
7219
7220 // tmp = x 'opposite logical shift' y
7221 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7222 // ret = tmp 'logical shift' y
7223 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7224
7225 return T1;
7226}
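// Worked example (illustrative): for i32 X and a variable shift amount Y, when
// the target prefers a shift pair over materializing the mask,
//   (and X, (shl -1, Y)) --> (shl (srl X, Y), Y)
// Both forms clear the Y lowest bits of X, but the right-hand side avoids
// building the -1 << Y mask in a register.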
7227
7228/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7229/// For a target with a bit test, this is expected to become test + set and save
7230/// at least 1 instruction.
7231 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7232 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7233
7234 // Look through an optional extension.
7235 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7236 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7237 And0 = And0.getOperand(0);
7238 if (!isOneConstant(And1) || !And0.hasOneUse())
7239 return SDValue();
7240
7241 SDValue Src = And0;
7242
7243 // Attempt to find a 'not' op.
7244 // TODO: Should we favor test+set even without the 'not' op?
7245 bool FoundNot = false;
7246 if (isBitwiseNot(Src)) {
7247 FoundNot = true;
7248 Src = Src.getOperand(0);
7249
7250 // Look through an optional truncation. The source operand may not be the
7251 // same type as the original 'and', but that is ok because we are masking
7252 // off everything but the low bit.
7253 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7254 Src = Src.getOperand(0);
7255 }
7256
7257 // Match a shift-right by constant.
7258 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7259 return SDValue();
7260
7261 // This is probably not worthwhile without a supported type.
7262 EVT SrcVT = Src.getValueType();
7263 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7264 if (!TLI.isTypeLegal(SrcVT))
7265 return SDValue();
7266
7267 // We might have looked through casts that make this transform invalid.
7268 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7269 SDValue ShiftAmt = Src.getOperand(1);
7270 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7271 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7272 return SDValue();
7273
7274 // Set source to shift source.
7275 Src = Src.getOperand(0);
7276
7277 // Try again to find a 'not' op.
7278 // TODO: Should we favor test+set even with two 'not' ops?
7279 if (!FoundNot) {
7280 if (!isBitwiseNot(Src))
7281 return SDValue();
7282 Src = Src.getOperand(0);
7283 }
7284
7285 if (!TLI.hasBitTest(Src, ShiftAmt))
7286 return SDValue();
7287
7288 // Turn this into a bit-test pattern using mask op + setcc:
7289 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7290 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7291 SDLoc DL(And);
7292 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7293 EVT CCVT =
7294 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7295 SDValue Mask = DAG.getConstant(
7296 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7297 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7298 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7299 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7300 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7301}
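// Worked example (illustrative): testing that bit 3 of X is clear,
//   (and (not (srl X, 3)), 1) --> (zext (seteq (and X, 8), 0))
// i.e. mask bit 3 in place and compare the result against zero, which maps
// directly onto a target bit-test instruction.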
7302
7303/// For targets that support usubsat, match a bit-hack form of that operation
7304/// that ends in 'and' and convert it.
7305 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7306 EVT VT = N->getValueType(0);
7307 unsigned BitWidth = VT.getScalarSizeInBits();
7308 APInt SignMask = APInt::getSignMask(BitWidth);
7309
7310 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7311 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7312 // xor/add with SMIN (signmask) are logically equivalent.
7313 SDValue X;
7314 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7315 m_OneUse(m_Sra(m_Deferred(X),
7316 m_SpecificInt(BitWidth - 1))))) &&
7317 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7318 m_OneUse(m_Sra(m_Deferred(X),
7319 m_SpecificInt(BitWidth - 1))))))
7320 return SDValue();
7321
7322 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7323 DAG.getConstant(SignMask, DL, VT));
7324}
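// Worked i8 example (illustrative): for X = 200 (0xC8),
//   (X ^ 128) & (X s>> 7) = 0x48 & 0xFF = 72 = usubsat(200, 128)
// and for X = 100 (0x64),
//   (X ^ 128) & (X s>> 7) = 0xE4 & 0x00 = 0 = usubsat(100, 128)
// so the bit-hack form computes a saturating subtraction of the sign mask.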
7325
7326/// Given a bitwise logic operation N with a matching bitwise logic operand,
7327/// fold a pattern where 2 of the source operands are identically shifted
7328/// values. For example:
7329/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7330 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7331 SelectionDAG &DAG) {
7332 unsigned LogicOpcode = N->getOpcode();
7333 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7334 "Expected bitwise logic operation");
7335
7336 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7337 return SDValue();
7338
7339 // Match another bitwise logic op and a shift.
7340 unsigned ShiftOpcode = ShiftOp.getOpcode();
7341 if (LogicOp.getOpcode() != LogicOpcode ||
7342 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7343 ShiftOpcode == ISD::SRA))
7344 return SDValue();
7345
7346 // Match another shift op inside the first logic operand. Handle both commuted
7347 // possibilities.
7348 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7349 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7350 SDValue X1 = ShiftOp.getOperand(0);
7351 SDValue Y = ShiftOp.getOperand(1);
7352 SDValue X0, Z;
7353 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7354 LogicOp.getOperand(0).getOperand(1) == Y) {
7355 X0 = LogicOp.getOperand(0).getOperand(0);
7356 Z = LogicOp.getOperand(1);
7357 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7358 LogicOp.getOperand(1).getOperand(1) == Y) {
7359 X0 = LogicOp.getOperand(1).getOperand(0);
7360 Z = LogicOp.getOperand(0);
7361 } else {
7362 return SDValue();
7363 }
7364
7365 EVT VT = N->getValueType(0);
7366 SDLoc DL(N);
7367 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7368 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7369 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7370}
7371
7372/// Given a tree of logic operations with shape like
7373/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7374/// try to match and fold shift operations with the same shift amount.
7375/// For example:
7376/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7377/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7378 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7379 SDValue RightHand, SelectionDAG &DAG) {
7380 unsigned LogicOpcode = N->getOpcode();
7381 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7382 "Expected bitwise logic operation");
7383 if (LeftHand.getOpcode() != LogicOpcode ||
7384 RightHand.getOpcode() != LogicOpcode)
7385 return SDValue();
7386 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7387 return SDValue();
7388
7389 // Try to match one of following patterns:
7390 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7391 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7392 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7393 // itself.
7394 SDValue CombinedShifts, W;
7395 SDValue R0 = RightHand.getOperand(0);
7396 SDValue R1 = RightHand.getOperand(1);
7397 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7398 W = R1;
7399 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7400 W = R0;
7401 else
7402 return SDValue();
7403
7404 EVT VT = N->getValueType(0);
7405 SDLoc DL(N);
7406 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7407}
7408
7409/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7410/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
7411/// pattern. This is typically a better representation for targets without a
7412/// fused "and-not" operation.
7413 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7414 const TargetLowering &TLI, const SDLoc &DL) {
7415 // Note that masked-merge variants using XOR or ADD expressions are
7416 // normalized to OR by InstCombine so we only check for OR or AND.
7417 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7418 "Must be called with ISD::OR or ISD::AND node");
7419
7420 // If the target supports and-not, don't fold this.
7421 if (TLI.hasAndNot(SDValue(Node, 0)))
7422 return SDValue();
7423
7424 SDValue M, X, Y;
7425
7426 if (sd_match(Node,
7427 m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7428 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7429 sd_match(Node,
7430 m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7431 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7432 EVT VT = M.getValueType();
7433 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7434 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7435 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7436 }
7437 return SDValue();
7438}
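// Worked example (illustrative): with M = 0b1100, X = 0b1010, Y = 0b0110,
//   (M & X) | (~M & Y) = 0b1000 | 0b0010            = 0b1010
//   ((X ^ Y) & M) ^ Y  = (0b1100 & 0b1100) ^ 0b0110 = 0b1010
// Both select the bits of X where M is set and the bits of Y elsewhere; the
// second form needs one fewer operation when the target has no and-not.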
7439
7440SDValue DAGCombiner::visitAND(SDNode *N) {
7441 SDValue N0 = N->getOperand(0);
7442 SDValue N1 = N->getOperand(1);
7443 EVT VT = N1.getValueType();
7444 SDLoc DL(N);
7445
7446 // x & x --> x
7447 if (N0 == N1)
7448 return N0;
7449
7450 // fold (and c1, c2) -> c1&c2
7451 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7452 return C;
7453
7454 // canonicalize constant to RHS
7455 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7456 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7457 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7458
7459 if (areBitwiseNotOfEachother(N0, N1))
7460 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7461
7462 // fold vector ops
7463 if (VT.isVector()) {
7464 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7465 return FoldedVOp;
7466
7467 // fold (and x, 0) -> 0, vector edition
7468 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7469 // do not return N1, because undef node may exist in N1
7470 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7471 N1.getValueType());
7472
7473 // fold (and x, -1) -> x, vector edition
7474 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7475 return N0;
7476
7477 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7478 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7479 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7480 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7481 EVT LoadVT = MLoad->getMemoryVT();
7482 EVT ExtVT = VT;
7483 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7484 // For this AND to be a zero extension of the masked load the elements
7485 // of the BuildVec must mask the bottom bits of the extended element
7486 // type
7487 uint64_t ElementSize =
7488 LoadVT.getVectorElementType().getScalarSizeInBits();
7489 if (Splat->getAPIntValue().isMask(ElementSize)) {
7490 SDValue NewLoad = DAG.getMaskedLoad(
7491 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7492 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7493 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7494 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7495 bool LoadHasOtherUsers = !N0.hasOneUse();
7496 CombineTo(N, NewLoad);
7497 if (LoadHasOtherUsers)
7498 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7499 return SDValue(N, 0);
7500 }
7501 }
7502 }
7503 }
7504
7505 // fold (and x, -1) -> x
7506 if (isAllOnesConstant(N1))
7507 return N0;
7508
7509 // if (and x, c) is known to be zero, return 0
7510 unsigned BitWidth = VT.getScalarSizeInBits();
7511 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7512 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7513 return DAG.getConstant(0, DL, VT);
7514
7515 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7516 return R;
7517
7518 if (SDValue NewSel = foldBinOpIntoSelect(N))
7519 return NewSel;
7520
7521 // reassociate and
7522 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7523 return RAND;
7524
7525 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7526 if (SDValue SD =
7527 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7528 return SD;
7529
7530 // fold (and (or x, C), D) -> D if (C & D) == D
7531 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7532 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7533 };
7534 if (N0.getOpcode() == ISD::OR &&
7535 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7536 return N1;
7537
7538 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7539 SDValue N0Op0 = N0.getOperand(0);
7540 EVT SrcVT = N0Op0.getValueType();
7541 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7542 APInt Mask = ~N1C->getAPIntValue();
7543 Mask = Mask.trunc(SrcBitWidth);
7544
7545 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7546 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7547 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7548
7549 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7550 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7551 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7552 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7553 TLI.isNarrowingProfitable(N, VT, SrcVT))
7554 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7555 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7556 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7557 }
7558
7559 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7560 if (ISD::isExtOpcode(N0.getOpcode())) {
7561 unsigned ExtOpc = N0.getOpcode();
7562 SDValue N0Op0 = N0.getOperand(0);
7563 if (N0Op0.getOpcode() == ISD::AND &&
7564 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7565 N0->hasOneUse() && N0Op0->hasOneUse()) {
7566 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7567 {N0Op0.getOperand(1)})) {
7568 if (SDValue NewMask =
7569 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7570 return DAG.getNode(ISD::AND, DL, VT,
7571 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7572 NewMask);
7573 }
7574 }
7575 }
7576 }
7577
7578 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7579 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7580 // already be zero by virtue of the width of the base type of the load.
7581 //
7582 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7583 // more cases.
7584 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7586 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7587 N0.getOperand(0).getResNo() == 0) ||
7588 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7589 auto *Load =
7590 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7591
7592 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7593 // This can be a pure constant or a vector splat, in which case we treat the
7594 // vector as a scalar and use the splat value.
7595 APInt Constant = APInt::getZero(1);
7596 if (const ConstantSDNode *C = isConstOrConstSplat(
7597 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7598 Constant = C->getAPIntValue();
7599 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7600 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7601 APInt SplatValue, SplatUndef;
7602 unsigned SplatBitSize;
7603 bool HasAnyUndefs;
7604 // Endianness should not matter here. Code below makes sure that we only
7605 // use the result if the SplatBitSize is a multiple of the vector element
7606 // size. And after that we AND all element sized parts of the splat
7607 // together. So the end result should be the same regardless of in which
7608 // order we do those operations.
7609 const bool IsBigEndian = false;
7610 bool IsSplat =
7611 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7612 HasAnyUndefs, EltBitWidth, IsBigEndian);
7613
7614 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7615 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7616 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7617 // Undef bits can contribute to a possible optimisation if set, so
7618 // set them.
7619 SplatValue |= SplatUndef;
7620
7621 // The splat value may be something like "0x00FFFFFF", which means 0 for
7622 // the first vector value and FF for the rest, repeating. We need a mask
7623 // that will apply equally to all members of the vector, so AND all the
7624 // lanes of the constant together.
7625 Constant = APInt::getAllOnes(EltBitWidth);
7626 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7627 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7628 }
7629 }
7630
7631 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7632 // actually legal and isn't going to get expanded, else this is a false
7633 // optimisation.
7634 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7635 Load->getValueType(0),
7636 Load->getMemoryVT());
7637
7638 // Resize the constant to the same size as the original memory access before
7639 // extension. If it is still the AllOnesValue then this AND is completely
7640 // unneeded.
7641 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7642
7643 bool B;
7644 switch (Load->getExtensionType()) {
7645 default: B = false; break;
7646 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7647 case ISD::ZEXTLOAD:
7648 case ISD::NON_EXTLOAD: B = true; break;
7649 }
7650
7651 if (B && Constant.isAllOnes()) {
7652 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7653 // preserve semantics once we get rid of the AND.
7654 SDValue NewLoad(Load, 0);
7655
7656 // Fold the AND away. NewLoad may get replaced immediately.
7657 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7658
7659 if (Load->getExtensionType() == ISD::EXTLOAD) {
7660 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7661 Load->getValueType(0), SDLoc(Load),
7662 Load->getChain(), Load->getBasePtr(),
7663 Load->getOffset(), Load->getMemoryVT(),
7664 Load->getMemOperand());
7665 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7666 if (Load->getNumValues() == 3) {
7667 // PRE/POST_INC loads have 3 values.
7668 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7669 NewLoad.getValue(2) };
7670 CombineTo(Load, To, 3, true);
7671 } else {
7672 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7673 }
7674 }
7675
7676 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7677 }
7678 }
7679
7680 // Try to convert a constant mask AND into a shuffle clear mask.
7681 if (VT.isVector())
7682 if (SDValue Shuffle = XformToShuffleWithZero(N))
7683 return Shuffle;
7684
7685 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7686 return Combined;
7687
7688 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7689 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7690 SDValue Ext = N0.getOperand(0);
7691 EVT ExtVT = Ext->getValueType(0);
7692 SDValue Extendee = Ext->getOperand(0);
7693
7694 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7695 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7696 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7697 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7698 // => (extract_subvector (iN_zeroext v))
7699 SDValue ZeroExtExtendee =
7700 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7701
7702 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7703 N0.getOperand(1));
7704 }
7705 }
7706
7707 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7708 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7709 EVT MemVT = GN0->getMemoryVT();
7710 EVT ScalarVT = MemVT.getScalarType();
7711
7712 if (SDValue(GN0, 0).hasOneUse() &&
7713 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7714 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7715 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7716 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7717
7718 SDValue ZExtLoad = DAG.getMaskedGather(
7719 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7720 GN0->getIndexType(), ISD::ZEXTLOAD);
7721
7722 CombineTo(N, ZExtLoad);
7723 AddToWorklist(ZExtLoad.getNode());
7724 // Avoid recheck of N.
7725 return SDValue(N, 0);
7726 }
7727 }
7728
7729 // fold (and (load x), 255) -> (zextload x, i8)
7730 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7731 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7732 if (SDValue Res = reduceLoadWidth(N))
7733 return Res;
7734
7735 if (LegalTypes) {
7736 // Attempt to propagate the AND back up to the leaves which, if they're
7737 // loads, can be combined to narrow loads and the AND node can be removed.
7738 // Perform after legalization so that extend nodes will already be
7739 // combined into the loads.
7740 if (BackwardsPropagateMask(N))
7741 return SDValue(N, 0);
7742 }
7743
7744 if (SDValue Combined = visitANDLike(N0, N1, N))
7745 return Combined;
7746
7747 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7748 if (N0.getOpcode() == N1.getOpcode())
7749 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7750 return V;
7751
7752 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7753 return R;
7754 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7755 return R;
7756
7757 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7758 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7759 SDValue X, Y, Z, NotY;
7760 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7761 if (sd_match(N,
7762 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7763 sd_match(NotY, m_Not(m_Value(Y))) &&
7764 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7765 return DAG.getNode(ISD::AND, DL, VT, X,
7766 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7767
7768 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7769 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7770 if (sd_match(N, m_And(m_Value(X),
7771 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7772 sd_match(NotY, m_Not(m_Value(Y))) &&
7773 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7774 return DAG.getNode(ISD::AND, DL, VT, X,
7775 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7776
7777 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7778 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7779 if (TLI.hasAndNot(SDValue(N, 0)))
7780 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7781 return Folded;
7782
7783 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7784 // If we are shifting down an extended sign bit, see if we can simplify
7785 // this to shifting the MSB directly to expose further simplifications.
7786 // This pattern often appears after sext_inreg legalization.
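// For example (i32): if X is known to have at least 24 sign bits, then bits
// 8..31 of X all equal the sign bit, so (and (srl X, 8), 1) is the same as
// (srl X, 31).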
7787 APInt Amt;
7788 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7789 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7790 return DAG.getNode(ISD::SRL, DL, VT, X,
7791 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7792
7793 // Masking the negated extension of a boolean is just the zero-extended
7794 // boolean:
7795 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7796 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7797 //
7798 // Note: the SimplifyDemandedBits fold below can make an information-losing
7799 // transform, and then we have no way to find this better fold.
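// Reasoning: zext(bool) is 0 or 1, so (sub 0, zext(b)) is 0 or -1 and its low
// bit equals b; sext(bool) is 0 or -1, so (sub 0, sext(b)) is 0 or 1, which is
// already zext(b).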
7800 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7801 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7802 X.getOperand(0).getScalarValueSizeInBits() == 1)
7803 return X;
7804 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7805 X.getOperand(0).getScalarValueSizeInBits() == 1)
7806 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7807 }
7808
7809 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7810 // fold (and (sra)) -> (and (srl)) when possible.
7811 if (SimplifyDemandedBits(SDValue(N, 0)))
7812 return SDValue(N, 0);
7813
7814 // fold (zext_inreg (extload x)) -> (zextload x)
7815 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7816 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7817 (ISD::isEXTLoad(N0.getNode()) ||
7818 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7819 auto *LN0 = cast<LoadSDNode>(N0);
7820 EVT MemVT = LN0->getMemoryVT();
7821 // If we zero all the possible extended bits, then we can turn this into
7822 // a zextload if we are running before legalize or the operation is legal.
7823 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7824 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7825 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
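// e.g. extending an i8 load to i32: ExtBitSize = 32, MemBitSize = 8, so
// ExtBits covers bits 8..31; if the AND mask already clears all of them, the
// original extension kind is irrelevant and a ZEXTLOAD computes the same value.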
7826 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7827 ((!LegalOperations && LN0->isSimple()) ||
7828 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7829 SDValue ExtLoad =
7830 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7831 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7832 AddToWorklist(N);
7833 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7834 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7835 }
7836 }
7837
7838 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7839 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7840 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7841 N0.getOperand(1), false))
7842 return BSwap;
7843 }
7844
7845 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7846 return Shifts;
7847
7848 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7849 return V;
7850
7851 // Recognize the following pattern:
7852 //
7853 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7854 //
7855 // where bitmask is a mask that clears the upper bits of AndVT. The
7856 // number of bits in bitmask must be a power of two.
7857 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7858 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7859 return false;
7860
7861 auto *C = dyn_cast<ConstantSDNode>(RHS);
7862 if (!C)
7863 return false;
7864
7865 if (!C->getAPIntValue().isMask(
7866 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7867 return false;
7868
7869 return true;
7870 };
7871
7872 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7873 if (IsAndZeroExtMask(N0, N1))
7874 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7875
7876 if (hasOperation(ISD::USUBSAT, VT))
7877 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7878 return V;
7879
7880 // Postpone until legalization has completed to avoid interference with bswap
7881 // folding
7882 if (LegalOperations || VT.isVector())
7883 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7884 return R;
7885
7886 if (VT.isScalarInteger() && VT != MVT::i1)
7887 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7888 return R;
7889
7890 return SDValue();
7891}
7892
7893/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
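/// With a 32-bit 'a' only the low halfword of the pattern is meaningful:
/// bswap(a) places a's byte 0 in bits 31:24 and byte 1 in bits 23:16, so
/// shifting the bswap right by 16 yields a's low halfword with its two bytes
/// swapped, which is exactly what (a >> 8) | (a << 8) produces there.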
7894SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7895 bool DemandHighBits) {
7896 if (!LegalOperations)
7897 return SDValue();
7898
7899 EVT VT = N->getValueType(0);
7900 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7901 return SDValue();
7902 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7903 return SDValue();
7904
7905 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7906 bool LookPassAnd0 = false;
7907 bool LookPassAnd1 = false;
7908 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7909 std::swap(N0, N1);
7910 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7911 std::swap(N0, N1);
7912 if (N0.getOpcode() == ISD::AND) {
7913 if (!N0->hasOneUse())
7914 return SDValue();
7915 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7916 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7917 // This is needed for X86.
7918 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7919 N01C->getZExtValue() != 0xFFFF))
7920 return SDValue();
7921 N0 = N0.getOperand(0);
7922 LookPassAnd0 = true;
7923 }
7924
7925 if (N1.getOpcode() == ISD::AND) {
7926 if (!N1->hasOneUse())
7927 return SDValue();
7928 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7929 if (!N11C || N11C->getZExtValue() != 0xFF)
7930 return SDValue();
7931 N1 = N1.getOperand(0);
7932 LookPassAnd1 = true;
7933 }
7934
7935 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7936 std::swap(N0, N1);
7937 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7938 return SDValue();
7939 if (!N0->hasOneUse() || !N1->hasOneUse())
7940 return SDValue();
7941
7942 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7943 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7944 if (!N01C || !N11C)
7945 return SDValue();
7946 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7947 return SDValue();
7948
7949 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7950 SDValue N00 = N0->getOperand(0);
7951 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7952 if (!N00->hasOneUse())
7953 return SDValue();
7954 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7955 if (!N001C || N001C->getZExtValue() != 0xFF)
7956 return SDValue();
7957 N00 = N00.getOperand(0);
7958 LookPassAnd0 = true;
7959 }
7960
7961 SDValue N10 = N1->getOperand(0);
7962 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7963 if (!N10->hasOneUse())
7964 return SDValue();
7965 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7966 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7967 // for X86.
7968 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7969 N101C->getZExtValue() != 0xFFFF))
7970 return SDValue();
7971 N10 = N10.getOperand(0);
7972 LookPassAnd1 = true;
7973 }
7974
7975 if (N00 != N10)
7976 return SDValue();
7977
7978 // Make sure everything beyond the low halfword gets set to zero since the SRL
7979 // 16 will clear the top bits.
7980 unsigned OpSizeInBits = VT.getSizeInBits();
7981 if (OpSizeInBits > 16) {
7982 // If the left-shift isn't masked out then the only way this is a bswap is
7983 // if all bits beyond the low 8 are 0. In that case the entire pattern
7984 // reduces to a left shift anyway: leave it for other parts of the combiner.
7985 if (DemandHighBits && !LookPassAnd0)
7986 return SDValue();
7987
7988 // However, if the right shift isn't masked out then it might be because
7989 // it's not needed. See if we can spot that too. If the high bits aren't
7990 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7991 // upper bits to be zero.
7992 if (!LookPassAnd1) {
7993 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7994 if (!DAG.MaskedValueIsZero(N10,
7995 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7996 return SDValue();
7997 }
7998 }
7999
8000 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
8001 if (OpSizeInBits > 16) {
8002 SDLoc DL(N);
8003 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8004 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
8005 }
8006 return Res;
8007}
8008
8009/// Return true if the specified node is an element that makes up a 32-bit
8010/// packed halfword byteswap.
8011/// ((x & 0x000000ff) << 8) |
8012/// ((x & 0x0000ff00) >> 8) |
8013/// ((x & 0x00ff0000) << 8) |
8014/// ((x & 0xff000000) >> 8)
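/// Each matched element records, in \p Parts, the node that supplies the byte
/// for its mask position; the caller later checks that all four entries refer
/// to the same source node.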
8015static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
8016 if (!N->hasOneUse())
8017 return false;
8018
8019 unsigned Opc = N.getOpcode();
8020 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8021 return false;
8022
8023 SDValue N0 = N.getOperand(0);
8024 unsigned Opc0 = N0.getOpcode();
8025 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8026 return false;
8027
8028 ConstantSDNode *N1C = nullptr;
8029 // SHL or SRL: look upstream for AND mask operand
8030 if (Opc == ISD::AND)
8031 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8032 else if (Opc0 == ISD::AND)
8033 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8034 if (!N1C)
8035 return false;
8036
8037 unsigned MaskByteOffset;
8038 switch (N1C->getZExtValue()) {
8039 default:
8040 return false;
8041 case 0xFF: MaskByteOffset = 0; break;
8042 case 0xFF00: MaskByteOffset = 1; break;
8043 case 0xFFFF:
8044 // In case demanded bits didn't clear the bits that will be shifted out.
8045 // This is needed for X86.
8046 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8047 MaskByteOffset = 1;
8048 break;
8049 }
8050 return false;
8051 case 0xFF0000: MaskByteOffset = 2; break;
8052 case 0xFF000000: MaskByteOffset = 3; break;
8053 }
8054
8055 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8056 if (Opc == ISD::AND) {
8057 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8058 // (x >> 8) & 0xff
8059 // (x >> 8) & 0xff0000
8060 if (Opc0 != ISD::SRL)
8061 return false;
8062 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8063 if (!C || C->getZExtValue() != 8)
8064 return false;
8065 } else {
8066 // (x << 8) & 0xff00
8067 // (x << 8) & 0xff000000
8068 if (Opc0 != ISD::SHL)
8069 return false;
8070 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8071 if (!C || C->getZExtValue() != 8)
8072 return false;
8073 }
8074 } else if (Opc == ISD::SHL) {
8075 // (x & 0xff) << 8
8076 // (x & 0xff0000) << 8
8077 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8078 return false;
8079 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8080 if (!C || C->getZExtValue() != 8)
8081 return false;
8082 } else { // Opc == ISD::SRL
8083 // (x & 0xff00) >> 8
8084 // (x & 0xff000000) >> 8
8085 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8086 return false;
8087 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8088 if (!C || C->getZExtValue() != 8)
8089 return false;
8090 }
8091
8092 if (Parts[MaskByteOffset])
8093 return false;
8094
8095 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8096 return true;
8097}
8098
8099// Match 2 elements of a packed halfword bswap.
8100static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8101 if (N.getOpcode() == ISD::OR)
8102 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8103 isBSwapHWordElement(N.getOperand(1), Parts);
8104
8105 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8106 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8107 if (!C || C->getAPIntValue() != 16)
8108 return false;
8109 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8110 return true;
8111 }
8112
8113 return false;
8114}
8115
8116// Match this pattern:
8117// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8118// And rewrite this to:
8119// (rotr (bswap A), 16)
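// This is correct for i32 A = b3.b2.b1.b0: the masked shift pair produces
// b2.b3.b0.b1, and bswap(A) = b0.b1.b2.b3 rotated right by 16 is also
// b2.b3.b0.b1.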
8120static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8121 SelectionDAG &DAG, SDNode *N, SDValue N0,
8122 SDValue N1, EVT VT) {
8123 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8124 "MatchBSwapHWordOrAndAnd: expecting i32");
8125 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8126 return SDValue();
8127 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8128 return SDValue();
8129 // TODO: this is too restrictive; lifting this restriction requires more tests
8130 if (!N0->hasOneUse() || !N1->hasOneUse())
8131 return SDValue();
8132 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8133 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8134 if (!Mask0 || !Mask1)
8135 return SDValue();
8136 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8137 Mask1->getAPIntValue() != 0x00ff00ff)
8138 return SDValue();
8139 SDValue Shift0 = N0.getOperand(0);
8140 SDValue Shift1 = N1.getOperand(0);
8141 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8142 return SDValue();
8143 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8144 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8145 if (!ShiftAmt0 || !ShiftAmt1)
8146 return SDValue();
8147 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8148 return SDValue();
8149 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8150 return SDValue();
8151
8152 SDLoc DL(N);
8153 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8154 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8155 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8156}
8157
8158/// Match a 32-bit packed halfword bswap. That is
8159/// ((x & 0x000000ff) << 8) |
8160/// ((x & 0x0000ff00) >> 8) |
8161/// ((x & 0x00ff0000) << 8) |
8162/// ((x & 0xff000000) >> 8)
8163/// => (rotl (bswap x), 16)
8164SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8165 if (!LegalOperations)
8166 return SDValue();
8167
8168 EVT VT = N->getValueType(0);
8169 if (VT != MVT::i32)
8170 return SDValue();
8171 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8172 return SDValue();
8173
8174 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8175 return BSwap;
8176
8177 // Try again with commuted operands.
8178 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8179 return BSwap;
8180
8181
8182 // Look for either
8183 // (or (bswaphpair), (bswaphpair))
8184 // (or (or (bswaphpair), (and)), (and))
8185 // (or (or (and), (bswaphpair)), (and))
8186 SDNode *Parts[4] = {};
8187
8188 if (isBSwapHWordPair(N0, Parts)) {
8189 // (or (or (and), (and)), (or (and), (and)))
8190 if (!isBSwapHWordPair(N1, Parts))
8191 return SDValue();
8192 } else if (N0.getOpcode() == ISD::OR) {
8193 // (or (or (or (and), (and)), (and)), (and))
8194 if (!isBSwapHWordElement(N1, Parts))
8195 return SDValue();
8196 SDValue N00 = N0.getOperand(0);
8197 SDValue N01 = N0.getOperand(1);
8198 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8199 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8200 return SDValue();
8201 } else {
8202 return SDValue();
8203 }
8204
8205 // Make sure the parts are all coming from the same node.
8206 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8207 return SDValue();
8208
8209 SDLoc DL(N);
8210 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8211 SDValue(Parts[0], 0));
8212
8213 // Result of the bswap should be rotated by 16. If it's not legal, then
8214 // do (x << 16) | (x >> 16).
8215 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8216 if (hasOperation(ISD::ROTL, VT))
8217 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8218 if (hasOperation(ISD::ROTR, VT))
8219 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8220 return DAG.getNode(ISD::OR, DL, VT,
8221 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8222 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8223}
8224
8225/// This contains all DAGCombine rules which reduce two values combined by
8226/// an Or operation to a single value \see visitANDLike().
8227SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8228 EVT VT = N1.getValueType();
8229
8230 // fold (or x, undef) -> -1
8231 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8232 return DAG.getAllOnesConstant(DL, VT);
8233
8234 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8235 return V;
8236
8237 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8238 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8239 // Don't increase # computations.
8240 (N0->hasOneUse() || N1->hasOneUse())) {
8241 // We can only do this xform if we know that bits from X that are set in C2
8242 // but not in C1 are already zero. Likewise for Y.
8243 if (const ConstantSDNode *N0O1C =
8244 getAsNonOpaqueConstant(N0.getOperand(1))) {
8245 if (const ConstantSDNode *N1O1C =
8246 getAsNonOpaqueConstant(N1.getOperand(1))) {
8247 // We can only do this xform if we know that bits from X that are set in
8248 // C2 but not in C1 are already zero. Likewise for Y.
8249 const APInt &LHSMask = N0O1C->getAPIntValue();
8250 const APInt &RHSMask = N1O1C->getAPIntValue();
8251
8252 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8253 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8254 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8255 N0.getOperand(0), N1.getOperand(0));
8256 return DAG.getNode(ISD::AND, DL, VT, X,
8257 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8258 }
8259 }
8260 }
8261 }
8262
8263 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8264 if (N0.getOpcode() == ISD::AND &&
8265 N1.getOpcode() == ISD::AND &&
8266 N0.getOperand(0) == N1.getOperand(0) &&
8267 // Don't increase # computations.
8268 (N0->hasOneUse() || N1->hasOneUse())) {
8269 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8270 N0.getOperand(1), N1.getOperand(1));
8271 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8272 }
8273
8274 return SDValue();
8275}
8276
8277/// OR combines for which the commuted variant will be tried as well.
8278static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8279 SDNode *N) {
8280 EVT VT = N0.getValueType();
8281 unsigned BW = VT.getScalarSizeInBits();
8282 SDLoc DL(N);
8283
8284 auto peekThroughResize = [](SDValue V) {
8285 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8286 return V->getOperand(0);
8287 return V;
8288 };
8289
8290 SDValue N0Resized = peekThroughResize(N0);
8291 if (N0Resized.getOpcode() == ISD::AND) {
8292 SDValue N1Resized = peekThroughResize(N1);
8293 SDValue N00 = N0Resized.getOperand(0);
8294 SDValue N01 = N0Resized.getOperand(1);
8295
8296 // fold or (and x, y), x --> x
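// (absorption: every bit set in (and x, y) is already set in x, so the OR
// adds nothing)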
8297 if (N00 == N1Resized || N01 == N1Resized)
8298 return N1;
8299
8300 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8301 // TODO: Set AllowUndefs = true.
8302 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8303 /* AllowUndefs */ false)) {
8304 if (peekThroughResize(NotOperand) == N1Resized)
8305 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8306 N1);
8307 }
8308
8309 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8310 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8311 /* AllowUndefs */ false)) {
8312 if (peekThroughResize(NotOperand) == N1Resized)
8313 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8314 N1);
8315 }
8316 }
8317
8318 SDValue X, Y;
8319
8320 // fold or (xor X, N1), N1 --> or X, N1
8321 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8322 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8323
8324 // fold or (xor x, y), (x and/or y) --> or x, y
8325 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8326 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8327 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
8328 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8329
8330 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8331 return R;
8332
8333 auto peekThroughZext = [](SDValue V) {
8334 if (V->getOpcode() == ISD::ZERO_EXTEND)
8335 return V->getOperand(0);
8336 return V;
8337 };
8338
8339 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8340 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8341 N0.getOperand(0) == N1.getOperand(0) &&
8342 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8343 return N0;
8344
8345 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8346 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8347 N0.getOperand(1) == N1.getOperand(0) &&
8348 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8349 return N0;
8350
8351 // Attempt to match a legalized build_pair-esque pattern:
8352 // or(shl(aext(Hi),BW/2),zext(Lo))
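// e.g. for a legalized i64 pair: Hi and Lo are i32, Hi is any-extended and
// shifted into bits 63:32 while zext(Lo) supplies bits 31:0.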
8353 SDValue Lo, Hi;
8354 if (sd_match(N0,
8355 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
8356 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8357 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8358 Lo.getValueType() == Hi.getValueType()) {
8359 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8360 SDValue NotLo, NotHi;
8361 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8362 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8363 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8364 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8365 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8366 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8367 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8368 }
8369 }
8370
8371 return SDValue();
8372}
8373
8374SDValue DAGCombiner::visitOR(SDNode *N) {
8375 SDValue N0 = N->getOperand(0);
8376 SDValue N1 = N->getOperand(1);
8377 EVT VT = N1.getValueType();
8378 SDLoc DL(N);
8379
8380 // x | x --> x
8381 if (N0 == N1)
8382 return N0;
8383
8384 // fold (or c1, c2) -> c1|c2
8385 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8386 return C;
8387
8388 // canonicalize constant to RHS
8389 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8390 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8391 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8392
8393 // fold vector ops
8394 if (VT.isVector()) {
8395 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8396 return FoldedVOp;
8397
8398 // fold (or x, 0) -> x, vector edition
8399 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8400 return N0;
8401
8402 // fold (or x, -1) -> -1, vector edition
8403 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8404 // do not return N1, because undef node may exist in N1
8405 return DAG.getAllOnesConstant(DL, N1.getValueType());
8406
8407 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8408 // Do this only if the resulting type / shuffle is legal.
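    // Each shuffle takes its remaining lanes from a zero vector, so ORing the
    // two results just picks, per lane, whichever shuffle supplied the
    // non-zero element, i.e. a single shuffle of A and B.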
8409 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8410 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8411 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8412 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8413 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8414 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8415 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8416 // Ensure both shuffles have a zero input.
8417 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8418 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8419 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8420 bool CanFold = true;
8421 int NumElts = VT.getVectorNumElements();
8422 SmallVector<int, 4> Mask(NumElts, -1);
8423
8424 for (int i = 0; i != NumElts; ++i) {
8425 int M0 = SV0->getMaskElt(i);
8426 int M1 = SV1->getMaskElt(i);
8427
8428 // Determine if either index is pointing to a zero vector.
8429 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8430 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8431
8432 // If one element is zero and the other side is undef, keep undef.
8433 // This also handles the case that both are undef.
8434 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8435 continue;
8436
8437 // Make sure only one of the elements is zero.
8438 if (M0Zero == M1Zero) {
8439 CanFold = false;
8440 break;
8441 }
8442
8443 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8444
8445 // We have a zero and non-zero element. If the non-zero came from
8446 // SV0 make the index a LHS index. If it came from SV1, make it
8447 // a RHS index. We need to mod by NumElts because we don't care
8448 // which operand it came from in the original shuffles.
8449 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8450 }
8451
8452 if (CanFold) {
8453 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8454 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8455 SDValue LegalShuffle =
8456 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8457 if (LegalShuffle)
8458 return LegalShuffle;
8459 }
8460 }
8461 }
8462 }
8463
8464 // fold (or x, 0) -> x
8465 if (isNullConstant(N1))
8466 return N0;
8467
8468 // fold (or x, -1) -> -1
8469 if (isAllOnesConstant(N1))
8470 return N1;
8471
8472 if (SDValue NewSel = foldBinOpIntoSelect(N))
8473 return NewSel;
8474
8475 // fold (or x, c) -> c iff (x & ~c) == 0
8476 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8477 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8478 return N1;
8479
8480 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8481 return R;
8482
8483 if (SDValue Combined = visitORLike(N0, N1, DL))
8484 return Combined;
8485
8486 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8487 return Combined;
8488
8489 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8490 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8491 return BSwap;
8492 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8493 return BSwap;
8494
8495 // reassociate or
8496 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8497 return ROR;
8498
8499 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8500 if (SDValue SD =
8501 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8502 return SD;
8503
8504 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8505 // iff (c1 & c2) != 0 or c1/c2 are undef.
8506 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8507 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8508 };
8509 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8510 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8511 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8512 {N1, N0.getOperand(1)})) {
8513 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8514 AddToWorklist(IOR.getNode());
8515 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8516 }
8517 }
8518
8519 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8520 return Combined;
8521 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8522 return Combined;
8523
8524 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8525 if (N0.getOpcode() == N1.getOpcode())
8526 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8527 return V;
8528
8529 // See if this is some rotate idiom.
8530 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8531 return Rot;
8532
8533 if (SDValue Load = MatchLoadCombine(N))
8534 return Load;
8535
8536 // Simplify the operands using demanded-bits information.
8537 if (SimplifyDemandedBits(SDValue(N, 0)))
8538 return SDValue(N, 0);
8539
8540 // If OR can be rewritten into ADD, try combines based on ADD.
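  // When the two operands have no common set bits the OR cannot produce any
  // carries, so it is equivalent to an ADD and the ADD combines apply.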
8541 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8542 DAG.isADDLike(SDValue(N, 0)))
8543 if (SDValue Combined = visitADDLike(N))
8544 return Combined;
8545
8546 // Postpone until legalization has completed to avoid interference with bswap
8547 // folding
8548 if (LegalOperations || VT.isVector())
8549 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8550 return R;
8551
8552 if (VT.isScalarInteger() && VT != MVT::i1)
8553 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8554 return R;
8555
8556 return SDValue();
8557}
8558
8559static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8560 SDValue &Mask) {
8561 if (Op.getOpcode() == ISD::AND &&
8562 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8563 Mask = Op.getOperand(1);
8564 return Op.getOperand(0);
8565 }
8566 return Op;
8567}
8568
8569/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8570static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8571 SDValue &Mask) {
8572 Op = stripConstantMask(DAG, Op, Mask);
8573 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8574 Shift = Op;
8575 return true;
8576 }
8577 return false;
8578}
8579
8580/// Helper function for visitOR to extract the needed side of a rotate idiom
8581/// from a shl/srl/mul/udiv. This is meant to handle cases where
8582/// InstCombine merged some outside op with one of the shifts from
8583/// the rotate pattern.
8584/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8585/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8586/// patterns:
8587///
8588/// (or (add v v) (shrl v bitwidth-1)):
8589/// expands (add v v) -> (shl v 1)
8590///
8591/// (or (mul v c0) (shrl (mul v c1) c2)):
8592/// expands (mul v c0) -> (shl (mul v c1) c3)
8593///
8594/// (or (udiv v c0) (shl (udiv v c1) c2)):
8595/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8596///
8597/// (or (shl v c0) (shrl (shl v c1) c2)):
8598/// expands (shl v c0) -> (shl (shl v c1) c3)
8599///
8600/// (or (shrl v c0) (shl (shrl v c1) c2)):
8601/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8602///
8603/// Such that in all cases, c3+c2==bitwidth(op v c1).
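/// For example on i32, (or (mul v, 4), (srl (mul v, 2), 31)) matches the mul
/// case with c0=4, c1=2, c2=31: the left operand is rewritten to
/// (shl (mul v, 2), 1), since 4 == 2 << 1 and 1 + 31 == 32.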
8604static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8605 SDValue ExtractFrom, SDValue &Mask,
8606 const SDLoc &DL) {
8607 assert(OppShift && ExtractFrom && "Empty SDValue");
8608 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8609 return SDValue();
8610
8611 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8612
8613 // Value and Type of the shift.
8614 SDValue OppShiftLHS = OppShift.getOperand(0);
8615 EVT ShiftedVT = OppShiftLHS.getValueType();
8616
8617 // Amount of the existing shift.
8618 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8619
8620 // (add v v) -> (shl v 1)
8621 // TODO: Should this be a general DAG canonicalization?
8622 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8623 ExtractFrom.getOpcode() == ISD::ADD &&
8624 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8625 ExtractFrom.getOperand(0) == OppShiftLHS &&
8626 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8627 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8628 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8629
8630 // Preconditions:
8631 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8632 //
8633 // Find opcode of the needed shift to be extracted from (op0 v c0).
8634 unsigned Opcode = ISD::DELETED_NODE;
8635 bool IsMulOrDiv = false;
8636 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8637 // opcode or its arithmetic (mul or udiv) variant.
8638 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8639 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8640 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8641 return false;
8642 Opcode = NeededShift;
8643 return true;
8644 };
8645 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8646 // that the needed shift can be extracted from.
8647 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8648 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8649 return SDValue();
8650
8651 // op0 must be the same opcode on both sides, have the same LHS argument,
8652 // and produce the same value type.
8653 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8654 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8655 ShiftedVT != ExtractFrom.getValueType())
8656 return SDValue();
8657
8658 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8659 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8660 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8661 ConstantSDNode *ExtractFromCst =
8662 isConstOrConstSplat(ExtractFrom.getOperand(1));
8663 // TODO: We should be able to handle non-uniform constant vectors for these values
8664 // Check that we have constant values.
8665 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8666 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8667 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8668 return SDValue();
8669
8670 // Compute the shift amount we need to extract to complete the rotate.
8671 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8672 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8673 return SDValue();
8674 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8675 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8676 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8677 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8678 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8679
8680 // Now try extract the needed shift from the ExtractFrom op and see if the
8681 // result matches up with the existing shift's LHS op.
8682 if (IsMulOrDiv) {
8683 // Op to extract from is a mul or udiv by a constant.
8684 // Check:
8685 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8686 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8687 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8688 NeededShiftAmt.getZExtValue());
8689 APInt ResultAmt;
8690 APInt Rem;
8691 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8692 if (Rem != 0 || ResultAmt != OppLHSAmt)
8693 return SDValue();
8694 } else {
8695 // Op to extract from is a shift by a constant.
8696 // Check:
8697 // c2 - (bitwidth(op0 v c0) - c1) == c0
8698 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8699 ExtractFromAmt.getBitWidth()))
8700 return SDValue();
8701 }
8702
8703 // Return the expanded shift op that should allow a rotate to be formed.
8704 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8705 EVT ResVT = ExtractFrom.getValueType();
8706 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8707 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8708}
8709
8710// Return true if we can prove that, whenever Neg and Pos are both in the
8711// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8712// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8713//
8714// (or (shift1 X, Neg), (shift2 X, Pos))
8715//
8716// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8717// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8718// to consider shift amounts with defined behavior.
8719//
8720// The IsRotate flag should be set when the LHS of both shifts is the same.
8721// Otherwise if matching a general funnel shift, it should be clear.
8722static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8723 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8724 const auto &TLI = DAG.getTargetLoweringInfo();
8725 // If EltSize is a power of 2 then:
8726 //
8727 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8728 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8729 //
8730 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8731 // for the stronger condition:
8732 //
8733 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8734 //
8735 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8736 // we can just replace Neg with Neg' for the rest of the function.
8737 //
8738 // In other cases we check for the even stronger condition:
8739 //
8740 // Neg == EltSize - Pos [B]
8741 //
8742 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8743 // behavior if Pos == 0 (and consequently Neg == EltSize).
8744 //
8745 // We could actually use [A] whenever EltSize is a power of 2, but the
8746 // only extra cases that it would match are those uninteresting ones
8747 // where Neg and Pos are never in range at the same time. E.g. for
8748 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8749 // as well as (sub 32, Pos), but:
8750 //
8751 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8752 //
8753 // always invokes undefined behavior for 32-bit X.
8754 //
8755 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8756 // This allows us to peek through any operations that only affect Mask's
8757 // un-demanded bits.
8758 //
8759 // NOTE: We can only do this when matching operations which won't modify the
8760 // least Log2(EltSize) significant bits and not a general funnel shift.
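// Concretely, for EltSize == 32 this accepts Neg == (sub 32, Pos) outright,
// and when matching a rotate it also accepts (and (sub C, Pos), 31) for any C
// that is a multiple of 32, since only the low 5 bits of the amount matter.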
8761 unsigned MaskLoBits = 0;
8762 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8763 unsigned Bits = Log2_64(EltSize);
8764 unsigned NegBits = Neg.getScalarValueSizeInBits();
8765 if (NegBits >= Bits) {
8766 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8767 if (SDValue Inner =
8768 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8769 Neg = Inner;
8770 MaskLoBits = Bits;
8771 }
8772 }
8773 }
8774
8775 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8776 if (Neg.getOpcode() != ISD::SUB)
8777 return false;
8778 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8779 if (!NegC)
8780 return false;
8781 SDValue NegOp1 = Neg.getOperand(1);
8782
8783 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8784 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8785 // are redundant for the purpose of the equality.
8786 if (MaskLoBits) {
8787 unsigned PosBits = Pos.getScalarValueSizeInBits();
8788 if (PosBits >= MaskLoBits) {
8789 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8790 if (SDValue Inner =
8791 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8792 Pos = Inner;
8793 }
8794 }
8795 }
8796
8797 // The condition we need is now:
8798 //
8799 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8800 //
8801 // If NegOp1 == Pos then we need:
8802 //
8803 // EltSize & Mask == NegC & Mask
8804 //
8805 // (because "x & Mask" is a truncation and distributes through subtraction).
8806 //
8807 // We also need to account for a potential truncation of NegOp1 if the amount
8808 // has already been legalized to a shift amount type.
8809 APInt Width;
8810 if ((Pos == NegOp1) ||
8811 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8812 Width = NegC->getAPIntValue();
8813
8814 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8815 // Then the condition we want to prove becomes:
8816 //
8817 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8818 //
8819 // which, again because "x & Mask" is a truncation, becomes:
8820 //
8821 // NegC & Mask == (EltSize - PosC) & Mask
8822 // EltSize & Mask == (NegC + PosC) & Mask
8823 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8824 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8825 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8826 else
8827 return false;
8828 } else
8829 return false;
8830
8831 // Now we just need to check that EltSize & Mask == Width & Mask.
8832 if (MaskLoBits)
8833 // EltSize & Mask is 0 since Mask is EltSize - 1.
8834 return Width.getLoBits(MaskLoBits) == 0;
8835 return Width == EltSize;
8836}
8837
8838// A subroutine of MatchRotate used once we have found an OR of two opposite
8839// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8840// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8841// former being preferred if supported. InnerPos and InnerNeg are Pos and
8842// Neg with outer conversions stripped away.
8843SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8844 SDValue Neg, SDValue InnerPos,
8845 SDValue InnerNeg, bool FromAdd,
8846 bool HasPos, unsigned PosOpcode,
8847 unsigned NegOpcode, const SDLoc &DL) {
8848 // fold (or/add (shl x, (*ext y)),
8849 // (srl x, (*ext (sub 32, y)))) ->
8850 // (rotl x, y) or (rotr x, (sub 32, y))
8851 //
8852 // fold (or/add (shl x, (*ext (sub 32, y))),
8853 // (srl x, (*ext y))) ->
8854 // (rotr x, y) or (rotl x, (sub 32, y))
8855 EVT VT = Shifted.getValueType();
8856 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8857 /*IsRotate*/ true, FromAdd))
8858 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8859 HasPos ? Pos : Neg);
8860
8861 return SDValue();
8862}
8863
8864// A subroutine of MatchRotate used once we have found an OR of two opposite
8865// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8866// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8867// former being preferred if supported. InnerPos and InnerNeg are Pos and
8868// Neg with outer conversions stripped away.
8869// TODO: Merge with MatchRotatePosNeg.
8870SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8871 SDValue Neg, SDValue InnerPos,
8872 SDValue InnerNeg, bool FromAdd,
8873 bool HasPos, unsigned PosOpcode,
8874 unsigned NegOpcode, const SDLoc &DL) {
8875 EVT VT = N0.getValueType();
8876 unsigned EltBits = VT.getScalarSizeInBits();
8877
8878 // fold (or/add (shl x0, (*ext y)),
8879 // (srl x1, (*ext (sub 32, y)))) ->
8880 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8881 //
8882 // fold (or/add (shl x0, (*ext (sub 32, y))),
8883 // (srl x1, (*ext y))) ->
8884 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8885 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8886 FromAdd))
8887 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8888 HasPos ? Pos : Neg);
8889
8890 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8891 // so for now just use the PosOpcode case if it's legal.
8892 // TODO: When can we use the NegOpcode case?
8893 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8894 SDValue X;
8895 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8896 // -> (fshl x0, x1, y)
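    // For y in [0, 31], (xor y, 31) == 31 - y, so the inner pair of shifts
    // computes x1 >> (32 - y) with the y == 0 case made safe by the explicit
    // pre-shift by 1; that is exactly the srl half of fshl(x0, x1, y).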
8897 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8898 sd_match(InnerNeg,
8899 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8900 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8901 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8902 }
8903
8904 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8905 // -> (fshr x0, x1, y)
8906 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8907 sd_match(InnerPos,
8908 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8909 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8910 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8911 }
8912
8913 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8914 // -> (fshr x0, x1, y)
8915 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8916 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8917 sd_match(InnerPos,
8918 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8919 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8920 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8921 }
8922 }
8923
8924 return SDValue();
8925}
8926
8927// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8928// many idioms for rotate, and if the target supports rotation instructions,
8929// generate a rot[lr]. This also matches funnel shift patterns, similar to
8930// rotation but with different shifted sources.
8931SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8932 bool FromAdd) {
8933 EVT VT = LHS.getValueType();
8934
8935 // The target must have at least one rotate/funnel flavor.
8936 // We still try to match rotate by constant pre-legalization.
8937 // TODO: Support pre-legalization funnel-shift by constant.
8938 bool HasROTL = hasOperation(ISD::ROTL, VT);
8939 bool HasROTR = hasOperation(ISD::ROTR, VT);
8940 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8941 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8942
8943 // If the type is going to be promoted and the target has enabled custom
8944 // lowering for rotate, allow matching rotate by non-constants. Only allow
8945 // this for scalar types.
8946 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8947 TargetLowering::TypePromoteInteger) {
8948 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8949 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8950 }
8951
8952 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8953 return SDValue();
8954
8955 // Check for truncated rotate.
8956 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8957 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8958 assert(LHS.getValueType() == RHS.getValueType());
8959 if (SDValue Rot =
8960 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
8961 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8962 }
8963
8964 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8965 SDValue LHSShift; // The shift.
8966 SDValue LHSMask; // AND value if any.
8967 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8968
8969 SDValue RHSShift; // The shift.
8970 SDValue RHSMask; // AND value if any.
8971 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8972
8973 // If neither side matched a rotate half, bail
8974 if (!LHSShift && !RHSShift)
8975 return SDValue();
8976
8977 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8978 // side of the rotate, so try to handle that here. In all cases we need to
8979 // pass the matched shift from the opposite side to compute the opcode and
8980 // needed shift amount to extract. We still want to do this if both sides
8981 // matched a rotate half because one half may be a potential overshift that
8982 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8983 // single one).
8984
8985 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8986 if (LHSShift)
8987 if (SDValue NewRHSShift =
8988 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8989 RHSShift = NewRHSShift;
8990 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8991 if (RHSShift)
8992 if (SDValue NewLHSShift =
8993 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8994 LHSShift = NewLHSShift;
8995
8996 // If a side is still missing, nothing else we can do.
8997 if (!RHSShift || !LHSShift)
8998 return SDValue();
8999
9000 // At this point we've matched or extracted a shift op on each side.
9001
9002 if (LHSShift.getOpcode() == RHSShift.getOpcode())
9003 return SDValue(); // Shifts must disagree.
9004
9005 // Canonicalize shl to left side in a shl/srl pair.
9006 if (RHSShift.getOpcode() == ISD::SHL) {
9007 std::swap(LHS, RHS);
9008 std::swap(LHSShift, RHSShift);
9009 std::swap(LHSMask, RHSMask);
9010 }
9011
9012 // Something has gone wrong - we've lost the shl/srl pair - bail.
9013 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
9014 return SDValue();
9015
9016 unsigned EltSizeInBits = VT.getScalarSizeInBits();
9017 SDValue LHSShiftArg = LHSShift.getOperand(0);
9018 SDValue LHSShiftAmt = LHSShift.getOperand(1);
9019 SDValue RHSShiftArg = RHSShift.getOperand(0);
9020 SDValue RHSShiftAmt = RHSShift.getOperand(1);
9021
9022 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
9023 ConstantSDNode *RHS) {
9024 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
9025 };
9026
9027 auto ApplyMasks = [&](SDValue Res) {
9028 // If there is an AND of either shifted operand, apply it to the result.
9029 if (LHSMask.getNode() || RHSMask.getNode()) {
9030 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
9031 SDValue Mask = AllOnes;
9032
9033 if (LHSMask.getNode()) {
9034 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
9035 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9036 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
9037 }
9038 if (RHSMask.getNode()) {
9039 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
9040 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9041 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
9042 }
9043
9044 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
9045 }
9046
9047 return Res;
9048 };
9049
9050 // TODO: Support pre-legalization funnel-shift by constant.
9051 bool IsRotate = LHSShiftArg == RHSShiftArg;
9052 if (!IsRotate && !(HasFSHL || HasFSHR)) {
9053 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
9054 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9055 // Look for a disguised rotate by constant.
9056 // The common shifted operand X may be hidden inside another 'or'.
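      // The shl distributes over the inner 'or', so the expression contains
      // both (shl X, C1) and (srl X, C2) with C1 + C2 == BW, i.e. rotl(X, C1),
      // plus the leftover shifted Y term.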
9057 SDValue X, Y;
9058 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9059 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9060 return false;
9061 if (CommonOp == Or.getOperand(0)) {
9062 X = CommonOp;
9063 Y = Or.getOperand(1);
9064 return true;
9065 }
9066 if (CommonOp == Or.getOperand(1)) {
9067 X = CommonOp;
9068 Y = Or.getOperand(0);
9069 return true;
9070 }
9071 return false;
9072 };
9073
9074 SDValue Res;
9075 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9076 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9077 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9078 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9079 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9080 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9081 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9082 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9083 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9084 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9085 } else {
9086 return SDValue();
9087 }
9088
9089 return ApplyMasks(Res);
9090 }
9091
9092 return SDValue(); // Requires funnel shift support.
9093 }
9094
9095 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9096 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9097 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9098 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9099 // iff C1+C2 == EltSizeInBits
9100 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9101 SDValue Res;
9102 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9103 bool UseROTL = !LegalOperations || HasROTL;
9104 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9105 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9106 } else {
9107 bool UseFSHL = !LegalOperations || HasFSHL;
9108 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9109 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9110 }
9111
9112 return ApplyMasks(Res);
9113 }
9114
9115 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9116 // shift.
9117 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9118 return SDValue();
9119
9120 // If there is a mask here, and we have a variable shift, we can't be sure
9121 // that we're masking out the right stuff.
9122 if (LHSMask.getNode() || RHSMask.getNode())
9123 return SDValue();
9124
9125 // If the shift amount is sign/zext/any-extended just peel it off.
9126 SDValue LExtOp0 = LHSShiftAmt;
9127 SDValue RExtOp0 = RHSShiftAmt;
9128 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9129 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9130 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9131 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9132 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9133 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9134 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9135 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9136 LExtOp0 = LHSShiftAmt.getOperand(0);
9137 RExtOp0 = RHSShiftAmt.getOperand(0);
9138 }
9139
9140 if (IsRotate && (HasROTL || HasROTR)) {
9141 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9142 LExtOp0, RExtOp0, FromAdd, HasROTL,
9143 ISD::ROTL, ISD::ROTR, DL))
9144 return TryL;
9145
9146 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9147 RExtOp0, LExtOp0, FromAdd, HasROTR,
9148 ISD::ROTR, ISD::ROTL, DL))
9149 return TryR;
9150 }
9151
9152 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9153 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9154 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9155 return TryL;
9156
9157 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9158 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9159 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9160 return TryR;
9161
9162 return SDValue();
9163}
9164
9165/// Recursively traverses the expression calculating the origin of the requested
9166/// byte of the given value. Returns std::nullopt if the provider can't be
9167/// calculated.
9168///
9169/// For all the values except the root of the expression, we verify that the
9170/// value has exactly one use and if not then return std::nullopt. This way if
9171/// the origin of the byte is returned it's guaranteed that the values which
9172/// contribute to the byte are not used outside of this expression.
9173///
9174/// However, there is a special case when dealing with vector loads -- we allow
9175/// more than one use if the load is a vector type. Since the values that
9176/// contribute to the byte ultimately come from the ExtractVectorElements of the
9177/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9178/// because those operations are independent from the pattern to be combined.
9179/// For vector loads, we simply care that the ByteProviders are adjacent
9180/// positions of the same vector, and their index matches the byte that is being
9181/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9182/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9183/// byte position we are trying to provide for the LoadCombine. If these do
9184/// not match, then we cannot combine the vector loads. \p Index is the byte
9185/// position we are trying to provide for, matched against the shl and load
9186/// size. The \p Index algorithm ensures the requested byte is provided for by
9187/// the pattern and that the pattern does not over-provide bytes.
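/// For example, when byte 2 of an i32 built as (or (shl (zext i8 X), 16), ...)
/// is requested, the SHL subtracts its byte shift of 2 from the index, and the
/// zext below it is then asked for byte 0 of X.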
9188///
9189///
9190/// The supported LoadCombine pattern for vector loads is as follows
9191/// or
9192/// / \
9193/// or shl
9194/// / \ |
9195/// or shl zext
9196/// / \ | |
9197/// shl zext zext EVE*
9198/// | | | |
9199/// zext EVE* EVE* LOAD
9200/// | | |
9201/// EVE* LOAD LOAD
9202/// |
9203/// LOAD
9204///
9205/// *ExtractVectorElement
9206using SDByteProvider = ByteProvider<SDNode *>;
9207
9208static std::optional<SDByteProvider>
9209calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9210 std::optional<uint64_t> VectorIndex,
9211 unsigned StartingIndex = 0) {
9212
9213 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9214 if (Depth == 10)
9215 return std::nullopt;
9216
9217 // Only allow multiple uses if the instruction is a vector load (in which
9218 // case we will use the load for every ExtractVectorElement)
9219 if (Depth && !Op.hasOneUse() &&
9220 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9221 return std::nullopt;
9222
9223 // Fail to combine if we have encountered anything but a LOAD after handling
9224 // an ExtractVectorElement.
9225 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9226 return std::nullopt;
9227
9228 unsigned BitWidth = Op.getScalarValueSizeInBits();
9229 if (BitWidth % 8 != 0)
9230 return std::nullopt;
9231 unsigned ByteWidth = BitWidth / 8;
9232 assert(Index < ByteWidth && "invalid index requested");
9233 (void) ByteWidth;
9234
9235 switch (Op.getOpcode()) {
9236 case ISD::OR: {
9237 auto LHS =
9238 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9239 if (!LHS)
9240 return std::nullopt;
9241 auto RHS =
9242 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9243 if (!RHS)
9244 return std::nullopt;
9245
9246 if (LHS->isConstantZero())
9247 return RHS;
9248 if (RHS->isConstantZero())
9249 return LHS;
9250 return std::nullopt;
9251 }
9252 case ISD::SHL: {
9253 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9254 if (!ShiftOp)
9255 return std::nullopt;
9256
9257 uint64_t BitShift = ShiftOp->getZExtValue();
9258
9259 if (BitShift % 8 != 0)
9260 return std::nullopt;
9261 uint64_t ByteShift = BitShift / 8;
9262
9263 // If we are shifting by an amount greater than the index we are trying to
9264 // provide, then do not provide anything. Otherwise, subtract the index by
9265 // the amount we shifted by.
9266 return Index < ByteShift
9267 ? SDByteProvider::getConstantZero()
9268 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9269 Depth + 1, VectorIndex, Index);
9270 }
9271 case ISD::ANY_EXTEND:
9272 case ISD::SIGN_EXTEND:
9273 case ISD::ZERO_EXTEND: {
9274 SDValue NarrowOp = Op->getOperand(0);
9275 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9276 if (NarrowBitWidth % 8 != 0)
9277 return std::nullopt;
9278 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9279
9280 if (Index >= NarrowByteWidth)
9281 return Op.getOpcode() == ISD::ZERO_EXTEND
9282 ? std::optional<SDByteProvider>(
9283 SDByteProvider::getConstantZero())
9284 : std::nullopt;
9285 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9286 StartingIndex);
9287 }
9288 case ISD::BSWAP:
9289 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9290 Depth + 1, VectorIndex, StartingIndex);
9291 case ISD::EXTRACT_VECTOR_ELT: {
9292 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9293 if (!OffsetOp)
9294 return std::nullopt;
9295
9296 VectorIndex = OffsetOp->getZExtValue();
9297
9298 SDValue NarrowOp = Op->getOperand(0);
9299 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9300 if (NarrowBitWidth % 8 != 0)
9301 return std::nullopt;
9302 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9303 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9304 // type, leaving the high bits undefined.
9305 if (Index >= NarrowByteWidth)
9306 return std::nullopt;
9307
9308 // Check to see if the position of the element in the vector corresponds
9309 // with the byte we are trying to provide for. In the case of a vector of
9310 // i8, this simply means the VectorIndex == StartingIndex. For non-i8 cases,
9311 // the element will provide a range of bytes. For example, if we have a
9312 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9313 // 3).
9314 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9315 return std::nullopt;
9316 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9317 return std::nullopt;
9318
9319 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9320 VectorIndex, StartingIndex);
9321 }
9322 case ISD::LOAD: {
9323 auto L = cast<LoadSDNode>(Op.getNode());
9324 if (!L->isSimple() || L->isIndexed())
9325 return std::nullopt;
9326
9327 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9328 if (NarrowBitWidth % 8 != 0)
9329 return std::nullopt;
9330 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9331
9332 // If the width of the load does not cover the byte we are trying to provide
9333 // for, and it is not a ZEXTLOAD, then the load does not provide for the byte
9334 // in question.
9335 if (Index >= NarrowByteWidth)
9336 return L->getExtensionType() == ISD::ZEXTLOAD
9337 ? std::optional<SDByteProvider>(
9338 SDByteProvider::getConstantZero())
9339 : std::nullopt;
9340
9341 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9342 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9343 }
9344 }
9345
9346 return std::nullopt;
9347}
9348
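// Helpers mapping a byte index within a value to its offset in memory for each
// endianness. For example, for a 4-byte value, byte 0 (the least significant
// byte) lives at offset littleEndianByteAt(4, 0) == 0 in a little-endian
// layout and at offset bigEndianByteAt(4, 0) == 3 in a big-endian layout.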
9349static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9350 return i;
9351}
9352
9353static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9354 return BW - i - 1;
9355}
9356
9357// Check if the byte offsets we are looking at match either a big- or
9358// little-endian value load. Return true for big endian, false for little
9359// endian, and std::nullopt if the match failed.
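// For example, offsets (relative to FirstOffset) of {0, 1, 2, 3} match a
// little-endian load (returns false), offsets of {3, 2, 1, 0} match a
// big-endian load (returns true), and any other ordering fails (std::nullopt).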
9360static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9361 int64_t FirstOffset) {
9362 // The endian can be decided only when it is 2 bytes at least.
9363 unsigned Width = ByteOffsets.size();
9364 if (Width < 2)
9365 return std::nullopt;
9366
9367 bool BigEndian = true, LittleEndian = true;
9368 for (unsigned i = 0; i < Width; i++) {
9369 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9370 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9371 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9372 if (!BigEndian && !LittleEndian)
9373 return std::nullopt;
9374 }
9375
9376 assert((BigEndian != LittleEndian) && "It should be either big endian or "
9377 "little endian");
9378 return BigEndian;
9379}
9380
9381// Look through one layer of truncate or extend.
9382static SDValue stripTruncAndExt(SDValue Value) {
9383 switch (Value.getOpcode()) {
9384 case ISD::TRUNCATE:
9385 case ISD::ZERO_EXTEND:
9386 case ISD::SIGN_EXTEND:
9387 case ISD::ANY_EXTEND:
9388 return Value.getOperand(0);
9389 }
9390 return SDValue();
9391}
9392
9393/// Match a pattern where a wide type scalar value is stored by several narrow
9394/// stores. Fold it into a single store or a BSWAP and a store if the target
9395/// supports it.
9396///
9397/// Assuming little endian target:
9398/// i8 *p = ...
9399/// i32 val = ...
9400/// p[0] = (val >> 0) & 0xFF;
9401/// p[1] = (val >> 8) & 0xFF;
9402/// p[2] = (val >> 16) & 0xFF;
9403/// p[3] = (val >> 24) & 0xFF;
9404/// =>
9405/// *((i32)p) = val;
9406///
9407/// i8 *p = ...
9408/// i32 val = ...
9409/// p[0] = (val >> 24) & 0xFF;
9410/// p[1] = (val >> 16) & 0xFF;
9411/// p[2] = (val >> 8) & 0xFF;
9412/// p[3] = (val >> 0) & 0xFF;
9413/// =>
9414/// *((i32)p) = BSWAP(val);
9415SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9416 // The matching looks for "store (trunc x)" patterns that appear early but are
9417 // likely to be replaced by truncating store nodes during combining.
9418 // TODO: If there is evidence that running this later would help, this
9419 // limitation could be removed. Legality checks may need to be added
9420 // for the created store and optional bswap/rotate.
9421 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9422 return SDValue();
9423
9424 // We only handle merging simple stores of 1-4 bytes.
9425 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9426 EVT MemVT = N->getMemoryVT();
9427 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9428 !N->isSimple() || N->isIndexed())
9429 return SDValue();
9430
9431 // Collect all of the stores in the chain, up to the maximum store width (i64).
9432 SDValue Chain = N->getChain();
9433 SmallVector<StoreSDNode *, 8> Stores = {N};
9434 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9435 unsigned MaxWideNumBits = 64;
9436 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9437 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9438 // All stores must be the same size to ensure that we are writing all of the
9439 // bytes in the wide value.
9440 // This store should have exactly one use as a chain operand for another
9441 // store in the merging set. If there are other chain uses, then the
9442 // transform may not be safe because order of loads/stores outside of this
9443 // set may not be preserved.
9444 // TODO: We could allow multiple sizes by tracking each stored byte.
9445 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9446 Store->isIndexed() || !Store->hasOneUse())
9447 return SDValue();
9448 Stores.push_back(Store);
9449 Chain = Store->getChain();
9450 if (MaxStores < Stores.size())
9451 return SDValue();
9452 }
9453 // There is no reason to continue if we do not have at least a pair of stores.
9454 if (Stores.size() < 2)
9455 return SDValue();
9456
9457 // Handle simple types only.
9458 LLVMContext &Context = *DAG.getContext();
9459 unsigned NumStores = Stores.size();
9460 unsigned WideNumBits = NumStores * NarrowNumBits;
9461 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9462 return SDValue();
9463
9464 // Check if all bytes of the source value that we are looking at are stored
9465 // to the same base address. Collect offsets from Base address into OffsetMap.
9466 SDValue SourceValue;
9467 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9468 int64_t FirstOffset = INT64_MAX;
9469 StoreSDNode *FirstStore = nullptr;
9470 std::optional<BaseIndexOffset> Base;
9471 for (auto *Store : Stores) {
9472 // All the stores store different parts of the CombinedValue. A truncate is
9473 // required to get the partial value.
9474 SDValue Trunc = Store->getValue();
9475 if (Trunc.getOpcode() != ISD::TRUNCATE)
9476 return SDValue();
9477 // Other than the first/last part, a shift operation is required to get the
9478 // offset.
9479 int64_t Offset = 0;
9480 SDValue WideVal = Trunc.getOperand(0);
9481 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9482 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9483 // The shift amount must be a constant multiple of the narrow type.
9484 // It is translated to the offset address in the wide source value "y".
9485 //
9486 // x = srl y, ShiftAmtC
9487 // i8 z = trunc x
9488 // store z, ...
9489 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9490 if (ShiftAmtC % NarrowNumBits != 0)
9491 return SDValue();
9492
9493 // Make sure we aren't reading bits that are shifted in.
9494 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9495 return SDValue();
9496
9497 Offset = ShiftAmtC / NarrowNumBits;
9498 WideVal = WideVal.getOperand(0);
9499 }
9500
9501 // Stores must share the same source value with different offsets.
9502 if (!SourceValue)
9503 SourceValue = WideVal;
9504 else if (SourceValue != WideVal) {
9505 // Truncate and extends can be stripped to see if the values are related.
9506 if (stripTruncAndExt(SourceValue) != WideVal &&
9507 stripTruncAndExt(WideVal) != SourceValue)
9508 return SDValue();
9509
9510 if (WideVal.getScalarValueSizeInBits() >
9511 SourceValue.getScalarValueSizeInBits())
9512 SourceValue = WideVal;
9513
9514 // Give up if the source value type is smaller than the store size.
9515 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9516 return SDValue();
9517 }
9518
9519 // Stores must share the same base address.
9520 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9521 int64_t ByteOffsetFromBase = 0;
9522 if (!Base)
9523 Base = Ptr;
9524 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9525 return SDValue();
9526
9527 // Remember the first store.
9528 if (ByteOffsetFromBase < FirstOffset) {
9529 FirstStore = Store;
9530 FirstOffset = ByteOffsetFromBase;
9531 }
9532 // Map the offset in the store and the offset in the combined value, and
9533 // early return if it has been set before.
9534 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9535 return SDValue();
9536 OffsetMap[Offset] = ByteOffsetFromBase;
9537 }
9538
9539 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9540
9541 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9542 assert(FirstStore && "First store must be set");
9543
9544 // Check that a store of the wide type is both allowed and fast on the target
9545 const DataLayout &Layout = DAG.getDataLayout();
9546 unsigned Fast = 0;
9547 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9548 *FirstStore->getMemOperand(), &Fast);
9549 if (!Allowed || !Fast)
9550 return SDValue();
9551
9552 // Check if the pieces of the value are going to the expected places in memory
9553 // to merge the stores.
9554 auto checkOffsets = [&](bool MatchLittleEndian) {
9555 if (MatchLittleEndian) {
9556 for (unsigned i = 0; i != NumStores; ++i)
9557 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9558 return false;
9559 } else { // MatchBigEndian by reversing loop counter.
9560 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9561 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9562 return false;
9563 }
9564 return true;
9565 };
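// For example, with four 1-byte stores and a first byte offset F, a
// little-endian match requires OffsetMap == {F, F+1, F+2, F+3} (piece i of the
// wide value stored at byte offset F + i); the reversed map {F+3, F+2, F+1, F}
// corresponds to the big-endian layout handled below via BSWAP or a rotate.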
9566
9567 // Check if the offsets line up for the native data layout of this target.
9568 bool NeedBswap = false;
9569 bool NeedRotate = false;
9570 if (!checkOffsets(Layout.isLittleEndian())) {
9571 // Special-case: check if byte offsets line up for the opposite endian.
9572 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9573 NeedBswap = true;
9574 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9575 NeedRotate = true;
9576 else
9577 return SDValue();
9578 }
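// An illustrative instance of the rotate case above (pseudo-code): storing an
// i32 as two i16 halves with the high half at the lower address on a
// little-endian target,
//   p16[0] = (i16)(val >> 16); p16[1] = (i16)val;
// writes the same four bytes as a single i32 store of ROTR(val, 16).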
9579
9580 SDLoc DL(N);
9581 if (WideVT != SourceValue.getValueType()) {
9582 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9583 "Unexpected store value to merge");
9584 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9585 }
9586
9587 // Before legalize we can introduce illegal bswaps/rotates which will later be
9588 // converted to an explicit bswap sequence. This way we end up with a single
9589 // store and byte shuffling instead of several stores and byte shuffling.
9590 if (NeedBswap) {
9591 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9592 } else if (NeedRotate) {
9593 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9594 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9595 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9596 }
9597
9598 SDValue NewStore =
9599 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9600 FirstStore->getPointerInfo(), FirstStore->getAlign());
9601
9602 // Rely on other DAG combine rules to remove the other individual stores.
9603 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9604 return NewStore;
9605}
9606
9607/// Match a pattern where a wide type scalar value is loaded by several narrow
9608/// loads and combined by shifts and ors. Fold it into a single load or a load
9609/// and a BSWAP if the target supports it.
9610///
9611/// Assuming little endian target:
9612/// i8 *a = ...
9613/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9614/// =>
9615/// i32 val = *((i32)a)
9616///
9617/// i8 *a = ...
9618/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9619/// =>
9620/// i32 val = BSWAP(*((i32)a))
9621///
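/// If the high bytes of the value are known to be zero, a narrower
/// zero-extending load can be formed instead, e.g. (little endian):
/// i32 val = a[0] | (a[1] << 8)
/// =>
/// i32 val = zext i16 (*((i16)a))
///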
9622/// TODO: This rule matches complex patterns with OR node roots and doesn't
9623/// interact well with the worklist mechanism. When a part of the pattern is
9624/// updated (e.g. one of the loads) its direct users are put into the worklist,
9625/// but the root node of the pattern which triggers the load combine is not
9626/// of the t28 load is reassociated, load combine won't be triggered:
9627/// of t28 load is reassociated load combine won't be triggered:
9628/// t25: i32 = add t4, Constant:i32<2>
9629/// t26: i64 = sign_extend t25
9630/// t27: i64 = add t2, t26
9631/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9632/// t29: i32 = zero_extend t28
9633/// t32: i32 = shl t29, Constant:i8<8>
9634/// t33: i32 = or t23, t32
9635/// As a possible fix visitLoad can check if the load can be a part of a load
9636/// combine pattern and add corresponding OR roots to the worklist.
9637SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9638 assert(N->getOpcode() == ISD::OR &&
9639 "Can only match load combining against OR nodes");
9640
9641 // Handles simple types only
9642 EVT VT = N->getValueType(0);
9643 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9644 return SDValue();
9645 unsigned ByteWidth = VT.getSizeInBits() / 8;
9646
9647 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9648 auto MemoryByteOffset = [&](SDByteProvider P) {
9649 assert(P.hasSrc() && "Must be a memory byte provider");
9650 auto *Load = cast<LoadSDNode>(P.Src.value());
9651
9652 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9653
9654 assert(LoadBitWidth % 8 == 0 &&
9655 "can only analyze providers for individual bytes, not bits");
9656 unsigned LoadByteWidth = LoadBitWidth / 8;
9657 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9658 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9659 };
9660
9661 std::optional<BaseIndexOffset> Base;
9662 SDValue Chain;
9663
9664 SmallPtrSet<LoadSDNode *, 8> Loads;
9665 std::optional<SDByteProvider> FirstByteProvider;
9666 int64_t FirstOffset = INT64_MAX;
9667
9668 // Check if all the bytes of the OR we are looking at are loaded from the same
9669 // base address. Collect byte offsets from the Base address in ByteOffsets.
9670 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9671 unsigned ZeroExtendedBytes = 0;
9672 for (int i = ByteWidth - 1; i >= 0; --i) {
9673 auto P =
9674 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9675 /*StartingIndex*/ i);
9676 if (!P)
9677 return SDValue();
9678
9679 if (P->isConstantZero()) {
9680 // It's OK for the N most significant bytes to be 0, we can just
9681 // zero-extend the load.
9682 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9683 return SDValue();
9684 continue;
9685 }
9686 assert(P->hasSrc() && "provenance should either be memory or zero");
9687 auto *L = cast<LoadSDNode>(P->Src.value());
9688
9689 // All loads must share the same chain
9690 SDValue LChain = L->getChain();
9691 if (!Chain)
9692 Chain = LChain;
9693 else if (Chain != LChain)
9694 return SDValue();
9695
9696 // Loads must share the same base address
9697 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9698 int64_t ByteOffsetFromBase = 0;
9699
9700 // For vector loads, the expected load combine pattern will have an
9701 // ExtractElement for each index in the vector. While each of these
9702 // ExtractElements will be accessing the same base address as determined
9703 // by the load instruction, the actual bytes they interact with will differ
9704 // due to different ExtractElement indices. To accurately determine the
9705 // byte position of an ExtractElement, we offset the base load ptr with
9706 // the index multiplied by the byte size of each element in the vector.
9707 if (L->getMemoryVT().isVector()) {
9708 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9709 if (LoadWidthInBit % 8 != 0)
9710 return SDValue();
9711 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9712 Ptr.addToOffset(ByteOffsetFromVector);
9713 }
9714
9715 if (!Base)
9716 Base = Ptr;
9717
9718 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9719 return SDValue();
9720
9721 // Calculate the offset of the current byte from the base address
9722 ByteOffsetFromBase += MemoryByteOffset(*P);
9723 ByteOffsets[i] = ByteOffsetFromBase;
9724
9725 // Remember the first byte load
9726 if (ByteOffsetFromBase < FirstOffset) {
9727 FirstByteProvider = P;
9728 FirstOffset = ByteOffsetFromBase;
9729 }
9730
9731 Loads.insert(L);
9732 }
9733
9734 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9735 "memory, so there must be at least one load which produces the value");
9736 assert(Base && "Base address of the accessed memory location must be set");
9737 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9738
9739 bool NeedsZext = ZeroExtendedBytes > 0;
9740
9741 EVT MemVT =
9742 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9743
9744 if (!MemVT.isSimple())
9745 return SDValue();
9746
9747 // Before legalize we can introduce too-wide illegal loads which will later be
9748 // split into legal-sized loads. This enables us to combine i64-load-by-i8
9749 // patterns into a couple of i32 loads on 32-bit targets.
9750 if (LegalOperations &&
9751 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9752 MemVT))
9753 return SDValue();
9754
9755 // Check if the bytes of the OR we are looking at match with either big or
9756 // little endian value load
9757 std::optional<bool> IsBigEndian = isBigEndian(
9758 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9759 if (!IsBigEndian)
9760 return SDValue();
9761
9762 assert(FirstByteProvider && "must be set");
9763
9764 // Ensure that the first byte is loaded from offset zero of the first load,
9765 // so that the combined value can be loaded from the first load's address.
9766 if (MemoryByteOffset(*FirstByteProvider) != 0)
9767 return SDValue();
9768 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9769
9770 // The node we are looking at matches with the pattern, check if we can
9771 // replace it with a single (possibly zero-extended) load and bswap + shift if
9772 // needed.
9773
9774 // If the load needs byte swap check if the target supports it
9775 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9776
9777 // Before legalize we can introduce illegal bswaps which will later be
9778 // converted to an explicit bswap sequence. This way we end up with a single
9779 // load and byte shuffling instead of several loads and byte shuffling.
9780 // We do not introduce illegal bswaps when zero-extending as this tends to
9781 // introduce too many arithmetic instructions.
9782 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9783 !TLI.isOperationLegal(ISD::BSWAP, VT))
9784 return SDValue();
9785
9786 // If we need to bswap and zero extend, we have to insert a shift. Check that
9787 // it is legal.
9788 if (NeedsBswap && NeedsZext && LegalOperations &&
9789 !TLI.isOperationLegal(ISD::SHL, VT))
9790 return SDValue();
9791
9792 // Check that a load of the wide type is both allowed and fast on the target
9793 unsigned Fast = 0;
9794 bool Allowed =
9795 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9796 *FirstLoad->getMemOperand(), &Fast);
9797 if (!Allowed || !Fast)
9798 return SDValue();
9799
9800 SDValue NewLoad =
9801 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9802 Chain, FirstLoad->getBasePtr(),
9803 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9804
9805 // Transfer chain users from old loads to the new load.
9806 for (LoadSDNode *L : Loads)
9807 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9808
9809 if (!NeedsBswap)
9810 return NewLoad;
9811
9812 SDValue ShiftedLoad =
9813 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9814 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9815 VT, SDLoc(N)))
9816 : NewLoad;
9817 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9818}
9819
9820// If the target has andn, bsl, or a similar bit-select instruction,
9821// we want to unfold masked merge, with canonical pattern of:
9822// | A | |B|
9823// ((x ^ y) & m) ^ y
9824// | D |
9825// Into:
9826// (x & m) | (y & ~m)
9827// If y is a constant, m is not a 'not', and the 'andn' does not work with
9828// immediates, we unfold into a different pattern:
9829// ~(~x & m) & (m | y)
9830// If x is a constant, m is a 'not', and the 'andn' does not work with
9831// immediates, we unfold into a different pattern:
9832// (x | ~m) & ~(~m & ~y)
9833// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9834// the very least that breaks andnpd / andnps patterns, and because those
9835// patterns are simplified in IR and shouldn't be created in the DAG
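// A worked example of the basic unfold with 4-bit values:
//   x = 0b1100, y = 0b1010, m = 0b0110
//   ((x ^ y) & m) ^ y  = (0b0110 & 0b0110) ^ 0b1010 = 0b1100
//   (x & m) | (y & ~m) =  0b0100 | 0b1000           = 0b1100
// i.e. the result takes the bits of x where m is set and the bits of y
// elsewhere.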
9836SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9837 assert(N->getOpcode() == ISD::XOR);
9838
9839 // Don't touch 'not' (i.e. where y = -1).
9840 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9841 return SDValue();
9842
9843 EVT VT = N->getValueType(0);
9844
9845 // There are 3 commutable operators in the pattern,
9846 // so we have to deal with 8 possible variants of the basic pattern.
9847 SDValue X, Y, M;
9848 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9849 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9850 return false;
9851 SDValue Xor = And.getOperand(XorIdx);
9852 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9853 return false;
9854 SDValue Xor0 = Xor.getOperand(0);
9855 SDValue Xor1 = Xor.getOperand(1);
9856 // Don't touch 'not' (i.e. where y = -1).
9857 if (isAllOnesOrAllOnesSplat(Xor1))
9858 return false;
9859 if (Other == Xor0)
9860 std::swap(Xor0, Xor1);
9861 if (Other != Xor1)
9862 return false;
9863 X = Xor0;
9864 Y = Xor1;
9865 M = And.getOperand(XorIdx ? 0 : 1);
9866 return true;
9867 };
9868
9869 SDValue N0 = N->getOperand(0);
9870 SDValue N1 = N->getOperand(1);
9871 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9872 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9873 return SDValue();
9874
9875 // Don't do anything if the mask is constant. This should not be reachable.
9876 // InstCombine should have already unfolded this pattern, and DAGCombiner
9877 // probably shouldn't produce it either.
9878 if (isa<ConstantSDNode>(M.getNode()))
9879 return SDValue();
9880
9881 // We can transform if the target has AndNot
9882 if (!TLI.hasAndNot(M))
9883 return SDValue();
9884
9885 SDLoc DL(N);
9886
9887 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9888 // a bitwise not that would already allow ANDN to be used.
9889 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9890 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9891 // If not, we need to do a bit more work to make sure andn is still used.
9892 SDValue NotX = DAG.getNOT(DL, X, VT);
9893 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9894 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9895 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9896 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9897 }
9898
9899 // If X is a constant and M is a bitwise not, check that 'andn' works with
9900 // immediates.
9901 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9902 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9903 // If not, we need to do a bit more work to make sure andn is still used.
9904 SDValue NotM = M.getOperand(0);
9905 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9906 SDValue NotY = DAG.getNOT(DL, Y, VT);
9907 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9908 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9909 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9910 }
9911
9912 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9913 SDValue NotM = DAG.getNOT(DL, M, VT);
9914 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9915
9916 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9917}
9918
9919SDValue DAGCombiner::visitXOR(SDNode *N) {
9920 SDValue N0 = N->getOperand(0);
9921 SDValue N1 = N->getOperand(1);
9922 EVT VT = N0.getValueType();
9923 SDLoc DL(N);
9924
9925 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9926 if (N0.isUndef() && N1.isUndef())
9927 return DAG.getConstant(0, DL, VT);
9928
9929 // fold (xor x, undef) -> undef
9930 if (N0.isUndef())
9931 return N0;
9932 if (N1.isUndef())
9933 return N1;
9934
9935 // fold (xor c1, c2) -> c1^c2
9936 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9937 return C;
9938
9939 // canonicalize constant to RHS
9940 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9941 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9942 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9943
9944 // fold vector ops
9945 if (VT.isVector()) {
9946 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9947 return FoldedVOp;
9948
9949 // fold (xor x, 0) -> x, vector edition
9950 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9951 return N0;
9952 }
9953
9954 // fold (xor x, 0) -> x
9955 if (isNullConstant(N1))
9956 return N0;
9957
9958 if (SDValue NewSel = foldBinOpIntoSelect(N))
9959 return NewSel;
9960
9961 // reassociate xor
9962 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9963 return RXOR;
9964
9965 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9966 if (SDValue SD =
9967 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9968 return SD;
9969
9970 // fold (a^b) -> (a|b) iff a and b share no bits.
9971 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9972 DAG.haveNoCommonBitsSet(N0, N1))
9973 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9974
9975 // look for 'add-like' folds:
9976 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9977 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9978 isMinSignedConstant(N1))
9979 if (SDValue Combined = visitADDLike(N))
9980 return Combined;
9981
9982 // fold not (setcc x, y, cc) -> setcc x y !cc
9983 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
9984 unsigned N0Opcode = N0.getOpcode();
9985 SDValue LHS, RHS, CC;
9986 if (TLI.isConstTrueVal(N1) &&
9987 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
9988 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
9989 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
9990 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9991 LHS.getValueType());
9992 if (!LegalOperations ||
9993 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9994 switch (N0Opcode) {
9995 default:
9996 llvm_unreachable("Unhandled SetCC Equivalent!");
9997 case ISD::SETCC:
9998 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9999 case ISD::SELECT_CC:
10000 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
10001 N0.getOperand(3), NotCC);
10002 case ISD::STRICT_FSETCC:
10003 case ISD::STRICT_FSETCCS: {
10004 if (N0.hasOneUse()) {
10005 // FIXME Can we handle multiple uses? Could we token factor the chain
10006 // results from the new/old setcc?
10007 SDValue SetCC =
10008 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
10009 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
10010 CombineTo(N, SetCC);
10011 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
10012 recursivelyDeleteUnusedNodes(N0.getNode());
10013 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10014 }
10015 break;
10016 }
10017 }
10018 }
10019 }
10020
10021 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10022 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10023 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
10024 SDValue V = N0.getOperand(0);
10025 SDLoc DL0(N0);
10026 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
10027 DAG.getConstant(1, DL0, V.getValueType()));
10028 AddToWorklist(V.getNode());
10029 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
10030 }
10031
10032 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10033 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10034 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
10035 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10036 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10037 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
10038 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10039 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10040 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10041 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10042 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10043 }
10044 }
10045 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10046 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10047 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
10048 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10049 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10050 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
10051 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10052 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10053 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10054 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10055 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10056 }
10057 }
10058
10059 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10060 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10061 SDValue Y = N0.getOperand(0);
10062 SDValue X = N0.getOperand(1);
10063
10064 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10065 APInt NotYValue = ~YConst->getAPIntValue();
10066 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10067 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10068 }
10069 }
10070
10071 // fold (not (add X, -1)) -> (neg X)
10072 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10073 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10074 return DAG.getNegative(N0.getOperand(0), DL, VT);
10075 }
10076
10077 // fold (xor (and x, y), y) -> (and (not x), y)
10078 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10079 SDValue X = N0.getOperand(0);
10080 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10081 AddToWorklist(NotX.getNode());
10082 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10083 }
10084
10085 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
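// This is the branchless abs idiom: for i32, Y = sra(X, 31) is 0 when X >= 0
// and -1 when X < 0, so (X + Y) ^ Y evaluates to X or to ~(X - 1) == -X
// respectively.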
10086 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10087 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10088 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10089 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10090 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10091 SDValue S0 = S.getOperand(0);
10092 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10093 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10094 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10095 return DAG.getNode(ISD::ABS, DL, VT, S0);
10096 }
10097 }
10098
10099 // fold (xor x, x) -> 0
10100 if (N0 == N1)
10101 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10102
10103 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10104 // Here is a concrete example of this equivalence:
10105 // i16 x == 14
10106 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10107 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10108 //
10109 // =>
10110 //
10111 // i16 ~1 == 0b1111111111111110
10112 // i16 rol(~1, 14) == 0b1011111111111111
10113 //
10114 // Some additional tips to help conceptualize this transform:
10115 // - Try to see the operation as placing a single zero in a value of all ones.
10116 // - There exists no value for x which would allow the result to contain zero.
10117 // - Values of x larger than the bitwidth are undefined and do not require a
10118 // consistent result.
10119 // - Pushing the zero left requires shifting one-bits in from the right.
10120 // A rotate left of ~1 is a nice way of achieving the desired result.
10121 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10122 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
10123 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10124 N0.getOperand(1));
10125 }
10126
10127 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10128 if (N0Opcode == N1.getOpcode())
10129 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10130 return V;
10131
10132 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10133 return R;
10134 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10135 return R;
10136 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10137 return R;
10138
10139 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10140 if (SDValue MM = unfoldMaskedMerge(N))
10141 return MM;
10142
10143 // Simplify the expression using non-local knowledge.
10144 if (SimplifyDemandedBits(SDValue(N, 0)))
10145 return SDValue(N, 0);
10146
10147 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10148 return Combined;
10149
10150 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10151 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10152 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10153 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10154 SDValue Op0;
10155 if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10156 m_SMax(m_Value(Op0), m_Specific(N1)),
10157 m_UMin(m_Value(Op0), m_Specific(N1)),
10158 m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10159
10160 if (isa<ConstantSDNode>(N1) ||
10161 ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
10162 // For vectors, only optimize when the constant is zero or all-ones to
10163 // avoid generating more instructions
10164 if (VT.isVector()) {
10165 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10166 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10167 return SDValue();
10168 }
10169
10170 // Avoid the fold if the minmax operation is legal and select is expensive
10171 if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
10173 return SDValue();
10174
10175 EVT CCVT = getSetCCResultType(VT);
10176 ISD::CondCode CC;
10177 switch (N0.getOpcode()) {
10178 case ISD::SMIN:
10179 CC = ISD::SETLT;
10180 break;
10181 case ISD::SMAX:
10182 CC = ISD::SETGT;
10183 break;
10184 case ISD::UMIN:
10185 CC = ISD::SETULT;
10186 break;
10187 case ISD::UMAX:
10188 CC = ISD::SETUGT;
10189 break;
10190 }
10191 SDValue FN1 = DAG.getFreeze(N1);
10192 SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10193 SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10194 SDValue Zero = DAG.getConstant(0, DL, VT);
10195 return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10196 }
10197 }
10198
10199 return SDValue();
10200}
10201
10202/// If we have a shift-by-constant of a bitwise logic op that itself has a
10203/// shift-by-constant operand with identical opcode, we may be able to convert
10204/// that into 2 independent shifts followed by the logic op. This is a
10205/// throughput improvement.
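///
/// For example (a shift by a constant distributes over the bitwise op):
///   srl (xor (srl X, 3), Y), 2 --> xor (srl X, 5), (srl Y, 2)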
10206static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10207 // Match a one-use bitwise logic op.
10208 SDValue LogicOp = Shift->getOperand(0);
10209 if (!LogicOp.hasOneUse())
10210 return SDValue();
10211
10212 unsigned LogicOpcode = LogicOp.getOpcode();
10213 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10214 LogicOpcode != ISD::XOR)
10215 return SDValue();
10216
10217 // Find a matching one-use shift by constant.
10218 unsigned ShiftOpcode = Shift->getOpcode();
10219 SDValue C1 = Shift->getOperand(1);
10220 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10221 assert(C1Node && "Expected a shift with constant operand");
10222 const APInt &C1Val = C1Node->getAPIntValue();
10223 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10224 const APInt *&ShiftAmtVal) {
10225 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10226 return false;
10227
10228 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10229 if (!ShiftCNode)
10230 return false;
10231
10232 // Capture the shifted operand and shift amount value.
10233 ShiftOp = V.getOperand(0);
10234 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10235
10236 // Shift amount types do not have to match their operand type, so check that
10237 // the constants are the same width.
10238 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10239 return false;
10240
10241 // The fold is not valid if the sum of the shift values doesn't fit in the
10242 // given shift amount type.
10243 bool Overflow = false;
10244 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10245 if (Overflow)
10246 return false;
10247
10248 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10249 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10250 return false;
10251
10252 return true;
10253 };
10254
10255 // Logic ops are commutative, so check each operand for a match.
10256 SDValue X, Y;
10257 const APInt *C0Val;
10258 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10259 Y = LogicOp.getOperand(1);
10260 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10261 Y = LogicOp.getOperand(0);
10262 else
10263 return SDValue();
10264
10265 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10266 SDLoc DL(Shift);
10267 EVT VT = Shift->getValueType(0);
10268 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10269 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10270 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10271 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10272 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10273 LogicOp->getFlags());
10274}
10275
10276/// Handle transforms common to the three shifts, when the shift amount is a
10277/// constant.
10278/// We are looking for: (shift being one of shl/sra/srl)
10279/// shift (binop X, C0), C1
10280/// And want to transform into:
10281/// binop (shift X, C1), (shift C0, C1)
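/// For example:
///   shl (and X, 0xFF), 8 --> and (shl X, 8), 0xFF00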
10282SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10283 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10284
10285 // Do not turn a 'not' into a regular xor.
10286 if (isBitwiseNot(N->getOperand(0)))
10287 return SDValue();
10288
10289 // The inner binop must be one-use, since we want to replace it.
10290 SDValue LHS = N->getOperand(0);
10291 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10292 return SDValue();
10293
10294 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10295 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10296 return R;
10297
10298 // We want to pull some binops through shifts, so that we have (and (shift))
10299 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10300 // thing happens with address calculations, so it's important to canonicalize
10301 // it.
10302 switch (LHS.getOpcode()) {
10303 default:
10304 return SDValue();
10305 case ISD::OR:
10306 case ISD::XOR:
10307 case ISD::AND:
10308 break;
10309 case ISD::ADD:
10310 if (N->getOpcode() != ISD::SHL)
10311 return SDValue(); // only shl(add) not sr[al](add).
10312 break;
10313 }
10314
10315 // FIXME: disable this unless the input to the binop is a shift by a constant
10316 // or is copy/select. Enable this in other cases once we figure out when it
10317 // is actually profitable.
10318 SDValue BinOpLHSVal = LHS.getOperand(0);
10319 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10320 BinOpLHSVal.getOpcode() == ISD::SRA ||
10321 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10322 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10323 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10324 BinOpLHSVal.getOpcode() == ISD::SELECT;
10325
10326 if (!IsShiftByConstant && !IsCopyOrSelect)
10327 return SDValue();
10328
10329 if (IsCopyOrSelect && N->hasOneUse())
10330 return SDValue();
10331
10332 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10333 SDLoc DL(N);
10334 EVT VT = N->getValueType(0);
10335 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10336 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10337 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10338 N->getOperand(1));
10339 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10340 }
10341
10342 return SDValue();
10343}
10344
10345SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10346 assert(N->getOpcode() == ISD::TRUNCATE);
10347 assert(N->getOperand(0).getOpcode() == ISD::AND);
10348
10349 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10350 EVT TruncVT = N->getValueType(0);
10351 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10352 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10353 SDValue N01 = N->getOperand(0).getOperand(1);
10354 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10355 SDLoc DL(N);
10356 SDValue N00 = N->getOperand(0).getOperand(0);
10357 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10358 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10359 AddToWorklist(Trunc00.getNode());
10360 AddToWorklist(Trunc01.getNode());
10361 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10362 }
10363 }
10364
10365 return SDValue();
10366}
10367
10368SDValue DAGCombiner::visitRotate(SDNode *N) {
10369 SDLoc dl(N);
10370 SDValue N0 = N->getOperand(0);
10371 SDValue N1 = N->getOperand(1);
10372 EVT VT = N->getValueType(0);
10373 unsigned Bitsize = VT.getScalarSizeInBits();
10374
10375 // fold (rot x, 0) -> x
10376 if (isNullOrNullSplat(N1))
10377 return N0;
10378
10379 // fold (rot x, c) -> x iff (c % BitSize) == 0
10380 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10381 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10382 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10383 return N0;
10384 }
10385
10386 // fold (rot x, c) -> (rot x, c % BitSize)
10387 bool OutOfRange = false;
10388 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10389 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10390 return true;
10391 };
10392 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10393 EVT AmtVT = N1.getValueType();
10394 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10395 if (SDValue Amt =
10396 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10397 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10398 }
10399
10400 // rot i16 X, 8 --> bswap X
10401 auto *RotAmtC = isConstOrConstSplat(N1);
10402 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10403 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10404 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10405
10406 // Simplify the operands using demanded-bits information.
10407 if (SimplifyDemandedBits(SDValue(N, 0)))
10408 return SDValue(N, 0);
10409
10410 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10411 if (N1.getOpcode() == ISD::TRUNCATE &&
10412 N1.getOperand(0).getOpcode() == ISD::AND) {
10413 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10414 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10415 }
10416
10417 unsigned NextOp = N0.getOpcode();
10418
10419 // fold (rot* (rot* x, c2), c1)
10420 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
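// e.g. for i32: rotl (rotr x, 5), 7 --> rotl x, ((7 - 5 + 32) % 32) == rotl x, 2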
10421 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10422 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10423 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10424 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10425 EVT ShiftVT = N1.getValueType();
10426 bool SameSide = (N->getOpcode() == NextOp);
10427 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10428 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10429 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10430 {N1, BitsizeC});
10431 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10432 {N0.getOperand(1), BitsizeC});
10433 if (Norm1 && Norm2)
10434 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10435 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10436 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10437 {CombinedShift, BitsizeC});
10438 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10439 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10440 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10441 CombinedShiftNorm);
10442 }
10443 }
10444 }
10445 return SDValue();
10446}
10447
10448SDValue DAGCombiner::visitSHL(SDNode *N) {
10449 SDValue N0 = N->getOperand(0);
10450 SDValue N1 = N->getOperand(1);
10451 if (SDValue V = DAG.simplifyShift(N0, N1))
10452 return V;
10453
10454 SDLoc DL(N);
10455 EVT VT = N0.getValueType();
10456 EVT ShiftVT = N1.getValueType();
10457 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10458
10459 // fold (shl c1, c2) -> c1<<c2
10460 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10461 return C;
10462
10463 // fold vector ops
10464 if (VT.isVector()) {
10465 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10466 return FoldedVOp;
10467
10468 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10469 // If setcc produces all-one true value then:
10470 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10471 if (N1CV && N1CV->isConstant()) {
10472 if (N0.getOpcode() == ISD::AND) {
10473 SDValue N00 = N0->getOperand(0);
10474 SDValue N01 = N0->getOperand(1);
10475 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10476
10477 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10478 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10479 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10480 if (SDValue C =
10481 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10482 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10483 }
10484 }
10485 }
10486 }
10487
10488 if (SDValue NewSel = foldBinOpIntoSelect(N))
10489 return NewSel;
10490
10491 // if (shl x, c) is known to be zero, return 0
10492 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10493 return DAG.getConstant(0, DL, VT);
10494
10495 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10496 if (N1.getOpcode() == ISD::TRUNCATE &&
10497 N1.getOperand(0).getOpcode() == ISD::AND) {
10498 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10499 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10500 }
10501
10502 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10503 if (N0.getOpcode() == ISD::SHL) {
10504 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10505 ConstantSDNode *RHS) {
10506 APInt c1 = LHS->getAPIntValue();
10507 APInt c2 = RHS->getAPIntValue();
10508 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10509 return (c1 + c2).uge(OpSizeInBits);
10510 };
10511 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10512 return DAG.getConstant(0, DL, VT);
10513
10514 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10515 ConstantSDNode *RHS) {
10516 APInt c1 = LHS->getAPIntValue();
10517 APInt c2 = RHS->getAPIntValue();
10518 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10519 return (c1 + c2).ult(OpSizeInBits);
10520 };
10521 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10522 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10523 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10524 }
10525 }
10526
10527 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10528 // For this to be valid, the second form must not preserve any of the bits
10529 // that are shifted out by the inner shift in the first form. This means
10530 // the outer shift size must be >= the number of bits added by the ext.
10531 // As a corollary, we don't care what kind of ext it is.
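// For example, with x:i32 zero-extended to i64:
//   shl (zext (shl x, 8)), 40 --> shl (zext x), 48
// Both forms keep only bits 0-15 of x (everything else lands at bit 64 or
// above), so no bits discarded by the inner shift are reintroduced.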
10532 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10533 N0.getOpcode() == ISD::ANY_EXTEND ||
10534 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10535 N0.getOperand(0).getOpcode() == ISD::SHL) {
10536 SDValue N0Op0 = N0.getOperand(0);
10537 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10538 EVT InnerVT = N0Op0.getValueType();
10539 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10540
10541 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10542 ConstantSDNode *RHS) {
10543 APInt c1 = LHS->getAPIntValue();
10544 APInt c2 = RHS->getAPIntValue();
10545 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10546 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10547 (c1 + c2).uge(OpSizeInBits);
10548 };
10549 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10550 /*AllowUndefs*/ false,
10551 /*AllowTypeMismatch*/ true))
10552 return DAG.getConstant(0, DL, VT);
10553
10554 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10555 ConstantSDNode *RHS) {
10556 APInt c1 = LHS->getAPIntValue();
10557 APInt c2 = RHS->getAPIntValue();
10558 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10559 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10560 (c1 + c2).ult(OpSizeInBits);
10561 };
10562 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10563 /*AllowUndefs*/ false,
10564 /*AllowTypeMismatch*/ true)) {
10565 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10566 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10567 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10568 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10569 }
10570 }
10571
10572 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10573 // Only fold this if the inner zext has no other uses to avoid increasing
10574 // the total number of instructions.
10575 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10576 N0.getOperand(0).getOpcode() == ISD::SRL) {
10577 SDValue N0Op0 = N0.getOperand(0);
10578 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10579
10580 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10581 APInt c1 = LHS->getAPIntValue();
10582 APInt c2 = RHS->getAPIntValue();
10583 zeroExtendToMatch(c1, c2);
10584 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10585 };
10586 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10587 /*AllowUndefs*/ false,
10588 /*AllowTypeMismatch*/ true)) {
10589 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10590 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10591 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10592 AddToWorklist(NewSHL.getNode());
10593 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10594 }
10595 }
10596
10597 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10598 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10599 ConstantSDNode *RHS) {
10600 const APInt &LHSC = LHS->getAPIntValue();
10601 const APInt &RHSC = RHS->getAPIntValue();
10602 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10603 LHSC.getZExtValue() <= RHSC.getZExtValue();
10604 };
10605
10606 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10607 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10608 if (N0->getFlags().hasExact()) {
10609 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10610 /*AllowUndefs*/ false,
10611 /*AllowTypeMismatch*/ true)) {
10612 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10613 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10614 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10615 }
10616 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10617 /*AllowUndefs*/ false,
10618 /*AllowTypeMismatch*/ true)) {
10619 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10620 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10621 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10622 }
10623 }
10624
10625 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10626 // (and (srl x, (sub c1, c2)), MASK)
10627 // Only fold this if the inner shift has no other uses -- if it does,
10628 // folding this will increase the total number of instructions.
10629 if (N0.getOpcode() == ISD::SRL &&
10630 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10631 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10632 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10633 /*AllowUndefs*/ false,
10634 /*AllowTypeMismatch*/ true)) {
10635 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10636 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10637 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10638 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10639 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10640 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10641 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10642 }
10643 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10644 /*AllowUndefs*/ false,
10645 /*AllowTypeMismatch*/ true)) {
10646 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10647 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10648 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10649 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10650 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10651 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10652 }
10653 }
10654 }
10655
10656 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10657 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10658 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10659 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10660 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10661 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10662 }
10663
10664 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10665 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10666 // Variant of version done on multiply, except mul by a power of 2 is turned
10667 // into a shift.
10668 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10669 TLI.isDesirableToCommuteWithShift(N, Level)) {
10670 SDValue N01 = N0.getOperand(1);
10671 if (SDValue Shl1 =
10672 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10673 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10674 AddToWorklist(Shl0.getNode());
10675 SDNodeFlags Flags;
10676 // Preserve the disjoint flag for Or.
10677 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10678 Flags.setDisjoint(true);
10679 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10680 }
10681 }
10682
10683 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10684 // TODO: Add zext/add_nuw variant with suitable test coverage
10685 // TODO: Should we limit this with isLegalAddImmediate?
10686 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10687 N0.getOperand(0).getOpcode() == ISD::ADD &&
10688 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10689 TLI.isDesirableToCommuteWithShift(N, Level)) {
10690 SDValue Add = N0.getOperand(0);
10691 SDLoc DL(N0);
10692 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10693 {Add.getOperand(1)})) {
10694 if (SDValue ShlC =
10695 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10696 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10697 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10698 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10699 }
10700 }
10701 }
10702
10703 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10704 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10705 SDValue N01 = N0.getOperand(1);
10706 if (SDValue Shl =
10707 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10708 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10709 }
10710
10711 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10712 if (N1C && !N1C->isOpaque())
10713 if (SDValue NewSHL = visitShiftByConstant(N))
10714 return NewSHL;
10715
10716 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10717 // target.
10718 if (((N1.getOpcode() == ISD::CTTZ &&
10719 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10720 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10721 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10722 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10723 SDValue Y = N1.getOperand(0);
10724 SDLoc DL(N);
10725 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10726 SDValue And =
10727 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10728 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10729 }
10730
10731 if (SimplifyDemandedBits(SDValue(N, 0)))
10732 return SDValue(N, 0);
10733
10734 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10735 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10736 const APInt &C0 = N0.getConstantOperandAPInt(0);
10737 const APInt &C1 = N1C->getAPIntValue();
10738 return DAG.getVScale(DL, VT, C0 << C1);
10739 }
10740
10741 SDValue X;
10742 APInt VS0;
10743
10744 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10745 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10746 SDNodeFlags Flags;
10747 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10748 N0->getFlags().hasNoUnsignedWrap());
10749
10750 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10751 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10752 }
10753
10754 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10755 APInt ShlVal;
10756 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10757 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10758 const APInt &C0 = N0.getConstantOperandAPInt(0);
10759 if (ShlVal.ult(C0.getBitWidth())) {
10760 APInt NewStep = C0 << ShlVal;
10761 return DAG.getStepVector(DL, VT, NewStep);
10762 }
10763 }
10764
10765 return SDValue();
10766}
10767
10768// Transform a right shift of a multiply into a multiply-high.
10769// Examples:
10770 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10771 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10772 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10773 const TargetLowering &TLI) {
10774 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10775 "SRL or SRA node is required here!");
10776
10777 // Check the shift amount. Proceed with the transformation if the shift
10778 // amount is constant.
10779 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10780 if (!ShiftAmtSrc)
10781 return SDValue();
10782
10783 // The operation feeding into the shift must be a multiply.
10784 SDValue ShiftOperand = N->getOperand(0);
10785 if (ShiftOperand.getOpcode() != ISD::MUL)
10786 return SDValue();
10787
10788 // Both operands must be equivalent extend nodes.
10789 SDValue LeftOp = ShiftOperand.getOperand(0);
10790 SDValue RightOp = ShiftOperand.getOperand(1);
10791
10792 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10793 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10794
10795 if (!IsSignExt && !IsZeroExt)
10796 return SDValue();
10797
10798 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10799 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10800
10801 // return true if U may use the lower bits of its operands
10802 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10803 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10804 return true;
10805 }
10806 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10807 if (!UShiftAmtSrc) {
10808 return true;
10809 }
10810 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10811 return UShiftAmt < NarrowVTSize;
10812 };
10813
10814 // If the lower part of the MUL is also used and MUL_LOHI is supported,
10815 // do not introduce the MULH in favor of MUL_LOHI.
10816 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10817 if (!ShiftOperand.hasOneUse() &&
10818 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10819 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10820 return SDValue();
10821 }
10822
10823 SDValue MulhRightOp;
10824 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10825 unsigned ActiveBits = IsSignExt
10826 ? Constant->getAPIntValue().getSignificantBits()
10827 : Constant->getAPIntValue().getActiveBits();
10828 if (ActiveBits > NarrowVTSize)
10829 return SDValue();
10830 MulhRightOp = DAG.getConstant(
10831 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10832 NarrowVT);
10833 } else {
10834 if (LeftOp.getOpcode() != RightOp.getOpcode())
10835 return SDValue();
10836 // Check that the two extend nodes are the same type.
10837 if (NarrowVT != RightOp.getOperand(0).getValueType())
10838 return SDValue();
10839 MulhRightOp = RightOp.getOperand(0);
10840 }
10841
10842 EVT WideVT = LeftOp.getValueType();
10843 // Proceed with the transformation if the wide types match.
10844 assert((WideVT == RightOp.getValueType()) &&
10845 "Cannot have a multiply node with two different operand types.");
10846
10847 // Proceed with the transformation if the wide type is twice as large
10848 // as the narrow type.
10849 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10850 return SDValue();
10851
10852 // Check the shift amount with the narrow type size.
10853 // Proceed with the transformation if the shift amount is the width
10854 // of the narrow type.
10855 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10856 if (ShiftAmt != NarrowVTSize)
10857 return SDValue();
10858
10859 // If the operation feeding into the MUL is a sign extend (sext),
10860 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10861 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10862
10863 // Combine to mulh if mulh is legal/custom for the narrow type on the target,
10864 // or, if it is a vector type, transform it to an acceptable type and rely
10865 // on legalization to split/combine the result.
10866 if (NarrowVT.isVector()) {
10867 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10868 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10869 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10870 return SDValue();
10871 } else {
10872 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10873 return SDValue();
10874 }
10875
10876 SDValue Result =
10877 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10878 bool IsSigned = N->getOpcode() == ISD::SRA;
10879 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10880}
10881
10882// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10883 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
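// For example: (bswap (xor (bswap x), y)) -> (xor x, (bswap y)), since bswap
// distributes over bitwise logic ops and bswap(bswap x) == x.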
10884 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10885 unsigned Opcode = N->getOpcode();
10886 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10887 return SDValue();
10888
10889 SDValue N0 = N->getOperand(0);
10890 EVT VT = N->getValueType(0);
10891 SDLoc DL(N);
10892 SDValue X, Y;
10893
10894 // If both operands are bswap/bitreverse, ignore the multiuse
10895 if (sd_match(N0, m_BitwiseLogic(m_UnaryOp(Opcode, m_Value(X)),
10896 m_UnaryOp(Opcode, m_Value(Y))))))
10897 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10898
10899 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10900 if (sd_match(N0, m_OneUse(m_BitwiseLogic(
10901 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10902 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10903 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10904 }
10905
10906 return SDValue();
10907}
10908
10909SDValue DAGCombiner::visitSRA(SDNode *N) {
10910 SDValue N0 = N->getOperand(0);
10911 SDValue N1 = N->getOperand(1);
10912 if (SDValue V = DAG.simplifyShift(N0, N1))
10913 return V;
10914
10915 SDLoc DL(N);
10916 EVT VT = N0.getValueType();
10917 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10918
10919 // fold (sra c1, c2) -> c1 >>s c2
10920 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10921 return C;
10922
10923 // Arithmetic shifting an all-sign-bit value is a no-op.
10924 // fold (sra 0, x) -> 0
10925 // fold (sra -1, x) -> -1
10926 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10927 return N0;
10928
10929 // fold vector ops
10930 if (VT.isVector())
10931 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10932 return FoldedVOp;
10933
10934 if (SDValue NewSel = foldBinOpIntoSelect(N))
10935 return NewSel;
10936
10937 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10938
10939 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10940 // clamp (add c1, c2) to max shift.
10941 if (N0.getOpcode() == ISD::SRA) {
10942 EVT ShiftVT = N1.getValueType();
10943 EVT ShiftSVT = ShiftVT.getScalarType();
10944 SmallVector<SDValue, 16> ShiftValues;
10945
10946 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10947 APInt c1 = LHS->getAPIntValue();
10948 APInt c2 = RHS->getAPIntValue();
10949 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10950 APInt Sum = c1 + c2;
10951 unsigned ShiftSum =
10952 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10953 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10954 return true;
10955 };
10956 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10957 SDValue ShiftValue;
10958 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10959 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10960 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10961 assert(ShiftValues.size() == 1 &&
10962 "Expected matchBinaryPredicate to return one element for "
10963 "SPLAT_VECTORs");
10964 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10965 } else
10966 ShiftValue = ShiftValues[0];
10967 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10968 }
10969 }
10970
10971 // fold (sra (shl X, m), (sub result_size, n))
10972 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10973 // result_size - n != m.
10974 // If truncate is free for the target, sext(shl) is likely to result in better
10975 // code.
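// For example, for i32 X with m = 4 and n = 8 (shift amount 32 - 8 = 24),
// the code below rewrites (sra (shl X, 4), 24) as
// (sign_extend (trunc (srl X, 20) to i8)), i.e. it sign-extends bits
// [27:20] of X.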
10976 if (N0.getOpcode() == ISD::SHL && N1C) {
10977 // Get the two constants of the shifts, CN0 = m, CN = n.
10978 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10979 if (N01C) {
10980 LLVMContext &Ctx = *DAG.getContext();
10981 // Determine what the truncate's result bitsize and type would be.
10982 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10983
10984 if (VT.isVector())
10985 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10986
10987 // Determine the residual right-shift amount.
10988 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10989
10990 // If the shift is not a no-op (in which case this should be just a sign
10991 // extend already), the truncate-to type is legal, sign_extend is legal
10992 // on that type, and the truncate to that type is both legal and free,
10993 // perform the transform.
10994 if ((ShiftAmt > 0) &&
10995 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10996 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10997 TLI.isTruncateFree(VT, TruncVT)) {
10998 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10999 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
11000 N0.getOperand(0), Amt);
11001 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
11002 Shift);
11003 return DAG.getNode(ISD::SIGN_EXTEND, DL,
11004 N->getValueType(0), Trunc);
11005 }
11006 }
11007 }
11008
11009 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
11010 // sra (add (shl X, N1C), AddC), N1C -->
11011 // sext (add (trunc X to (width - N1C)), AddC')
11012 // sra (sub AddC, (shl X, N1C)), N1C -->
11013 // sext (sub AddC1',(trunc X to (width - N1C)))
11014 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
11015 N0.hasOneUse()) {
11016 bool IsAdd = N0.getOpcode() == ISD::ADD;
11017 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
11018 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
11019 Shl.hasOneUse()) {
11020 // TODO: AddC does not need to be a splat.
11021 if (ConstantSDNode *AddC =
11022 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
11023 // Determine what the truncate's type would be and ask the target if
11024 // that is a free operation.
11025 LLVMContext &Ctx = *DAG.getContext();
11026 unsigned ShiftAmt = N1C->getZExtValue();
11027 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
11028 if (VT.isVector())
11029 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
11030
11031 // TODO: The simple type check probably belongs in the default hook
11032 // implementation and/or target-specific overrides (because
11033 // non-simple types likely require masking when legalized), but
11034 // that restriction may conflict with other transforms.
11035 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
11036 TLI.isTruncateFree(VT, TruncVT)) {
11037 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
11038 SDValue ShiftC =
11039 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
11040 TruncVT.getScalarSizeInBits()),
11041 DL, TruncVT);
11042 SDValue Add;
11043 if (IsAdd)
11044 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
11045 else
11046 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
11047 return DAG.getSExtOrTrunc(Add, DL, VT);
11048 }
11049 }
11050 }
11051 }
11052
11053 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11054 if (N1.getOpcode() == ISD::TRUNCATE &&
11055 N1.getOperand(0).getOpcode() == ISD::AND) {
11056 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11057 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11058 }
11059
11060 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11061 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11062 // if c1 is equal to the number of bits the trunc removes
11063 // TODO - support non-uniform vector shift amounts.
11064 if (N0.getOpcode() == ISD::TRUNCATE &&
11065 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11066 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11067 N0.getOperand(0).hasOneUse() &&
11068 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11069 SDValue N0Op0 = N0.getOperand(0);
11070 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11071 EVT LargeVT = N0Op0.getValueType();
11072 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11073 if (LargeShift->getAPIntValue() == TruncBits) {
11074 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11075 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11076 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11077 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11078 SDValue SRA =
11079 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11080 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11081 }
11082 }
11083 }
11084
11085 // Simplify, based on bits shifted out of the LHS.
11086 if (SimplifyDemandedBits(SDValue(N, 0)))
11087 return SDValue(N, 0);
11088
11089 // If the sign bit is known to be zero, switch this to a SRL.
11090 if (DAG.SignBitIsZero(N0))
11091 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11092
11093 if (N1C && !N1C->isOpaque())
11094 if (SDValue NewSRA = visitShiftByConstant(N))
11095 return NewSRA;
11096
11097 // Try to transform this shift into a multiply-high if
11098 // it matches the appropriate pattern detected in combineShiftToMULH.
11099 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11100 return MULH;
11101
11102 // Attempt to convert a sra of a load into a narrower sign-extending load.
11103 if (SDValue NarrowLoad = reduceLoadWidth(N))
11104 return NarrowLoad;
11105
11106 if (SDValue AVG = foldShiftToAvg(N, DL))
11107 return AVG;
11108
11109 return SDValue();
11110}
11111
11112SDValue DAGCombiner::visitSRL(SDNode *N) {
11113 SDValue N0 = N->getOperand(0);
11114 SDValue N1 = N->getOperand(1);
11115 if (SDValue V = DAG.simplifyShift(N0, N1))
11116 return V;
11117
11118 SDLoc DL(N);
11119 EVT VT = N0.getValueType();
11120 EVT ShiftVT = N1.getValueType();
11121 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11122
11123 // fold (srl c1, c2) -> c1 >>u c2
11124 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11125 return C;
11126
11127 // fold vector ops
11128 if (VT.isVector())
11129 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11130 return FoldedVOp;
11131
11132 if (SDValue NewSel = foldBinOpIntoSelect(N))
11133 return NewSel;
11134
11135 // if (srl x, c) is known to be zero, return 0
11136 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11137 if (N1C &&
11138 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11139 return DAG.getConstant(0, DL, VT);
11140
11141 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11142 if (N0.getOpcode() == ISD::SRL) {
11143 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11144 ConstantSDNode *RHS) {
11145 APInt c1 = LHS->getAPIntValue();
11146 APInt c2 = RHS->getAPIntValue();
11147 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11148 return (c1 + c2).uge(OpSizeInBits);
11149 };
11150 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11151 return DAG.getConstant(0, DL, VT);
11152
11153 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11154 ConstantSDNode *RHS) {
11155 APInt c1 = LHS->getAPIntValue();
11156 APInt c2 = RHS->getAPIntValue();
11157 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11158 return (c1 + c2).ult(OpSizeInBits);
11159 };
11160 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11161 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11162 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11163 }
11164 }
11165
11166 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11167 N0.getOperand(0).getOpcode() == ISD::SRL) {
11168 SDValue InnerShift = N0.getOperand(0);
11169 // TODO - support non-uniform vector shift amounts.
11170 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11171 uint64_t c1 = N001C->getZExtValue();
11172 uint64_t c2 = N1C->getZExtValue();
11173 EVT InnerShiftVT = InnerShift.getValueType();
11174 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11175 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11176 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11177 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11178 if (c1 + OpSizeInBits == InnerShiftSize) {
11179 if (c1 + c2 >= InnerShiftSize)
11180 return DAG.getConstant(0, DL, VT);
11181 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11182 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11183 InnerShift.getOperand(0), NewShiftAmt);
11184 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11185 }
11186 // In the more general case, we can clear the high bits after the shift:
11187 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11188 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11189 c1 + c2 < InnerShiftSize) {
11190 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11191 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11192 InnerShift.getOperand(0), NewShiftAmt);
11193 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11194 OpSizeInBits - c2),
11195 DL, InnerShiftVT);
11196 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11197 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11198 }
11199 }
11200 }
11201
11202 if (N0.getOpcode() == ISD::SHL) {
11203 // fold (srl (shl nuw x, c), c) -> x
11204 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11205 return N0.getOperand(0);
11206
11207 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
11208 // (and (srl x, (sub c2, c1)), MASK)
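// For example, with i8 x: (srl (shl x, 5), 3) -> (and (shl x, 2), 0x1C) and
// (srl (shl x, 3), 5) -> (and (srl x, 2), 0x07).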
11209 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11210 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
11211 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11212 ConstantSDNode *RHS) {
11213 const APInt &LHSC = LHS->getAPIntValue();
11214 const APInt &RHSC = RHS->getAPIntValue();
11215 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11216 LHSC.getZExtValue() <= RHSC.getZExtValue();
11217 };
11218 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11219 /*AllowUndefs*/ false,
11220 /*AllowTypeMismatch*/ true)) {
11221 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11222 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11223 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11224 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11225 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11226 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11227 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11228 }
11229 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11230 /*AllowUndefs*/ false,
11231 /*AllowTypeMismatch*/ true)) {
11232 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11233 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11234 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11235 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11236 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11237 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11238 }
11239 }
11240 }
11241
11242 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11243 // TODO - support non-uniform vector shift amounts.
11244 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11245 // Shifting in all undef bits?
11246 EVT SmallVT = N0.getOperand(0).getValueType();
11247 unsigned BitSize = SmallVT.getScalarSizeInBits();
11248 if (N1C->getAPIntValue().uge(BitSize))
11249 return DAG.getUNDEF(VT);
11250
11251 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11252 uint64_t ShiftAmt = N1C->getZExtValue();
11253 SDLoc DL0(N0);
11254 SDValue SmallShift =
11255 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11256 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11257 AddToWorklist(SmallShift.getNode());
11258 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11259 return DAG.getNode(ISD::AND, DL, VT,
11260 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11261 DAG.getConstant(Mask, DL, VT));
11262 }
11263 }
11264
11265 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11266 // bit, which is unmodified by sra.
11267 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11268 if (N0.getOpcode() == ISD::SRA)
11269 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11270 }
11271
11272 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11273 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11274 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11275 isPowerOf2_32(OpSizeInBits) &&
11276 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11277 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11278
11279 // If any of the input bits are KnownOne, then the input couldn't be all
11280 // zeros, thus the result of the srl will always be zero.
11281 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11282
11283 // If all of the bits input to the ctlz node are known to be zero, then
11284 // the result of the ctlz is "32" and the result of the shift is one.
11285 APInt UnknownBits = ~Known.Zero;
11286 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11287
11288 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11289 if (UnknownBits.isPowerOf2()) {
11290 // Okay, we know that only the single bit specified by UnknownBits
11291 // could be set on input to the CTLZ node. If this bit is set, the SRL
11292 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11293 // to an SRL/XOR pair, which is likely to simplify more.
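// For example, for i32 x where only bit 4 may be nonzero:
// (srl (ctlz x), 5) is 0 when bit 4 is set (ctlz(x) == 27) and 1 when x is
// zero (ctlz(x) == 32), which is exactly (xor (srl x, 4), 1).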
11294 unsigned ShAmt = UnknownBits.countr_zero();
11295 SDValue Op = N0.getOperand(0);
11296
11297 if (ShAmt) {
11298 SDLoc DL(N0);
11299 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11300 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11301 AddToWorklist(Op.getNode());
11302 }
11303 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11304 }
11305 }
11306
11307 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11308 if (N1.getOpcode() == ISD::TRUNCATE &&
11309 N1.getOperand(0).getOpcode() == ISD::AND) {
11310 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11311 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11312 }
11313
11314 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11315 // -> (logic_op (srl x, c1), (zext y))
11316 // c1 <= leadingzeros(zext(y))
11317 SDValue X, ZExtY;
11318 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11319 m_Value(X),
11322 m_Specific(N1))))))) {
11323 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11324 ZExtY.getOperand(0).getScalarValueSizeInBits();
11325 if (N1C->getZExtValue() <= NumLeadingZeros)
11326 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11327 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11328 }
11329
11330 // fold operands of srl based on knowledge that the low bits are not
11331 // demanded.
11332 if (SimplifyDemandedBits(SDValue(N, 0)))
11333 return SDValue(N, 0);
11334
11335 if (N1C && !N1C->isOpaque())
11336 if (SDValue NewSRL = visitShiftByConstant(N))
11337 return NewSRL;
11338
11339 // Attempt to convert a srl of a load into a narrower zero-extending load.
11340 if (SDValue NarrowLoad = reduceLoadWidth(N))
11341 return NarrowLoad;
11342
11343 // Here is a common situation. We want to optimize:
11344 //
11345 // %a = ...
11346 // %b = and i32 %a, 2
11347 // %c = srl i32 %b, 1
11348 // brcond i32 %c ...
11349 //
11350 // into
11351 //
11352 // %a = ...
11353 // %b = and %a, 2
11354 // %c = setcc eq %b, 0
11355 // brcond %c ...
11356 //
11357 // However, after the source operand of SRL is optimized into AND, the SRL
11358 // itself may not be optimized further. Look for it and add the BRCOND into
11359 // the worklist.
11360 //
11361 // This also tends to happen for binary operations when SimplifyDemandedBits
11362 // is involved.
11363 //
11364 // FIXME: This is unnecessary if we process the DAG in topological order,
11365 // which we plan to do. This workaround can be removed once the DAG is
11366 // processed in topological order.
11367 if (N->hasOneUse()) {
11368 SDNode *User = *N->user_begin();
11369
11370 // Look past the truncate.
11371 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11372 User = *User->user_begin();
11373
11374 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11375 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11376 AddToWorklist(User);
11377 }
11378
11379 // Try to transform this shift into a multiply-high if
11380 // it matches the appropriate pattern detected in combineShiftToMULH.
11381 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11382 return MULH;
11383
11384 if (SDValue AVG = foldShiftToAvg(N, DL))
11385 return AVG;
11386
11387 return SDValue();
11388}
11389
11390SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11391 EVT VT = N->getValueType(0);
11392 SDValue N0 = N->getOperand(0);
11393 SDValue N1 = N->getOperand(1);
11394 SDValue N2 = N->getOperand(2);
11395 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11396 unsigned BitWidth = VT.getScalarSizeInBits();
11397 SDLoc DL(N);
11398
11399 // fold (fshl/fshr C0, C1, C2) -> C3
11400 if (SDValue C =
11401 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11402 return C;
11403
11404 // fold (fshl N0, N1, 0) -> N0
11405 // fold (fshr N0, N1, 0) -> N1
11406 if (isPowerOf2_32(BitWidth))
11407 if (DAG.MaskedValueIsZero(
11408 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11409 return IsFSHL ? N0 : N1;
11410
11411 auto IsUndefOrZero = [](SDValue V) {
11412 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11413 };
11414
11415 // TODO - support non-uniform vector shift amounts.
11416 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11417 EVT ShAmtTy = N2.getValueType();
11418
11419 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11420 if (Cst->getAPIntValue().uge(BitWidth)) {
11421 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11422 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11423 DAG.getConstant(RotAmt, DL, ShAmtTy));
11424 }
11425
11426 unsigned ShAmt = Cst->getZExtValue();
11427 if (ShAmt == 0)
11428 return IsFSHL ? N0 : N1;
11429
11430 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11431 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11432 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11433 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11434 if (IsUndefOrZero(N0))
11435 return DAG.getNode(
11436 ISD::SRL, DL, VT, N1,
11437 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11438 if (IsUndefOrZero(N1))
11439 return DAG.getNode(
11440 ISD::SHL, DL, VT, N0,
11441 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11442
11443 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11444 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11445 // TODO - bigendian support once we have test coverage.
11446 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11447 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11448 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11449 !DAG.getDataLayout().isBigEndian()) {
11450 auto *LHS = dyn_cast<LoadSDNode>(N0);
11451 auto *RHS = dyn_cast<LoadSDNode>(N1);
11452 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11453 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11454 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11455 ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
11456 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11457 SDLoc DL(RHS);
11458 uint64_t PtrOff =
11459 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11460 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11461 unsigned Fast = 0;
11462 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11463 RHS->getAddressSpace(), NewAlign,
11464 RHS->getMemOperand()->getFlags(), &Fast) &&
11465 Fast) {
11466 SDValue NewPtr = DAG.getMemBasePlusOffset(
11467 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11468 AddToWorklist(NewPtr.getNode());
11469 SDValue Load = DAG.getLoad(
11470 VT, DL, RHS->getChain(), NewPtr,
11471 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11472 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11473 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11474 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11475 return Load;
11476 }
11477 }
11478 }
11479 }
11480 }
11481
11482 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11483 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11484 // iff we know the shift amount is in range.
11485 // TODO: when is it worth doing SUB(BW, N2) as well?
11486 if (isPowerOf2_32(BitWidth)) {
11487 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11488 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11489 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11490 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11491 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11492 }
11493
11494 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11495 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11496 // TODO: Investigate flipping this rotate if only one is legal.
11497 // If funnel shift is legal as well we might be better off avoiding
11498 // non-constant (BW - N2).
11499 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11500 if (N0 == N1 && hasOperation(RotOpc, VT))
11501 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11502
11503 // Simplify, based on bits shifted out of N0/N1.
11504 if (SimplifyDemandedBits(SDValue(N, 0)))
11505 return SDValue(N, 0);
11506
11507 return SDValue();
11508}
11509
11510SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11511 SDValue N0 = N->getOperand(0);
11512 SDValue N1 = N->getOperand(1);
11513 if (SDValue V = DAG.simplifyShift(N0, N1))
11514 return V;
11515
11516 SDLoc DL(N);
11517 EVT VT = N0.getValueType();
11518
11519 // fold (*shlsat c1, c2) -> c1<<c2
11520 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11521 return C;
11522
11523 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11524
11525 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11526 // fold (sshlsat x, c) -> (shl x, c)
11527 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11528 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11529 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11530
11531 // fold (ushlsat x, c) -> (shl x, c)
11532 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11533 N1C->getAPIntValue().ule(
11534 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11535 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11536 }
11537
11538 return SDValue();
11539}
11540
11541 // Given an ABS node, detect the following patterns:
11542// (ABS (SUB (EXTEND a), (EXTEND b))).
11543// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11544// Generates UABD/SABD instruction.
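// For example:
//   (abs (sub (zext i8 %a to i32), (zext i8 %b to i32))) -> (zext (abdu %a, %b) to i32)
//   (abs (sub (sext i8 %a to i32), (sext i8 %b to i32))) -> (zext (abds %a, %b) to i32)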
11545SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11546 EVT SrcVT = N->getValueType(0);
11547
11548 if (N->getOpcode() == ISD::TRUNCATE)
11549 N = N->getOperand(0).getNode();
11550
11551 EVT VT = N->getValueType(0);
11552 SDValue Op0, Op1;
11553
11554 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11555 return SDValue();
11556
11557 SDValue AbsOp0 = N->getOperand(0);
11558 unsigned Opc0 = Op0.getOpcode();
11559
11560 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11561 // fallback to ValueTracking.
11562 if (Opc0 != Op1.getOpcode() ||
11563 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11564 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11565 // fold (abs (sub nsw x, y)) -> abds(x, y)
11566 // Don't fold this for unsupported types as we lose the NSW handling.
11567 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11568 (AbsOp0->getFlags().hasNoSignedWrap() ||
11569 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11570 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11571 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11572 }
11573 // fold (abs (sub x, y)) -> abdu(x, y)
11574 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11575 DAG.SignBitIsZero(Op1)) {
11576 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11577 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11578 }
11579 return SDValue();
11580 }
11581
11582 EVT VT0, VT1;
11583 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11584 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11585 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11586 } else {
11587 VT0 = Op0.getOperand(0).getValueType();
11588 VT1 = Op1.getOperand(0).getValueType();
11589 }
11590 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11591
11592 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11593 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11594 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11595 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11596 (VT1 == MaxVT || Op1->hasOneUse()) &&
11597 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11598 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11599 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11600 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11601 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11602 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11603 }
11604
11605 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11606 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11607 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11608 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11609 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11610 }
11611
11612 return SDValue();
11613}
11614
11615SDValue DAGCombiner::visitABS(SDNode *N) {
11616 SDValue N0 = N->getOperand(0);
11617 EVT VT = N->getValueType(0);
11618 SDLoc DL(N);
11619
11620 // fold (abs c1) -> c2
11621 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11622 return C;
11623 // fold (abs (abs x)) -> (abs x)
11624 if (N0.getOpcode() == ISD::ABS)
11625 return N0;
11626 // fold (abs x) -> x iff not-negative
11627 if (DAG.SignBitIsZero(N0))
11628 return N0;
11629
11630 if (SDValue ABD = foldABSToABD(N, DL))
11631 return ABD;
11632
11633 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11634 // iff zero_extend/truncate are free.
11635 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11636 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11637 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11638 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11639 hasOperation(ISD::ABS, ExtVT)) {
11640 return DAG.getNode(
11641 ISD::ZERO_EXTEND, DL, VT,
11642 DAG.getNode(ISD::ABS, DL, ExtVT,
11643 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11644 }
11645 }
11646
11647 return SDValue();
11648}
11649
11650SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11651 SDValue N0 = N->getOperand(0);
11652 EVT VT = N->getValueType(0);
11653 SDLoc DL(N);
11654
11655 // fold (bswap c1) -> c2
11656 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11657 return C;
11658 // fold (bswap (bswap x)) -> x
11659 if (N0.getOpcode() == ISD::BSWAP)
11660 return N0.getOperand(0);
11661
11662 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11663 // isn't supported, it will be expanded to bswap followed by a manual reversal
11664 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11665 // the two bswaps if the bitreverse gets expanded.
11666 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11667 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11668 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11669 }
11670
11671 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11672 // iff x >= bw/2 (i.e. lower half is known zero)
11673 unsigned BW = VT.getScalarSizeInBits();
11674 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11675 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11676 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11677 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11678 ShAmt->getZExtValue() >= (BW / 2) &&
11679 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11680 TLI.isTruncateFree(VT, HalfVT) &&
11681 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11682 SDValue Res = N0.getOperand(0);
11683 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11684 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11685 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11686 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11687 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11688 return DAG.getZExtOrTrunc(Res, DL, VT);
11689 }
11690 }
11691
11692 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11693 // inverse-shift-of-bswap:
11694 // bswap (X u<< C) --> (bswap X) u>> C
11695 // bswap (X u>> C) --> (bswap X) u<< C
11696 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11697 N0.hasOneUse()) {
11698 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11699 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11700 ShAmt->getZExtValue() % 8 == 0) {
11701 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11702 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11703 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11704 }
11705 }
11706
11707 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11708 return V;
11709
11710 return SDValue();
11711}
11712
11713SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11714 SDValue N0 = N->getOperand(0);
11715 EVT VT = N->getValueType(0);
11716 SDLoc DL(N);
11717
11718 // fold (bitreverse c1) -> c2
11719 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11720 return C;
11721
11722 // fold (bitreverse (bitreverse x)) -> x
11723 if (N0.getOpcode() == ISD::BITREVERSE)
11724 return N0.getOperand(0);
11725
11726 SDValue X, Y;
11727
11728 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11729 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11731 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11732
11733 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11734 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11736 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11737
11738 return SDValue();
11739}
11740
11741SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11742 SDValue N0 = N->getOperand(0);
11743 EVT VT = N->getValueType(0);
11744 SDLoc DL(N);
11745
11746 // fold (ctlz c1) -> c2
11747 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11748 return C;
11749
11750 // If the value is known never to be zero, switch to the undef version.
11751 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11752 if (DAG.isKnownNeverZero(N0))
11753 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11754
11755 return SDValue();
11756}
11757
11758SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11759 SDValue N0 = N->getOperand(0);
11760 EVT VT = N->getValueType(0);
11761 SDLoc DL(N);
11762
11763 // fold (ctlz_zero_undef c1) -> c2
11764 if (SDValue C =
11765 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11766 return C;
11767 return SDValue();
11768}
11769
11770SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11771 SDValue N0 = N->getOperand(0);
11772 EVT VT = N->getValueType(0);
11773 SDLoc DL(N);
11774
11775 // fold (cttz c1) -> c2
11776 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11777 return C;
11778
11779 // If the value is known never to be zero, switch to the undef version.
11780 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11781 if (DAG.isKnownNeverZero(N0))
11782 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11783
11784 return SDValue();
11785}
11786
11787SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11788 SDValue N0 = N->getOperand(0);
11789 EVT VT = N->getValueType(0);
11790 SDLoc DL(N);
11791
11792 // fold (cttz_zero_undef c1) -> c2
11793 if (SDValue C =
11794 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11795 return C;
11796 return SDValue();
11797}
11798
11799SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11800 SDValue N0 = N->getOperand(0);
11801 EVT VT = N->getValueType(0);
11802 unsigned NumBits = VT.getScalarSizeInBits();
11803 SDLoc DL(N);
11804
11805 // fold (ctpop c1) -> c2
11806 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11807 return C;
11808
11809 // If the source is being shifted, but doesn't affect any active bits,
11810 // then we can call CTPOP on the shift source directly.
11811 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11812 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11813 const APInt &Amt = AmtC->getAPIntValue();
11814 if (Amt.ult(NumBits)) {
11815 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11816 if ((N0.getOpcode() == ISD::SRL &&
11817 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11818 (N0.getOpcode() == ISD::SHL &&
11819 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11820 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11821 }
11822 }
11823 }
11824 }
11825
11826 // If the upper bits are known to be zero, then see if it's profitable to
11827 // only count the lower bits.
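// For example, for i64 x whose upper 32 bits are known zero:
//   (ctpop x) -> (zext (ctpop (trunc x to i32)) to i64)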
11828 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11829 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11830 if (hasOperation(ISD::CTPOP, HalfVT) &&
11831 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11832 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11833 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11834 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11835 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11836 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11837 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11838 }
11839 }
11840 }
11841
11842 return SDValue();
11843}
11844
11845 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11846 SDValue RHS, const SDNodeFlags Flags,
11847 const TargetLowering &TLI) {
11848 EVT VT = LHS.getValueType();
11849 if (!VT.isFloatingPoint())
11850 return false;
11851
11852 return Flags.hasNoSignedZeros() &&
11854 (Flags.hasNoNaNs() ||
11855 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11856}
11857
11858 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11859 SDValue RHS, SDValue True, SDValue False,
11860 ISD::CondCode CC,
11861 const TargetLowering &TLI,
11862 SelectionDAG &DAG) {
11863 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11864 switch (CC) {
11865 case ISD::SETOLT:
11866 case ISD::SETOLE:
11867 case ISD::SETLT:
11868 case ISD::SETLE:
11869 case ISD::SETULT:
11870 case ISD::SETULE: {
11871 // Since it's known never nan to get here already, either fminnum or
11872 // fminnum_ieee are OK. Try the ieee version first, since fminnum is
11873 // expanded in terms of it.
11874 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11875 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11876 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11877
11878 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11879 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11880 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11881 return SDValue();
11882 }
11883 case ISD::SETOGT:
11884 case ISD::SETOGE:
11885 case ISD::SETGT:
11886 case ISD::SETGE:
11887 case ISD::SETUGT:
11888 case ISD::SETUGE: {
11889 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11890 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11891 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11892
11893 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11894 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11895 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11896 return SDValue();
11897 }
11898 default:
11899 return SDValue();
11900 }
11901}
11902
11903 // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
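// For example: (srl (add nuw x, y), 1) -> (avgflooru x, y); for i8 with
// x = 200 and y = 50, (200 + 50) >> 1 == 125 == avgflooru(200, 50).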
11904SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11905 const unsigned Opcode = N->getOpcode();
11906 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11907 return SDValue();
11908
11909 EVT VT = N->getValueType(0);
11910 bool IsUnsigned = Opcode == ISD::SRL;
11911
11912 // Captured values.
11913 SDValue A, B, Add;
11914
11915 // Match floor average as it is common to both floor/ceil avgs.
11916 if (sd_match(N, m_BinOp(Opcode,
11917 m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11918 m_One()))) {
11919 // Decide whether signed or unsigned.
11920 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11921 if (!hasOperation(FloorISD, VT))
11922 return SDValue();
11923
11924 // Can't optimize adds that may wrap.
11925 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11926 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11927 return SDValue();
11928
11929 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11930 }
11931
11932 return SDValue();
11933}
11934
11935SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
11936 unsigned Opc = N->getOpcode();
11937 SDValue X, Y, Z;
11938 if (sd_match(
11940 return DAG.getNode(Opc, DL, VT, X,
11941 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11942
11944 m_Value(Z)))))
11945 return DAG.getNode(Opc, DL, VT, X,
11946 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11947
11948 return SDValue();
11949}
11950
11951/// Generate Min/Max node
11952SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11953 SDValue RHS, SDValue True,
11954 SDValue False, ISD::CondCode CC) {
11955 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11956 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11957
11958 // If we can't directly match this, try to see if we can pull an fneg out of
11959 // the select.
11960 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11961 True, DAG, LegalOperations, ForCodeSize);
11962 if (!NegTrue)
11963 return SDValue();
11964
11965 HandleSDNode NegTrueHandle(NegTrue);
11966
11967 // Try to unfold an fneg from the select if we are comparing the negated
11968 // constant.
11969 //
11970 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11971 //
11972 // TODO: Handle fabs
11973 if (LHS == NegTrue) {
11974 // If we can't directly match this, try to see if we can pull an fneg out of
11975 // the select.
11976 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11977 RHS, DAG, LegalOperations, ForCodeSize);
11978 if (NegRHS) {
11979 HandleSDNode NegRHSHandle(NegRHS);
11980 if (NegRHS == False) {
11981 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11982 False, CC, TLI, DAG);
11983 if (Combined)
11984 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11985 }
11986 }
11987 }
11988
11989 return SDValue();
11990}
11991
11992/// If a (v)select has a condition value that is a sign-bit test, try to smear
11993/// the condition operand sign-bit across the value width and use it as a mask.
11994 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11995 SelectionDAG &DAG) {
11996 SDValue Cond = N->getOperand(0);
11997 SDValue C1 = N->getOperand(1);
11998 SDValue C2 = N->getOperand(2);
12000 return SDValue();
12001
12002 EVT VT = N->getValueType(0);
12003 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
12004 VT != Cond.getOperand(0).getValueType())
12005 return SDValue();
12006
12007 // The inverted-condition + commuted-select variants of these patterns are
12008 // canonicalized to these forms in IR.
12009 SDValue X = Cond.getOperand(0);
12010 SDValue CondC = Cond.getOperand(1);
12011 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12012 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
12013 isAllOnesOrAllOnesSplat(C2)) {
12014 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
12015 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12016 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12017 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
12018 }
12019 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
12020 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12021 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12022 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12023 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
12024 }
12025 return SDValue();
12026}
12027
12028 static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
12029 const TargetLowering &TLI) {
12030 if (!TLI.convertSelectOfConstantsToMath(VT))
12031 return false;
12032
12033 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12034 return true;
12036 return true;
12037
12038 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12039 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12040 return true;
12041 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12042 return true;
12043
12044 return false;
12045}
12046
12047SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12048 SDValue Cond = N->getOperand(0);
12049 SDValue N1 = N->getOperand(1);
12050 SDValue N2 = N->getOperand(2);
12051 EVT VT = N->getValueType(0);
12052 EVT CondVT = Cond.getValueType();
12053 SDLoc DL(N);
12054
12055 if (!VT.isInteger())
12056 return SDValue();
12057
12058 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12059 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12060 if (!C1 || !C2)
12061 return SDValue();
12062
12063 if (CondVT != MVT::i1 || LegalOperations) {
12064 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12065 // We can't do this reliably if integer based booleans have different contents
12066 // to floating point based booleans. This is because we can't tell whether we
12067 // have an integer-based boolean or a floating-point-based boolean unless we
12068 // can find the SETCC that produced it and inspect its operands. This is
12069 // fairly easy if C is the SETCC node, but it can potentially be
12070 // undiscoverable (or not reasonably discoverable). For example, it could be
12071 // in another basic block or it could require searching a complicated
12072 // expression.
12073 if (CondVT.isInteger() &&
12074 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12075 TargetLowering::ZeroOrOneBooleanContent &&
12076 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12077 TargetLowering::ZeroOrOneBooleanContent &&
12078 C1->isZero() && C2->isOne()) {
12079 SDValue NotCond =
12080 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12081 if (VT.bitsEq(CondVT))
12082 return NotCond;
12083 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12084 }
12085
12086 return SDValue();
12087 }
12088
12089 // Only do this before legalization to avoid conflicting with target-specific
12090 // transforms in the other direction (create a select from a zext/sext). There
12091 // is also a target-independent combine here in DAGCombiner in the other
12092 // direction for (select Cond, -1, 0) when the condition is not i1.
12093 assert(CondVT == MVT::i1 && !LegalOperations);
12094
12095 // select Cond, 1, 0 --> zext (Cond)
12096 if (C1->isOne() && C2->isZero())
12097 return DAG.getZExtOrTrunc(Cond, DL, VT);
12098
12099 // select Cond, -1, 0 --> sext (Cond)
12100 if (C1->isAllOnes() && C2->isZero())
12101 return DAG.getSExtOrTrunc(Cond, DL, VT);
12102
12103 // select Cond, 0, 1 --> zext (!Cond)
12104 if (C1->isZero() && C2->isOne()) {
12105 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12106 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12107 return NotCond;
12108 }
12109
12110 // select Cond, 0, -1 --> sext (!Cond)
12111 if (C1->isZero() && C2->isAllOnes()) {
12112 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12113 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12114 return NotCond;
12115 }
12116
12117 // Use a target hook because some targets may prefer to transform in the
12118 // other direction.
12119 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12120 return SDValue();
12121
12122 // For any constants that differ by 1, we can transform the select into
12123 // an extend and add.
12124 const APInt &C1Val = C1->getAPIntValue();
12125 const APInt &C2Val = C2->getAPIntValue();
12126
12127 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12128 if (C1Val - 1 == C2Val) {
12129 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12130 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12131 }
12132
12133 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12134 if (C1Val + 1 == C2Val) {
12135 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12136 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12137 }
12138
12139 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12140 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12141 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12142 SDValue ShAmtC =
12143 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12144 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12145 }
12146
12147 // select Cond, -1, C --> or (sext Cond), C
12148 if (C1->isAllOnes()) {
12149 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12150 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12151 }
12152
12153 // select Cond, C, -1 --> or (sext (not Cond)), C
12154 if (C2->isAllOnes()) {
12155 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12156 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12157 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12158 }
12159
12160 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12161 return V;
12162
12163 return SDValue();
12164}
12165
12166template <class MatchContextClass>
12167 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
12168 SelectionDAG &DAG) {
12169 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12170 N->getOpcode() == ISD::VP_SELECT) &&
12171 "Expected a (v)(vp.)select");
12172 SDValue Cond = N->getOperand(0);
12173 SDValue T = N->getOperand(1), F = N->getOperand(2);
12174 EVT VT = N->getValueType(0);
12175 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12176 MatchContextClass matcher(DAG, TLI, N);
12177
12178 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12179 return SDValue();
12180
12181 // select Cond, Cond, F --> or Cond, freeze(F)
12182 // select Cond, 1, F --> or Cond, freeze(F)
12183 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12184 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12185
12186 // select Cond, T, Cond --> and Cond, freeze(T)
12187 // select Cond, T, 0 --> and Cond, freeze(T)
12188 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12189 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12190
12191 // select Cond, T, 1 --> or (not Cond), freeze(T)
12192 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12193 SDValue NotCond =
12194 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12195 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12196 }
12197
12198 // select Cond, 0, F --> and (not Cond), freeze(F)
12199 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12200 SDValue NotCond =
12201 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12202 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12203 }
12204
12205 return SDValue();
12206}
12207
12208 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
12209 SDValue N0 = N->getOperand(0);
12210 SDValue N1 = N->getOperand(1);
12211 SDValue N2 = N->getOperand(2);
12212 EVT VT = N->getValueType(0);
12213 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12214
12215 SDValue Cond0, Cond1;
12216 ISD::CondCode CC;
12217 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12218 m_CondCode(CC)))) ||
12219 VT != Cond0.getValueType())
12220 return SDValue();
12221
12222 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12223 // compare is inverted from that pattern ("Cond0 s> -1").
12224 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12225 ; // This is the pattern we are looking for.
12226 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12227 std::swap(N1, N2);
12228 else
12229 return SDValue();
12230
12231 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12232 if (isNullOrNullSplat(N2)) {
12233 SDLoc DL(N);
12234 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12235 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12236 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12237 }
12238
12239 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12240 if (isAllOnesOrAllOnesSplat(N1)) {
12241 SDLoc DL(N);
12242 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12243 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12244 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12245 }
12246
12247 // If we have to invert the sign bit mask, only do that transform if the
12248 // target has a bitwise 'and not' instruction (the invert is free).
12249 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12250 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12251 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12252 SDLoc DL(N);
12253 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12254 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12255 SDValue Not = DAG.getNOT(DL, Sra, VT);
12256 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12257 }
12258
12259 // TODO: There's another pattern in this family, but it may require
12260 // implementing hasOrNot() to check for profitability:
12261 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12262
12263 return SDValue();
12264}
12265
12266// Match SELECTs with absolute difference patterns.
12267// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12268// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12269// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12270// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
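// When the sub operands are swapped relative to the comparison, the select
// computes the negated difference instead; that form is matched below and
// rewritten as (neg (abd? a, b)).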
12271SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12272 SDValue False, ISD::CondCode CC,
12273 const SDLoc &DL) {
12274 bool IsSigned = isSignedIntSetCC(CC);
12275 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12276 EVT VT = LHS.getValueType();
12277
12278 if (LegalOperations && !hasOperation(ABDOpc, VT))
12279 return SDValue();
12280
12281 switch (CC) {
12282 case ISD::SETGT:
12283 case ISD::SETGE:
12284 case ISD::SETUGT:
12285 case ISD::SETUGE:
12286 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12287 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
12288 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12289 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12290 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12291 hasOperation(ABDOpc, VT))
12292 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12293 break;
12294 case ISD::SETLT:
12295 case ISD::SETLE:
12296 case ISD::SETULT:
12297 case ISD::SETULE:
12298 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12299 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
12300 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12301 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12302 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12303 hasOperation(ABDOpc, VT))
12304 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12305 break;
12306 default:
12307 break;
12308 }
12309
12310 return SDValue();
12311}
12312
12313// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12314// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
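// For example, with i32 x and C = 6 (so ~C = -7): when x u> 6 the add does
// not wrap and x - 7 is the smaller value; when x u<= 6 the add wraps to a
// large unsigned value, so the umin still selects x, as the select did.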
12315SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12316 SDValue False, ISD::CondCode CC,
12317 const SDLoc &DL) {
12318 APInt C;
12319 EVT VT = True.getValueType();
12320 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12321 if (CC == ISD::SETUGT && LHS == False &&
12322 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12323 SDValue AddC = DAG.getConstant(~C, DL, VT);
12324 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12325 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12326 }
12327 if (CC == ISD::SETULT && LHS == True &&
12328 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12329 SDValue AddC = DAG.getConstant(-C, DL, VT);
12330 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12331 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12332 }
12333 }
12334 return SDValue();
12335}
12336
12337SDValue DAGCombiner::visitSELECT(SDNode *N) {
12338 SDValue N0 = N->getOperand(0);
12339 SDValue N1 = N->getOperand(1);
12340 SDValue N2 = N->getOperand(2);
12341 EVT VT = N->getValueType(0);
12342 EVT VT0 = N0.getValueType();
12343 SDLoc DL(N);
12344 SDNodeFlags Flags = N->getFlags();
12345
12346 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12347 return V;
12348
12349 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12350 return V;
12351
12352 // select (not Cond), N1, N2 -> select Cond, N2, N1
12353 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12354 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12355
12356 if (SDValue V = foldSelectOfConstants(N))
12357 return V;
12358
12359 // If we can fold this based on the true/false value, do so.
12360 if (SimplifySelectOps(N, N1, N2))
12361 return SDValue(N, 0); // Don't revisit N.
12362
12363 if (VT0 == MVT::i1) {
12364 // The code in this block deals with the following 2 equivalences:
12365 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12366 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12367 // The target can specify its preferred form with the
12368 // shouldNormalizeToSelectSequence() callback. However, we always transform
12369 // to the right-hand form if the inner select already exists in the DAG,
12370 // and we always transform to the left-hand form if we know that we can
12371 // further optimize the combination of the conditions.
12372 bool normalizeToSequence =
12373 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12374 // select (and Cond0, Cond1), X, Y
12375 // -> select Cond0, (select Cond1, X, Y), Y
12376 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12377 SDValue Cond0 = N0->getOperand(0);
12378 SDValue Cond1 = N0->getOperand(1);
12379 SDValue InnerSelect =
12380 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12381 if (normalizeToSequence || !InnerSelect.use_empty())
12382 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12383 InnerSelect, N2, Flags);
12384 // Cleanup on failure.
12385 if (InnerSelect.use_empty())
12386 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12387 }
12388 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12389 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12390 SDValue Cond0 = N0->getOperand(0);
12391 SDValue Cond1 = N0->getOperand(1);
12392 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12393 Cond1, N1, N2, Flags);
12394 if (normalizeToSequence || !InnerSelect.use_empty())
12395 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12396 InnerSelect, Flags);
12397 // Cleanup on failure.
12398 if (InnerSelect.use_empty())
12399 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12400 }
12401
12402 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12403 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12404 SDValue N1_0 = N1->getOperand(0);
12405 SDValue N1_1 = N1->getOperand(1);
12406 SDValue N1_2 = N1->getOperand(2);
12407 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12408 // Create the actual and node if we can generate good code for it.
12409 if (!normalizeToSequence) {
12410 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12411 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12412 N2, Flags);
12413 }
12414 // Otherwise see if we can optimize the "and" to a better pattern.
12415 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12416 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12417 N2, Flags);
12418 }
12419 }
12420 }
12421 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12422 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12423 SDValue N2_0 = N2->getOperand(0);
12424 SDValue N2_1 = N2->getOperand(1);
12425 SDValue N2_2 = N2->getOperand(2);
12426 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12427 // Create the actual or node if we can generate good code for it.
12428 if (!normalizeToSequence) {
12429 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12430 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12431 N2_2, Flags);
12432 }
12433 // Otherwise see if we can optimize to a better pattern.
12434 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12435 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12436 N2_2, Flags);
12437 }
12438 }
12439
12440 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12441 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12442 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12443 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12444 N2.getOperand(1) == N1.getOperand(0) &&
12445 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12446 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12447
12448 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12449 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12450 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12451 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12452 N2.getOperand(1) == N1.getOperand(0) &&
12453 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12454 return DAG.getNegative(
12455 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12456 DL, VT);
12457 }
12458
12459 // Fold selects based on a setcc into other things, such as min/max/abs.
12460 if (N0.getOpcode() == ISD::SETCC) {
12461 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12462 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12463
12464 // select (fcmp lt x, y), x, y -> fminnum x, y
12465 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12466 //
12467 // This is OK if we don't care what happens if either operand is a NaN.
12468 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12469 if (SDValue FMinMax =
12470 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12471 return FMinMax;
12472
12473 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12474 // This is conservatively limited to pre-legal-operations to give targets
12475 // a chance to reverse the transform if they want to do that. Also, it is
12476 // unlikely that the pattern would be formed late, so it's probably not
12477 // worth going through the other checks.
12478 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12479 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12480 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12481 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12482 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12483 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12484 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12485 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12486 //
12487 // The IR equivalent of this transform would have this form:
12488 // %a = add %x, C
12489 // %c = icmp ugt %x, ~C
12490 // %r = select %c, -1, %a
12491 // =>
12492 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12493 // %u0 = extractvalue %u, 0
12494 // %u1 = extractvalue %u, 1
12495 // %r = select %u1, -1, %u0
12496 SDVTList VTs = DAG.getVTList(VT, VT0);
12497 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12498 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12499 }
12500 }
12501
12502 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12503 (!LegalOperations &&
12504 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12505 // Any flags available in a select/setcc fold will be on the setcc as they
12506 // migrated from fcmp
12507 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12508 N0.getOperand(2), N0->getFlags());
12509 }
12510
12511 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12512 return ABD;
12513
12514 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12515 return NewSel;
12516
12517 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12518 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12519 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12520 return UMin;
12521 }
12522
12523 if (!VT.isVector())
12524 if (SDValue BinOp = foldSelectOfBinops(N))
12525 return BinOp;
12526
12527 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12528 return R;
12529
12530 return SDValue();
12531}
12532
12533// This function assumes all the vselect's arguments are CONCAT_VECTOR
12534// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
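// For example, with Cond = <0, 0, 1, 1>, LHS = concat(A, B) and
// RHS = concat(C, D), the result is concat(C, B): the bottom half is taken
// from the false operand and the top half from the true operand.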
12535 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12536 SDLoc DL(N);
12537 SDValue Cond = N->getOperand(0);
12538 SDValue LHS = N->getOperand(1);
12539 SDValue RHS = N->getOperand(2);
12540 EVT VT = N->getValueType(0);
12541 int NumElems = VT.getVectorNumElements();
12542 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12543 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12544 Cond.getOpcode() == ISD::BUILD_VECTOR);
12545
12546 // CONCAT_VECTORS can take an arbitrary number of operands. We only care
12547 // about binary (two-operand) ones here.
12548 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12549 return SDValue();
12550
12551 // We're sure we have an even number of elements due to the
12552 // concat_vectors we have as arguments to vselect.
12553 // Skip BV elements until we find one that's not an UNDEF.
12554 // After we find a non-UNDEF element, keep scanning until we reach half the
12555 // length of the BV and check that all the non-undef elements are the same.
12556 ConstantSDNode *BottomHalf = nullptr;
12557 for (int i = 0; i < NumElems / 2; ++i) {
12558 if (Cond->getOperand(i)->isUndef())
12559 continue;
12560
12561 if (BottomHalf == nullptr)
12562 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12563 else if (Cond->getOperand(i).getNode() != BottomHalf)
12564 return SDValue();
12565 }
12566
12567 // Do the same for the second half of the BuildVector
12568 ConstantSDNode *TopHalf = nullptr;
12569 for (int i = NumElems / 2; i < NumElems; ++i) {
12570 if (Cond->getOperand(i)->isUndef())
12571 continue;
12572
12573 if (TopHalf == nullptr)
12574 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12575 else if (Cond->getOperand(i).getNode() != TopHalf)
12576 return SDValue();
12577 }
12578
12579 assert(TopHalf && BottomHalf &&
12580 "One half of the selector was all UNDEFs and the other was all the "
12581 "same value. This should have been addressed before this function.");
12582 return DAG.getNode(
12583 ISD::CONCAT_VECTORS, DL, VT,
12584 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12585 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12586}
12587
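// Try to move a splatted component of a gather/scatter index into the scalar
// base pointer. For example, if Index is (add splat(P), V), P is added to
// BasePtr and the index is reduced to V, giving a uniform base plus simple
// per-lane offsets.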
12588bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12589 SelectionDAG &DAG, const SDLoc &DL) {
12590
12591 // Only perform the transformation when existing operands can be reused.
12592 if (IndexIsScaled)
12593 return false;
12594
12595 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12596 return false;
12597
12598 EVT VT = BasePtr.getValueType();
12599
12600 if (SDValue SplatVal = DAG.getSplatValue(Index);
12601 SplatVal && !isNullConstant(SplatVal) &&
12602 SplatVal.getValueType() == VT) {
12603 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12604 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12605 return true;
12606 }
12607
12608 if (Index.getOpcode() != ISD::ADD)
12609 return false;
12610
12611 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12612 SplatVal && SplatVal.getValueType() == VT) {
12613 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12614 Index = Index.getOperand(1);
12615 return true;
12616 }
12617 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12618 SplatVal && SplatVal.getValueType() == VT) {
12619 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12620 Index = Index.getOperand(0);
12621 return true;
12622 }
12623 return false;
12624}
12625
12626// Fold sext/zext of index into index type.
12627bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12628 SelectionDAG &DAG) {
12629 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12630
12631 // It's always safe to look through zero extends.
12632 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12633 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12634 IndexType = ISD::UNSIGNED_SCALED;
12635 Index = Index.getOperand(0);
12636 return true;
12637 }
12638 if (ISD::isIndexTypeSigned(IndexType)) {
12639 IndexType = ISD::UNSIGNED_SCALED;
12640 return true;
12641 }
12642 }
12643
12644 // It's only safe to look through sign extends when Index is signed.
12645 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12646 ISD::isIndexTypeSigned(IndexType) &&
12647 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12648 Index = Index.getOperand(0);
12649 return true;
12650 }
12651
12652 return false;
12653}
12654
12655SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12656 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12657 SDValue Mask = MSC->getMask();
12658 SDValue Chain = MSC->getChain();
12659 SDValue Index = MSC->getIndex();
12660 SDValue Scale = MSC->getScale();
12661 SDValue StoreVal = MSC->getValue();
12662 SDValue BasePtr = MSC->getBasePtr();
12663 SDValue VL = MSC->getVectorLength();
12664 ISD::MemIndexType IndexType = MSC->getIndexType();
12665 SDLoc DL(N);
12666
12667 // Zap scatters with a zero mask.
12668 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12669 return Chain;
12670
12671 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12672 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12673 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12674 DL, Ops, MSC->getMemOperand(), IndexType);
12675 }
12676
12677 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12678 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12679 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12680 DL, Ops, MSC->getMemOperand(), IndexType);
12681 }
12682
12683 return SDValue();
12684}
12685
12686SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12687 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12688 SDValue Mask = MSC->getMask();
12689 SDValue Chain = MSC->getChain();
12690 SDValue Index = MSC->getIndex();
12691 SDValue Scale = MSC->getScale();
12692 SDValue StoreVal = MSC->getValue();
12693 SDValue BasePtr = MSC->getBasePtr();
12694 ISD::MemIndexType IndexType = MSC->getIndexType();
12695 SDLoc DL(N);
12696
12697 // Zap scatters with a zero mask.
12698 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12699 return Chain;
12700
12701 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12702 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12703 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12704 DL, Ops, MSC->getMemOperand(), IndexType,
12705 MSC->isTruncatingStore());
12706 }
12707
12708 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12709 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12710 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12711 DL, Ops, MSC->getMemOperand(), IndexType,
12712 MSC->isTruncatingStore());
12713 }
12714
12715 return SDValue();
12716}
12717
12718SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12719 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12720 SDValue Mask = MST->getMask();
12721 SDValue Chain = MST->getChain();
12722 SDValue Value = MST->getValue();
12723 SDValue Ptr = MST->getBasePtr();
12724
12725 // Zap masked stores with a zero mask.
12726 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12727 return Chain;
12728
12729 // Remove a masked store if base pointers and masks are equal.
12730 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12731 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12732 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12733 !MST->getBasePtr().isUndef() &&
12734 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12735 MST1->getMemoryVT().getStoreSize()) ||
12736 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12737 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12738 MST->getMemoryVT().getStoreSize())) {
12739 CombineTo(MST1, MST1->getChain());
12740 if (N->getOpcode() != ISD::DELETED_NODE)
12741 AddToWorklist(N);
12742 return SDValue(N, 0);
12743 }
12744 }
12745
12746 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12747 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12748 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12749 !MST->isCompressingStore() && !MST->isTruncatingStore())
12750 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12751 MST->getBasePtr(), MST->getPointerInfo(),
12752 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12753 MST->getAAInfo());
12754
12755 // Try transforming N to an indexed store.
12756 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12757 return SDValue(N, 0);
12758
12759 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12760 Value.getValueType().isInteger() &&
12761 (!isa<ConstantSDNode>(Value) ||
12762 !cast<ConstantSDNode>(Value)->isOpaque())) {
12763 APInt TruncDemandedBits =
12764 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12765 MST->getMemoryVT().getScalarSizeInBits());
12766
12767 // See if we can simplify the operation with
12768 // SimplifyDemandedBits, which only works if the value has a single use.
12769 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12770 // Re-visit the store if anything changed and the store hasn't been merged
12771 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12772 // node back to the worklist if necessary, but we also need to re-visit
12773 // the Store node itself.
12774 if (N->getOpcode() != ISD::DELETED_NODE)
12775 AddToWorklist(N);
12776 return SDValue(N, 0);
12777 }
12778 }
12779
12780 // If this is a TRUNC followed by a masked store, fold this into a masked
12781 // truncating store. We can do this even if this is already a masked
12782 // truncstore.
12783 // TODO: Try combining to a masked compress store if possible.
12784 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12785 MST->isUnindexed() && !MST->isCompressingStore() &&
12786 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12787 MST->getMemoryVT(), LegalOperations)) {
12788 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12789 Value.getOperand(0).getValueType());
12790 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12791 MST->getOffset(), Mask, MST->getMemoryVT(),
12792 MST->getMemOperand(), MST->getAddressingMode(),
12793 /*IsTruncating=*/true);
12794 }
12795
12796 return SDValue();
12797}
12798
12799SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12800 auto *SST = cast<VPStridedStoreSDNode>(N);
12801 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12802 // Combine strided stores with unit-stride to a regular VP store.
12803 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12804 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12805 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12806 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12807 SST->getVectorLength(), SST->getMemoryVT(),
12808 SST->getMemOperand(), SST->getAddressingMode(),
12809 SST->isTruncatingStore(), SST->isCompressingStore());
12810 }
12811 return SDValue();
12812}
12813
12814SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12815 SDLoc DL(N);
12816 SDValue Vec = N->getOperand(0);
12817 SDValue Mask = N->getOperand(1);
12818 SDValue Passthru = N->getOperand(2);
12819 EVT VecVT = Vec.getValueType();
12820
12821 bool HasPassthru = !Passthru.isUndef();
12822
12823 APInt SplatVal;
12824 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12825 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12826
12827 if (Vec.isUndef() || Mask.isUndef())
12828 return Passthru;
12829
12830 // No need for potentially expensive compress if the mask is constant.
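// For example, compressing <4 x i32> <a, b, c, d> with the constant mask
// <1, 0, 1, 0> yields the build_vector <a, c, passthru[2], passthru[3]>
// (with undef in the trailing lanes when no passthru is given).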
12831 if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12832 SmallVector<SDValue, 16> Ops;
12833 EVT ScalarVT = VecVT.getVectorElementType();
12834 unsigned NumSelected = 0;
12835 unsigned NumElmts = VecVT.getVectorNumElements();
12836 for (unsigned I = 0; I < NumElmts; ++I) {
12837 SDValue MaskI = Mask.getOperand(I);
12838 // We treat undef mask entries as "false".
12839 if (MaskI.isUndef())
12840 continue;
12841
12842 if (TLI.isConstTrueVal(MaskI)) {
12843 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12844 DAG.getVectorIdxConstant(I, DL));
12845 Ops.push_back(VecI);
12846 NumSelected++;
12847 }
12848 }
12849 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12850 SDValue Val =
12851 HasPassthru
12852 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12853 DAG.getVectorIdxConstant(Rest, DL))
12854 : DAG.getUNDEF(ScalarVT);
12855 Ops.push_back(Val);
12856 }
12857 return DAG.getBuildVector(VecVT, DL, Ops);
12858 }
12859
12860 return SDValue();
12861}
12862
12863SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12864 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12865 SDValue Mask = MGT->getMask();
12866 SDValue Chain = MGT->getChain();
12867 SDValue Index = MGT->getIndex();
12868 SDValue Scale = MGT->getScale();
12869 SDValue BasePtr = MGT->getBasePtr();
12870 SDValue VL = MGT->getVectorLength();
12871 ISD::MemIndexType IndexType = MGT->getIndexType();
12872 SDLoc DL(N);
12873
12874 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12875 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12876 return DAG.getGatherVP(
12877 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12878 Ops, MGT->getMemOperand(), IndexType);
12879 }
12880
12881 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12882 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12883 return DAG.getGatherVP(
12884 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12885 Ops, MGT->getMemOperand(), IndexType);
12886 }
12887
12888 return SDValue();
12889}
12890
12891SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12892 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12893 SDValue Mask = MGT->getMask();
12894 SDValue Chain = MGT->getChain();
12895 SDValue Index = MGT->getIndex();
12896 SDValue Scale = MGT->getScale();
12897 SDValue PassThru = MGT->getPassThru();
12898 SDValue BasePtr = MGT->getBasePtr();
12899 ISD::MemIndexType IndexType = MGT->getIndexType();
12900 SDLoc DL(N);
12901
12902 // Zap gathers with a zero mask.
12903 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12904 return CombineTo(N, PassThru, MGT->getChain());
12905
12906 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12907 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12908 return DAG.getMaskedGather(
12909 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12910 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12911 }
12912
12913 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12914 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12915 return DAG.getMaskedGather(
12916 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12917 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12918 }
12919
12920 return SDValue();
12921}
12922
12923SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12924 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12925 SDValue Mask = MLD->getMask();
12926
12927 // Zap masked loads with a zero mask.
12928 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12929 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12930
12931 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12932 // FIXME: Can we do this for indexed, expanding, or extending loads?
12933 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12934 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12935 SDValue NewLd = DAG.getLoad(
12936 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12937 MLD->getPointerInfo(), MLD->getBaseAlign(),
12938 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12939 return CombineTo(N, NewLd, NewLd.getValue(1));
12940 }
12941
12942 // Try transforming N to an indexed load.
12943 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12944 return SDValue(N, 0);
12945
12946 return SDValue();
12947}
12948
12949SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12950 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12951 SDValue Chain = HG->getChain();
12952 SDValue Inc = HG->getInc();
12953 SDValue Mask = HG->getMask();
12954 SDValue BasePtr = HG->getBasePtr();
12955 SDValue Index = HG->getIndex();
12956 SDLoc DL(HG);
12957
12958 EVT MemVT = HG->getMemoryVT();
12959 EVT DataVT = Index.getValueType();
12960 MachineMemOperand *MMO = HG->getMemOperand();
12961 ISD::MemIndexType IndexType = HG->getIndexType();
12962
12963 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12964 return Chain;
12965
12966 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12967 refineIndexType(Index, IndexType, DataVT, DAG)) {
12968 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12969 HG->getScale(), HG->getIntID()};
12970 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12971 MMO, IndexType);
12972 }
12973
12974 return SDValue();
12975}
12976
12977SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12978 if (SDValue Res = foldPartialReduceMLAMulOp(N))
12979 return Res;
12980 if (SDValue Res = foldPartialReduceAdd(N))
12981 return Res;
12982 return SDValue();
12983}
12984
12985// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
12986// -> partial_reduce_*mla(acc, a, b)
12987//
12988// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12989// -> partial_reduce_*mla(acc, x, C)
12990SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
12991 SDLoc DL(N);
12992 auto *Context = DAG.getContext();
12993 SDValue Acc = N->getOperand(0);
12994 SDValue Op1 = N->getOperand(1);
12995 SDValue Op2 = N->getOperand(2);
12996
12997 unsigned Opc = Op1->getOpcode();
12998 if (Opc != ISD::MUL && Opc != ISD::SHL)
12999 return SDValue();
13000
13001 SDValue LHS = Op1->getOperand(0);
13002 SDValue RHS = Op1->getOperand(1);
13003
13004 // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
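// For example, (shl %a, splat(3)) is treated as (mul %a, splat(8)) so the
// constant-splat handling below applies to it as well.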
13005 if (Opc == ISD::SHL) {
13006 APInt C;
13007 if (!ISD::isConstantSplatVector(RHS.getNode(), C))
13008 return SDValue();
13009
13010 RHS =
13011 DAG.getSplatVector(RHS.getValueType(), DL,
13012 DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
13013 RHS.getValueType().getScalarType()));
13014 Opc = ISD::MUL;
13015 }
13016
13017 APInt C;
13018 if (Opc != ISD::MUL || !ISD::isConstantSplatVector(Op2.getNode(), C) ||
13019 !C.isOne())
13020 return SDValue();
13021
13022 unsigned LHSOpcode = LHS->getOpcode();
13023 if (!ISD::isExtOpcode(LHSOpcode))
13024 return SDValue();
13025
13026 SDValue LHSExtOp = LHS->getOperand(0);
13027 EVT LHSExtOpVT = LHSExtOp.getValueType();
13028
13029 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13030 // -> partial_reduce_*mla(acc, x, C)
13031 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
13032 // TODO: Make use of partial_reduce_sumla here
13033 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
13034 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13035 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
13036 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
13037 return SDValue();
13038
13039 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
13040 ? ISD::PARTIAL_REDUCE_SMLA
13041 : ISD::PARTIAL_REDUCE_UMLA;
13042
13043 // Only perform these combines if the target supports folding
13044 // the extends into the operation.
13045 if (!TLI.isPartialReduceMLALegalOrCustom(
13046 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13047 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13048 return SDValue();
13049
13050 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
13051 DAG.getConstant(CTrunc, DL, LHSExtOpVT));
13052 }
13053
13054 unsigned RHSOpcode = RHS->getOpcode();
13055 if (!ISD::isExtOpcode(RHSOpcode))
13056 return SDValue();
13057
13058 SDValue RHSExtOp = RHS->getOperand(0);
13059 if (LHSExtOpVT != RHSExtOp.getValueType())
13060 return SDValue();
13061
13062 unsigned NewOpc;
13063 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13064 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13065 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13066 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
13067 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13068 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13069 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
13070 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13071 std::swap(LHSExtOp, RHSExtOp);
13072 } else
13073 return SDValue();
13074 // For a 2-stage extend, the signedness of both of the extends must match.
13075 // If the mul has the same type, there is no outer extend, and thus we
13076 // can simply use the inner extends to pick the result node.
13077 // TODO: extend to handle nonneg zext as sext
13078 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13079 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13080 NewOpc != N->getOpcode())
13081 return SDValue();
13082
13083 // Only perform these combines if the target supports folding
13084 // the extends into the operation.
13085 if (!TLI.isPartialReduceMLALegalOrCustom(
13086 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13087 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13088 return SDValue();
13089
13090 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
13091}
13092
13093// partial.reduce.umla(acc, zext(op), splat(1))
13094// -> partial.reduce.umla(acc, op, splat(trunc(1)))
13095// partial.reduce.smla(acc, sext(op), splat(1))
13096// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13097// partial.reduce.sumla(acc, sext(op), splat(1))
13098// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13099SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13100 SDLoc DL(N);
13101 SDValue Acc = N->getOperand(0);
13102 SDValue Op1 = N->getOperand(1);
13103 SDValue Op2 = N->getOperand(2);
13104
13105 APInt ConstantOne;
13106 if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
13107 !ConstantOne.isOne())
13108 return SDValue();
13109
13110 unsigned Op1Opcode = Op1.getOpcode();
13111 if (!ISD::isExtOpcode(Op1Opcode))
13112 return SDValue();
13113
13114 bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
13115 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13116 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13117 if (Op1IsSigned != NodeIsSigned &&
13118 Op1.getValueType().getVectorElementType() != AccElemVT)
13119 return SDValue();
13120
13121 unsigned NewOpcode =
13122 Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
13123
13124 SDValue UnextOp1 = Op1.getOperand(0);
13125 EVT UnextOp1VT = UnextOp1.getValueType();
13126 auto *Context = DAG.getContext();
13127 if (!TLI.isPartialReduceMLALegalOrCustom(
13128 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13129 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13130 return SDValue();
13131
13132 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13133 DAG.getConstant(1, DL, UnextOp1VT));
13134}
13135
13136SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13137 auto *SLD = cast<VPStridedLoadSDNode>(N);
13138 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13139 // Combine strided loads with unit-stride to a regular VP load.
13140 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13141 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13142 SDValue NewLd = DAG.getLoadVP(
13143 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13144 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13145 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13146 SLD->getMemOperand(), SLD->isExpandingLoad());
13147 return CombineTo(N, NewLd, NewLd.getValue(1));
13148 }
13149 return SDValue();
13150}
13151
13152/// A vector select of 2 constant vectors can be simplified to math/logic to
13153/// avoid a variable select instruction and possibly avoid constant loads.
13154SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13155 SDValue Cond = N->getOperand(0);
13156 SDValue N1 = N->getOperand(1);
13157 SDValue N2 = N->getOperand(2);
13158 EVT VT = N->getValueType(0);
13159 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13160 !TLI.convertSelectOfConstantsToMath(VT) ||
13161 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
13162 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
13163 return SDValue();
13164
13165 // Check if we can use the condition value to increment/decrement a single
13166 // constant value. This simplifies a select to an add and removes a constant
13167 // load/materialization from the general case.
13168 bool AllAddOne = true;
13169 bool AllSubOne = true;
13170 unsigned Elts = VT.getVectorNumElements();
13171 for (unsigned i = 0; i != Elts; ++i) {
13172 SDValue N1Elt = N1.getOperand(i);
13173 SDValue N2Elt = N2.getOperand(i);
13174 if (N1Elt.isUndef())
13175 continue;
13176 // N2 should not contain undef values since it will be reused in the fold.
13177 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13178 AllAddOne = false;
13179 AllSubOne = false;
13180 break;
13181 }
13182
13183 const APInt &C1 = N1Elt->getAsAPIntVal();
13184 const APInt &C2 = N2Elt->getAsAPIntVal();
13185 if (C1 != C2 + 1)
13186 AllAddOne = false;
13187 if (C1 != C2 - 1)
13188 AllSubOne = false;
13189 }
13190
13191 // Further simplifications for the extra-special cases where the constants are
13192 // all 0 or all -1 should be implemented as folds of these patterns.
13193 SDLoc DL(N);
13194 if (AllAddOne || AllSubOne) {
13195 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13196 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
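// For example, vselect <t, f>, <11, 21>, <10, 20> --> add (zext <t, f>), <10, 20>:
// true lanes add 1 to the corresponding constant and false lanes add 0.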
13197 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13198 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13199 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13200 }
13201
13202 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13203 APInt Pow2C;
13204 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13205 isNullOrNullSplat(N2)) {
13206 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13207 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13208 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13209 }
13210
13211 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
13212 return V;
13213
13214 // The general case for select-of-constants:
13215 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13216 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13217 // leave that to a machine-specific pass.
13218 return SDValue();
13219}
13220
13221SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13222 SDValue N0 = N->getOperand(0);
13223 SDValue N1 = N->getOperand(1);
13224 SDValue N2 = N->getOperand(2);
13225 SDLoc DL(N);
13226
13227 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13228 return V;
13229
13230 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
13231 return V;
13232
13233 return SDValue();
13234}
13235
13236 static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13237 SDValue FVal,
13238 const TargetLowering &TLI,
13239 SelectionDAG &DAG,
13240 const SDLoc &DL) {
13241 EVT VT = TVal.getValueType();
13242 if (!TLI.isTypeLegal(VT))
13243 return SDValue();
13244
13245 EVT CondVT = Cond.getValueType();
13246 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13247
13248 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13249 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13250 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13251 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13252
13253 // Bail out unless one of the operands is an all-zeros or all-ones splat.
13254 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13255 return SDValue();
13256
13257 // select Cond, 0, 0 → 0
13258 if (IsTAllZero && IsFAllZero) {
13259 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13260 : DAG.getConstant(0, DL, VT);
13261 }
13262
13263 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13264 APInt TValAPInt;
13265 if (Cond.getOpcode() == ISD::SETCC &&
13266 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13267 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13268 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13269 TValAPInt.isOne() &&
13270 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13271 ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
13272 return SDValue();
13273 }
13274
13275 // To use the condition operand as a bitwise mask, it must have elements that
13276 // are the same size as the select elements, i.e., the condition operand must
13277 // have already been promoted from the IR select condition type <N x i1>.
13278 // Don't check if the types themselves are equal because that excludes
13279 // vector floating-point selects.
13280 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13281 return SDValue();
13282
13283 // Cond value must be 'sign splat' to be converted to a logical op.
13284 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13285 return SDValue();
13286
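// From here on, every lane of Cond is known to be either all-ones or
// all-zeros, so Cond can be reused directly as a bitwise mask below.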
13287 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13288 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13289 Cond.getOpcode() == ISD::SETCC &&
13290 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13291 CondVT) {
13292 if (IsTAllZero || IsFAllOne) {
13293 SDValue CC = Cond.getOperand(2);
13294 ISD::CondCode InverseCC = ISD::getSetCCInverse(
13295 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13296 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13297 InverseCC);
13298 std::swap(TVal, FVal);
13299 std::swap(IsTAllOne, IsFAllOne);
13300 std::swap(IsTAllZero, IsFAllZero);
13301 }
13302 }
13303
13304 assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
13305 "Select condition no longer all-sign bits");
13306
13307 // select Cond, -1, 0 → bitcast Cond
13308 if (IsTAllOne && IsFAllZero)
13309 return DAG.getBitcast(VT, Cond);
13310
13311 // select Cond, -1, x → or Cond, x
13312 if (IsTAllOne) {
13313 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13314 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13315 return DAG.getBitcast(VT, Or);
13316 }
13317
13318 // select Cond, x, 0 → and Cond, x
13319 if (IsFAllZero) {
13320 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13321 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13322 return DAG.getBitcast(VT, And);
13323 }
13324
13325 // select Cond, 0, x -> and not(Cond), x
13326 if (IsTAllZero &&
13327 (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
13328 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13329 SDValue And =
13330 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13331 return DAG.getBitcast(VT, And);
13332 }
13333
13334 return SDValue();
13335}
13336
13337SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13338 SDValue N0 = N->getOperand(0);
13339 SDValue N1 = N->getOperand(1);
13340 SDValue N2 = N->getOperand(2);
13341 EVT VT = N->getValueType(0);
13342 SDLoc DL(N);
13343
13344 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13345 return V;
13346
13347 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
13348 return V;
13349
13350 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13351 if (!TLI.isTargetCanonicalSelect(N))
13352 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13353 return DAG.getSelect(DL, VT, F, N2, N1);
13354
13355 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
13356 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13357 DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
13358 N0.getValueType() == N1.getValueType() &&
13359 TLI.getBooleanContents(N0.getValueType()) ==
13360 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13361 return DAG.getNode(
13362 ISD::ADD, DL, N1.getValueType(), N2,
13363 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13364 }
13365
13366 // Canonicalize integer abs.
13367 // vselect (setg[te] X, 0), X, -X ->
13368 // vselect (setgt X, -1), X, -X ->
13369 // vselect (setl[te] X, 0), -X, X ->
13370 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
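// For example, with i32 X = -5: Y = sra(X, 31) = -1, add(X, Y) = -6, and
// xor(-6, -1) = 5; with X = 5 the shift gives 0 and X is returned unchanged.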
13371 if (N0.getOpcode() == ISD::SETCC) {
13372 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13373 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13374 bool isAbs = false;
13375 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13376
13377 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13378 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13379 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13380 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
13381 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13382 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13383 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
13384
13385 if (isAbs) {
13386 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
13387 return DAG.getNode(ISD::ABS, DL, VT, LHS);
13388
13389 SDValue Shift = DAG.getNode(
13390 ISD::SRA, DL, VT, LHS,
13391 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13392 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13393 AddToWorklist(Shift.getNode());
13394 AddToWorklist(Add.getNode());
13395 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13396 }
13397
13398 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13399 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13400 //
13401 // This is OK if we don't care about what happens if either operand is a
13402 // NaN.
13403 //
13404 if (N0.hasOneUse() &&
13405 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13406 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13407 return FMinMax;
13408 }
13409
13410 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13411 return S;
13412 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13413 return S;
13414
13415 // If this select has a condition (setcc) with narrower operands than the
13416 // select, try to widen the compare to match the select width.
13417 // TODO: This should be extended to handle any constant.
13418 // TODO: This could be extended to handle non-loading patterns, but that
13419 // requires thorough testing to avoid regressions.
13420 if (isNullOrNullSplat(RHS)) {
13421 EVT NarrowVT = LHS.getValueType();
13422 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
13423 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13424 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13425 unsigned WideWidth = WideVT.getScalarSizeInBits();
13426 bool IsSigned = isSignedIntSetCC(CC);
13427 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13428 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13429 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13430 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13431 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13432 // Both compare operands can be widened for free. The LHS can use an
13433 // extended load, and the RHS is a constant:
13434 // vselect (ext (setcc load(X), C)), N1, N2 -->
13435 // vselect (setcc extload(X), C'), N1, N2
13436 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13437 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13438 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13439 EVT WideSetCCVT = getSetCCResultType(WideVT);
13440 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13441 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13442 }
13443 }
13444
13445 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13446 return ABD;
13447
13448 // Match VSELECTs into add with unsigned saturation.
13449 if (hasOperation(ISD::UADDSAT, VT)) {
13450 // Check if one of the arms of the VSELECT is vector with all bits set.
13451 // If it's on the left side invert the predicate to simplify logic below.
13452 SDValue Other;
13453 ISD::CondCode SatCC = CC;
13454 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
13455 Other = N2;
13456 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13457 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13458 Other = N1;
13459 }
13460
13461 if (Other && Other.getOpcode() == ISD::ADD) {
13462 SDValue CondLHS = LHS, CondRHS = RHS;
13463 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13464
13465 // Canonicalize condition operands.
13466 if (SatCC == ISD::SETUGE) {
13467 std::swap(CondLHS, CondRHS);
13468 SatCC = ISD::SETULE;
13469 }
13470
13471 // We can test against either of the addition operands.
13472 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13473 // x+y >= x ? x+y : ~0 --> uaddsat x, y
13474 if (SatCC == ISD::SETULE && Other == CondRHS &&
13475 (OpLHS == CondLHS || OpRHS == CondLHS))
13476 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13477
13478 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13479 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13480 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13481 CondLHS == OpLHS) {
13482 // If the RHS is a constant we have to reverse the const
13483 // canonicalization.
13484 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13485 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13486 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13487 };
13488 if (SatCC == ISD::SETULE &&
13489 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13490 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13491 }
13492 }
13493 }
13494
13495 // Match VSELECTs into sub with unsigned saturation.
13496 if (hasOperation(ISD::USUBSAT, VT)) {
13497 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13498 // the left side invert the predicate to simplify logic below.
13499 SDValue Other;
13500 ISD::CondCode SatCC = CC;
13501 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
13502 Other = N2;
13503 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13504 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
13505 Other = N1;
13506 }
13507
13508 // zext(x) >= y ? trunc(zext(x) - y) : 0
13509 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13510 // zext(x) > y ? trunc(zext(x) - y) : 0
13511 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13512 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13513 Other.getOperand(0).getOpcode() == ISD::SUB &&
13514 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13515 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13516 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13517 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13518 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13519 DAG, DL))
13520 return R;
13521 }
13522
13523 if (Other && Other.getNumOperands() == 2) {
13524 SDValue CondRHS = RHS;
13525 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13526
13527 if (OpLHS == LHS) {
13528 // Look for a general sub with unsigned saturation first.
13529 // x >= y ? x-y : 0 --> usubsat x, y
13530 // x > y ? x-y : 0 --> usubsat x, y
13531 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13532 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13533 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13534
13535 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13536 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13537 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13538 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13539 // If the RHS is a constant we have to reverse the const
13540 // canonicalization.
13541 // x > C-1 ? x+-C : 0 --> usubsat x, C
13542 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13543 return (!Op && !Cond) ||
13544 (Op && Cond &&
13545 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13546 };
13547 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13548 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13549 /*AllowUndefs*/ true)) {
13550 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13551 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13552 }
13553
13554 // Another special case: If C was a sign bit, the sub has been
13555 // canonicalized into a xor.
13556 // FIXME: Would it be better to use computeKnownBits to
13557 // determine whether it's safe to decanonicalize the xor?
13558 // x s< 0 ? x^C : 0 --> usubsat x, C
13559 APInt SplatValue;
13560 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13561 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13562 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
13563 SplatValue.isSignMask()) {
13564 // Note that we have to rebuild the RHS constant here to
13565 // ensure we don't rely on particular values of undef lanes.
13566 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13567 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13568 }
13569 }
13570 }
13571 }
13572 }
13573 }
13574
13575 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13576 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13577 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13578 return UMin;
13579 }
13580
13581 if (SimplifySelectOps(N, N1, N2))
13582 return SDValue(N, 0); // Don't revisit N.
13583
13584 // Fold (vselect all_ones, N1, N2) -> N1
13585 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13586 return N1;
13587 // Fold (vselect all_zeros, N1, N2) -> N2
13588 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13589 return N2;
13590
13591 // The ConvertSelectToConcatVector function is assuming both the above
13592 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
13593 // and addressed.
13594 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13595 N2.getOpcode() == ISD::CONCAT_VECTORS &&
13596 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13597 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13598 return CV;
13599 }
13600
13601 if (SDValue V = foldVSelectOfConstants(N))
13602 return V;
13603
13604 if (hasOperation(ISD::SRA, VT))
13605 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13606 return V;
13607
13608 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
13609 return SDValue(N, 0);
13610
13611 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13612 return V;
13613
13614 return SDValue();
13615}
13616
13617SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13618 SDValue N0 = N->getOperand(0);
13619 SDValue N1 = N->getOperand(1);
13620 SDValue N2 = N->getOperand(2);
13621 SDValue N3 = N->getOperand(3);
13622 SDValue N4 = N->getOperand(4);
13623 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13624 SDLoc DL(N);
13625
13626 // fold select_cc lhs, rhs, x, x, cc -> x
13627 if (N2 == N3)
13628 return N2;
13629
13630 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13631 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13632 isNullConstant(N1))
13633 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13634
13635 // Determine if the condition we're dealing with is constant
13636 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13637 CC, DL, false)) {
13638 AddToWorklist(SCC.getNode());
13639
13640 // cond always true -> true val
13641 // cond always false -> false val
13642 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13643 return SCCC->isZero() ? N3 : N2;
13644
13645 // When the condition is UNDEF, just return the first operand. This is
13646 // coherent with DAG creation; no setcc node is created in this case.
13647 if (SCC->isUndef())
13648 return N2;
13649
13650 // Fold to a simpler select_cc
13651 if (SCC.getOpcode() == ISD::SETCC) {
13652 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13653 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13654 SCC.getOperand(2), SCC->getFlags());
13655 }
13656 }
13657
13658 // If we can fold this based on the true/false value, do so.
13659 if (SimplifySelectOps(N, N2, N3))
13660 return SDValue(N, 0); // Don't revisit N.
13661
13662 // fold select_cc into other things, such as min/max/abs
13663 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13664}
13665
13666SDValue DAGCombiner::visitSETCC(SDNode *N) {
13667 // setcc is very commonly used as an argument to brcond. This pattern
13668 // also lends itself to numerous combines and, as a result, it is desirable
13669 // to keep the argument to a brcond as a setcc as much as possible.
13670 bool PreferSetCC =
13671 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13672
13673 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13674 EVT VT = N->getValueType(0);
13675 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13676 SDLoc DL(N);
13677
13678 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13679 // If we prefer to have a setcc, and we don't, we'll try our best to
13680 // recreate one using rebuildSetCC.
13681 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13682 SDValue NewSetCC = rebuildSetCC(Combined);
13683
13684 // We don't have anything interesting to combine to.
13685 if (NewSetCC.getNode() == N)
13686 return SDValue();
13687
13688 if (NewSetCC)
13689 return NewSetCC;
13690 }
13691 return Combined;
13692 }
13693
13694 // Optimize
13695 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13696 // or
13697 // 2) (icmp eq/ne X, (rotate X, C1))
13698 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13699 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
13700 // Then:
13701 // If C1 is a power of 2, then the rotate and shift+and versions are
13702 // equivalent, so we can interchange them depending on target preference.
13703 // Otherwise, if we have the shift+and version we can interchange srl/shl,
13704 // which in turn affects the constant C0. We can use this to get better
13705 // constants again determined by target preference.
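// For example, (x64 & UINT32_MAX) == (x64 >> 32) holds exactly when the low
// and high 32-bit halves of x64 are equal, which is also what
// x64 == rotr(x64, 32) tests; since 32 is a power of 2 the two forms are
// interchangeable and the target can pick whichever it prefers.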
13706 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13707 auto IsAndWithShift = [](SDValue A, SDValue B) {
13708 return A.getOpcode() == ISD::AND &&
13709 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13710 A.getOperand(0) == B.getOperand(0);
13711 };
13712 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13713 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13714 B.getOperand(0) == A;
13715 };
13716 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13717 bool IsRotate = false;
13718
13719 // Find either shift+and or rotate pattern.
13720 if (IsAndWithShift(N0, N1)) {
13721 AndOrOp = N0;
13722 ShiftOrRotate = N1;
13723 } else if (IsAndWithShift(N1, N0)) {
13724 AndOrOp = N1;
13725 ShiftOrRotate = N0;
13726 } else if (IsRotateWithOp(N0, N1)) {
13727 IsRotate = true;
13728 AndOrOp = N0;
13729 ShiftOrRotate = N1;
13730 } else if (IsRotateWithOp(N1, N0)) {
13731 IsRotate = true;
13732 AndOrOp = N1;
13733 ShiftOrRotate = N0;
13734 }
13735
13736 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13737 (IsRotate || AndOrOp.hasOneUse())) {
13738 EVT OpVT = N0.getValueType();
13739 // Get the constant shift/rotate amount and possibly the mask (if it's the shift+and
13740 // variant).
13741 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13742 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13743 /*AllowTrunc*/ false);
13744 if (CNode == nullptr)
13745 return std::nullopt;
13746 return CNode->getAPIntValue();
13747 };
13748 std::optional<APInt> AndCMask =
13749 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13750 std::optional<APInt> ShiftCAmt =
13751 GetAPIntValue(ShiftOrRotate.getOperand(1));
13752 unsigned NumBits = OpVT.getScalarSizeInBits();
13753
13754 // We found constants.
13755 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13756 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13757 // Check that the constants meet the constraints.
13758 bool CanTransform = IsRotate;
13759 if (!CanTransform) {
13760 // Check that the mask and shift complement each other
13761 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13762 // Check that we are comparing all bits
13763 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13764 // Check that the and mask is correct for the shift
13765 CanTransform &=
13766 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13767 }
13768
13769 // See if target prefers another shift/rotate opcode.
13770 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13771 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13772 // Transform is valid and we have a new preference.
13773 if (CanTransform && NewShiftOpc != ShiftOpc) {
13774 SDValue NewShiftOrRotate =
13775 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13776 ShiftOrRotate.getOperand(1));
13777 SDValue NewAndOrOp = SDValue();
13778
13779 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13780 APInt NewMask =
13781 NewShiftOpc == ISD::SHL
13782 ? APInt::getHighBitsSet(NumBits,
13783 NumBits - ShiftCAmt->getZExtValue())
13784 : APInt::getLowBitsSet(NumBits,
13785 NumBits - ShiftCAmt->getZExtValue());
13786 NewAndOrOp =
13787 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13788 DAG.getConstant(NewMask, DL, OpVT));
13789 } else {
13790 NewAndOrOp = ShiftOrRotate.getOperand(0);
13791 }
13792
13793 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13794 }
13795 }
13796 }
13797 }
13798 return SDValue();
13799}
13800
13801SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13802 SDValue LHS = N->getOperand(0);
13803 SDValue RHS = N->getOperand(1);
13804 SDValue Carry = N->getOperand(2);
13805 SDValue Cond = N->getOperand(3);
13806
13807 // If Carry is false, fold to a regular SETCC.
13808 if (isNullConstant(Carry))
13809 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13810
13811 return SDValue();
13812}
13813
13814/// Check if N satisfies:
13815/// N is used once.
13816/// N is a Load.
13817/// The load is compatible with ExtOpcode, meaning:
13818/// if the load has an explicit zero/sign extension, ExtOpcode must perform
13819/// the same extension;
13820/// otherwise, any ExtOpcode is compatible.
13821static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13822 if (!N.hasOneUse())
13823 return false;
13824
13825 if (!isa<LoadSDNode>(N))
13826 return false;
13827
13828 LoadSDNode *Load = cast<LoadSDNode>(N);
13829 ISD::LoadExtType LoadExt = Load->getExtensionType();
13830 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13831 return true;
13832
13833 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13834 // extension.
13835 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13836 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13837 return false;
13838
13839 return true;
13840}
13841
13842/// Fold
13843/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13844/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13845/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13846/// This function is called by the DAGCombiner when visiting sext/zext/aext
13847/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13848static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13849 SelectionDAG &DAG, const SDLoc &DL,
13850 CombineLevel Level) {
13851 unsigned Opcode = N->getOpcode();
13852 SDValue N0 = N->getOperand(0);
13853 EVT VT = N->getValueType(0);
13854 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13855 Opcode == ISD::ANY_EXTEND) &&
13856 "Expected EXTEND dag node in input!");
13857
13858 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13859 !N0.hasOneUse())
13860 return SDValue();
13861
13862 SDValue Op1 = N0->getOperand(1);
13863 SDValue Op2 = N0->getOperand(2);
13864 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13865 return SDValue();
13866
13867 auto ExtLoadOpcode = ISD::EXTLOAD;
13868 if (Opcode == ISD::SIGN_EXTEND)
13869 ExtLoadOpcode = ISD::SEXTLOAD;
13870 else if (Opcode == ISD::ZERO_EXTEND)
13871 ExtLoadOpcode = ISD::ZEXTLOAD;
13872
13873 // An illegal VSELECT may cause ISel to fail if it appears after legalization
13874 // (DAG Combine2), so conservatively check the OperationAction.
13875 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13876 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13877 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13878 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13879 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13880 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13881 return SDValue();
13882
13883 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13884 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13885 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13886}
13887
13888/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13889/// a build_vector of constants.
13890/// This function is called by the DAGCombiner when visiting sext/zext/aext
13891/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13892/// Vector extends are not folded if operations are legal; this is to
13893/// avoid introducing illegal build_vector dag nodes.
13894static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13895 const TargetLowering &TLI,
13896 SelectionDAG &DAG, bool LegalTypes) {
13897 unsigned Opcode = N->getOpcode();
13898 SDValue N0 = N->getOperand(0);
13899 EVT VT = N->getValueType(0);
13900
13901 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13902 "Expected EXTEND dag node in input!");
13903
13904 // fold (sext c1) -> c1
13905 // fold (zext c1) -> c1
13906 // fold (aext c1) -> c1
13907 if (isa<ConstantSDNode>(N0))
13908 return DAG.getNode(Opcode, DL, VT, N0);
13909
13910 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13911 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13912 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13913 if (N0->getOpcode() == ISD::SELECT) {
13914 SDValue Op1 = N0->getOperand(1);
13915 SDValue Op2 = N0->getOperand(2);
13916 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13917 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13918 // For any_extend, choose sign extension of the constants to allow a
13919 // possible further transform to sign_extend_inreg.i.e.
13920 //
13921 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13922 // t2: i64 = any_extend t1
13923 // -->
13924 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13925 // -->
13926 // t4: i64 = sign_extend_inreg t3
13927 unsigned FoldOpc = Opcode;
13928 if (FoldOpc == ISD::ANY_EXTEND)
13929 FoldOpc = ISD::SIGN_EXTEND;
13930 return DAG.getSelect(DL, VT, N0->getOperand(0),
13931 DAG.getNode(FoldOpc, DL, VT, Op1),
13932 DAG.getNode(FoldOpc, DL, VT, Op2));
13933 }
13934 }
13935
13936 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13937 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13938 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13939 EVT SVT = VT.getScalarType();
13940 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13941 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13942 return SDValue();
13943
13944 // We can fold this node into a build_vector.
13945 unsigned VTBits = SVT.getSizeInBits();
13946 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13947 SmallVector<SDValue, 8> Elts;
13948 unsigned NumElts = VT.getVectorNumElements();
13949
13950 for (unsigned i = 0; i != NumElts; ++i) {
13951 SDValue Op = N0.getOperand(i);
13952 if (Op.isUndef()) {
13953 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13954 Elts.push_back(DAG.getUNDEF(SVT));
13955 else
13956 Elts.push_back(DAG.getConstant(0, DL, SVT));
13957 continue;
13958 }
13959
13960 SDLoc DL(Op);
13961 // Get the constant value and if needed trunc it to the size of the type.
13962 // Nodes like build_vector might have constants wider than the scalar type.
13963 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13964 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13965 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13966 else
13967 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13968 }
13969
13970 return DAG.getBuildVector(VT, DL, Elts);
13971}
13972
13973// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
13974// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13975// transformation. Returns true if the extensions are possible and the
13976// above-mentioned transformation is profitable.
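// For example (illustrative): if (load x) is also used by (setcc (load x), 42, eq),
// that setcc can be rebuilt against the extended value as
// (setcc (zextload x), 42, eq), so creating the extending load can still be
// profitable even though the load has extra users.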
13977static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13978 unsigned ExtOpc,
13979 SmallVectorImpl<SDNode *> &ExtendNodes,
13980 const TargetLowering &TLI) {
13981 bool HasCopyToRegUses = false;
13982 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13983 for (SDUse &Use : N0->uses()) {
13984 SDNode *User = Use.getUser();
13985 if (User == N)
13986 continue;
13987 if (Use.getResNo() != N0.getResNo())
13988 continue;
13989 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13990 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13991 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13992 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13993 // Sign bits will be lost after a zext.
13994 return false;
13995 bool Add = false;
13996 for (unsigned i = 0; i != 2; ++i) {
13997 SDValue UseOp = User->getOperand(i);
13998 if (UseOp == N0)
13999 continue;
14000 if (!isa<ConstantSDNode>(UseOp))
14001 return false;
14002 Add = true;
14003 }
14004 if (Add)
14005 ExtendNodes.push_back(User);
14006 continue;
14007 }
14008 // If truncates aren't free and there are users we can't
14009 // extend, it isn't worthwhile.
14010 if (!isTruncFree)
14011 return false;
14012 // Remember if this value is live-out.
14013 if (User->getOpcode() == ISD::CopyToReg)
14014 HasCopyToRegUses = true;
14015 }
14016
14017 if (HasCopyToRegUses) {
14018 bool BothLiveOut = false;
14019 for (SDUse &Use : N->uses()) {
14020 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
14021 BothLiveOut = true;
14022 break;
14023 }
14024 }
14025 if (BothLiveOut)
14026 // Both unextended and extended values are live out. There had better be
14027 // a good reason for the transformation.
14028 return !ExtendNodes.empty();
14029 }
14030 return true;
14031}
14032
14033void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14034 SDValue OrigLoad, SDValue ExtLoad,
14035 ISD::NodeType ExtType) {
14036 // Extend SetCC uses if necessary.
14037 SDLoc DL(ExtLoad);
14038 for (SDNode *SetCC : SetCCs) {
14039 SmallVector<SDValue, 4> Ops;
14040
14041 for (unsigned j = 0; j != 2; ++j) {
14042 SDValue SOp = SetCC->getOperand(j);
14043 if (SOp == OrigLoad)
14044 Ops.push_back(ExtLoad);
14045 else
14046 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
14047 }
14048
14049 Ops.push_back(SetCC->getOperand(2));
14050 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
14051 }
14052}
14053
14054// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
14055SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14056 SDValue N0 = N->getOperand(0);
14057 EVT DstVT = N->getValueType(0);
14058 EVT SrcVT = N0.getValueType();
14059
14060 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14061 N->getOpcode() == ISD::ZERO_EXTEND) &&
14062 "Unexpected node type (not an extend)!");
14063
14064 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14065 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14066 // (v8i32 (sext (v8i16 (load x))))
14067 // into:
14068 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14069 // (v4i32 (sextload (x + 16)))))
14070 // Where uses of the original load, i.e.:
14071 // (v8i16 (load x))
14072 // are replaced with:
14073 // (v8i16 (truncate
14074 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14075 // (v4i32 (sextload (x + 16)))))))
14076 //
14077 // This combine is only applicable to illegal, but splittable, vectors.
14078 // All legal types, and illegal non-vector types, are handled elsewhere.
14079 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14080 //
14081 if (N0->getOpcode() != ISD::LOAD)
14082 return SDValue();
14083
14084 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14085
14086 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14087 !N0.hasOneUse() || !LN0->isSimple() ||
14088 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14089 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14090 return SDValue();
14091
14092 SmallVector<SDNode *, 4> SetCCs;
14093 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14094 return SDValue();
14095
14096 ISD::LoadExtType ExtType =
14097 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14098
14099 // Try to split the vector types to get down to legal types.
14100 EVT SplitSrcVT = SrcVT;
14101 EVT SplitDstVT = DstVT;
14102 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
14103 SplitSrcVT.getVectorNumElements() > 1) {
14104 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14105 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14106 }
14107
14108 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
14109 return SDValue();
14110
14111 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14112
14113 SDLoc DL(N);
14114 const unsigned NumSplits =
14115 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14116 const unsigned Stride = SplitSrcVT.getStoreSize();
14117 SmallVector<SDValue, 4> Loads;
14118 SmallVector<SDValue, 4> Chains;
14119
14120 SDValue BasePtr = LN0->getBasePtr();
14121 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14122 const unsigned Offset = Idx * Stride;
14123
14124 SDValue SplitLoad =
14125 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14126 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14127 SplitSrcVT, LN0->getBaseAlign(),
14128 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14129
14130 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14131
14132 Loads.push_back(SplitLoad.getValue(0));
14133 Chains.push_back(SplitLoad.getValue(1));
14134 }
14135
14136 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14137 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14138
14139 // Simplify TF.
14140 AddToWorklist(NewChain.getNode());
14141
14142 CombineTo(N, NewValue);
14143
14144 // Replace uses of the original load (before extension)
14145 // with a truncate of the concatenated sextloaded vectors.
14146 SDValue Trunc =
14147 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14148 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14149 CombineTo(N0.getNode(), Trunc, NewChain);
14150 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14151}
14152
14153// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14154// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
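// For example (illustrative):
//   (zext i64 (and (srl (load i32 p), 8), 0xFF))
// can become
//   (and (srl (zextload i64 <- i32 p), 8), 0xFF)
// folding the zext into a single zero-extending load.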
14155SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14156 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14157 EVT VT = N->getValueType(0);
14158 EVT OrigVT = N->getOperand(0).getValueType();
14159 if (TLI.isZExtFree(OrigVT, VT))
14160 return SDValue();
14161
14162 // and/or/xor
14163 SDValue N0 = N->getOperand(0);
14164 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14165 N0.getOperand(1).getOpcode() != ISD::Constant ||
14166 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14167 return SDValue();
14168
14169 // shl/shr
14170 SDValue N1 = N0->getOperand(0);
14171 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14172 N1.getOperand(1).getOpcode() != ISD::Constant ||
14173 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14174 return SDValue();
14175
14176 // load
14177 if (!isa<LoadSDNode>(N1.getOperand(0)))
14178 return SDValue();
14179 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14180 EVT MemVT = Load->getMemoryVT();
14181 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14182 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14183 return SDValue();
14184
14185
14186 // If the shift op is SHL, the logic op must be AND, otherwise the result
14187 // will be wrong.
14188 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14189 return SDValue();
14190
14191 if (!N0.hasOneUse() || !N1.hasOneUse())
14192 return SDValue();
14193
14194 SmallVector<SDNode *, 4> SetCCs;
14195 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14196 ISD::ZERO_EXTEND, SetCCs, TLI))
14197 return SDValue();
14198
14199 // Actually do the transformation.
14200 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14201 Load->getChain(), Load->getBasePtr(),
14202 Load->getMemoryVT(), Load->getMemOperand());
14203
14204 SDLoc DL1(N1);
14205 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14206 N1.getOperand(1));
14207
14208 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14209 SDLoc DL0(N0);
14210 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14211 DAG.getConstant(Mask, DL0, VT));
14212
14213 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14214 CombineTo(N, And);
14215 if (SDValue(Load, 0).hasOneUse()) {
14216 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14217 } else {
14218 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14219 Load->getValueType(0), ExtLoad);
14220 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14221 }
14222
14223 // N0 is dead at this point.
14224 recursivelyDeleteUnusedNodes(N0.getNode());
14225
14226 return SDValue(N,0); // Return N so it doesn't get rechecked!
14227}
14228
14229/// If we're narrowing or widening the result of a vector select and the final
14230/// size is the same size as a setcc (compare) feeding the select, then try to
14231/// apply the cast operation to the select's operands because matching vector
14232/// sizes for a select condition and other operands should be more efficient.
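/// For example (an illustrative sketch, assuming a target whose vector setcc
/// result is the same width as the compared operands):
///   (v4i32 (trunc (vselect (setcc v4i32 A, B), X:v4i64, Y:v4i64)))
///     --> (vselect (setcc v4i32 A, B), (trunc X), (trunc Y))
/// so the condition and the selected values end up with matching widths.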
14233SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14234 unsigned CastOpcode = Cast->getOpcode();
14235 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14236 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14237 CastOpcode == ISD::FP_ROUND) &&
14238 "Unexpected opcode for vector select narrowing/widening");
14239
14240 // We only do this transform before legal ops because the pattern may be
14241 // obfuscated by target-specific operations after legalization. Do not create
14242 // an illegal select op, however, because that may be difficult to lower.
14243 EVT VT = Cast->getValueType(0);
14244 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14245 return SDValue();
14246
14247 SDValue VSel = Cast->getOperand(0);
14248 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14249 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14250 return SDValue();
14251
14252 // Does the setcc have the same vector size as the casted select?
14253 SDValue SetCC = VSel.getOperand(0);
14254 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14255 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14256 return SDValue();
14257
14258 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14259 SDValue A = VSel.getOperand(1);
14260 SDValue B = VSel.getOperand(2);
14261 SDValue CastA, CastB;
14262 SDLoc DL(Cast);
14263 if (CastOpcode == ISD::FP_ROUND) {
14264 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14265 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14266 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14267 } else {
14268 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14269 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14270 }
14271 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14272}
14273
14274// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14275// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14276static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14277 const TargetLowering &TLI, EVT VT,
14278 bool LegalOperations, SDNode *N,
14279 SDValue N0, ISD::LoadExtType ExtLoadType) {
14280 SDNode *N0Node = N0.getNode();
14281 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14282 : ISD::isZEXTLoad(N0Node);
14283 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14284 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14285 return SDValue();
14286
14287 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14288 EVT MemVT = LN0->getMemoryVT();
14289 if ((LegalOperations || !LN0->isSimple() ||
14290 VT.isVector()) &&
14291 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14292 return SDValue();
14293
14294 SDValue ExtLoad =
14295 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14296 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14297 Combiner.CombineTo(N, ExtLoad);
14298 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14299 if (LN0->use_empty())
14300 Combiner.recursivelyDeleteUnusedNodes(LN0);
14301 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14302}
14303
14304// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14305// Only generate vector extloads when 1) they're legal, and 2) they are
14306// deemed desirable by the target. NonNegZExt can be set to true if a zero
14307// extend has the nonneg flag, to allow use of a sextload when profitable.
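// For example (illustrative): (sext i64 (load i32 p)) becomes
// (sextload i64 <- i32 p); with the nonneg flag, (zext nneg i64 (load i32 p))
// may be turned into a sextload instead when one of the load's setcc users
// performs a signed comparison.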
14308static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14309 const TargetLowering &TLI, EVT VT,
14310 bool LegalOperations, SDNode *N, SDValue N0,
14311 ISD::LoadExtType ExtLoadType,
14312 ISD::NodeType ExtOpc,
14313 bool NonNegZExt = false) {
14314 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14315 return {};
14316
14317 // If this is zext nneg, see if it would make sense to treat it as a sext.
14318 if (NonNegZExt) {
14319 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14320 "Unexpected load type or opcode");
14321 for (SDNode *User : N0->users()) {
14322 if (User->getOpcode() == ISD::SETCC) {
14323 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14324 if (ISD::isSignedIntSetCC(CC)) {
14325 ExtLoadType = ISD::SEXTLOAD;
14326 ExtOpc = ISD::SIGN_EXTEND;
14327 break;
14328 }
14329 }
14330 }
14331 }
14332
14333 // TODO: isFixedLengthVector() should be removed, with any negative effects
14334 // on code generation being handled by the target's implementation of
14335 // isVectorLoadExtDesirable().
14336 if ((LegalOperations || VT.isFixedLengthVector() ||
14337 !cast<LoadSDNode>(N0)->isSimple()) &&
14338 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14339 return {};
14340
14341 bool DoXform = true;
14342 SmallVector<SDNode *, 4> SetCCs;
14343 if (!N0.hasOneUse())
14344 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14345 if (VT.isVector())
14346 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14347 if (!DoXform)
14348 return {};
14349
14350 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14351 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14352 LN0->getBasePtr(), N0.getValueType(),
14353 LN0->getMemOperand());
14354 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14355 // If the load value is used only by N, replace it via CombineTo N.
14356 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14357 Combiner.CombineTo(N, ExtLoad);
14358 if (NoReplaceTrunc) {
14359 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14360 Combiner.recursivelyDeleteUnusedNodes(LN0);
14361 } else {
14362 SDValue Trunc =
14363 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14364 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14365 }
14366 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14367}
14368
14369static SDValue
14370tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14371 bool LegalOperations, SDNode *N, SDValue N0,
14372 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14373 if (!N0.hasOneUse())
14374 return SDValue();
14375
14376 auto *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14377 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14378 return SDValue();
14379
14380 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14381 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14382 return SDValue();
14383
14384 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14385 return SDValue();
14386
14387 SDLoc dl(Ld);
14388 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14389 SDValue NewLoad = DAG.getMaskedLoad(
14390 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14391 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14392 ExtLoadType, Ld->isExpandingLoad());
14393 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14394 return NewLoad;
14395}
14396
14397// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14398static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14399 const TargetLowering &TLI, EVT VT,
14400 SDValue N0,
14401 ISD::LoadExtType ExtLoadType) {
14402 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14403 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14404 return {};
14405 EVT MemoryVT = ALoad->getMemoryVT();
14406 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14407 return {};
14408 // Can't fold into ALoad if it is already extending differently.
14409 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14410 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14411 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14412 return {};
14413
14414 EVT OrigVT = ALoad->getValueType(0);
14415 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14416 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14417 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14418 ALoad->getBasePtr(), ALoad->getMemOperand()));
14419 DAG.ReplaceAllUsesOfValueWith(
14420 SDValue(ALoad, 0),
14421 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14422 // Update the chain uses.
14423 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14424 return SDValue(NewALoad, 0);
14425}
14426
14427static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14428 bool LegalOperations) {
14429 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14430 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14431
14432 SDValue SetCC = N->getOperand(0);
14433 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14434 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14435 return SDValue();
14436
14437 SDValue X = SetCC.getOperand(0);
14438 SDValue Ones = SetCC.getOperand(1);
14439 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14440 EVT VT = N->getValueType(0);
14441 EVT XVT = X.getValueType();
14442 // setge X, C is canonicalized to setgt, so we do not need to match that
14443 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14444 // not require the 'not' op.
14445 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14446 // Invert and smear/shift the sign bit:
14447 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14448 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
14449 SDLoc DL(N);
14450 unsigned ShCt = VT.getSizeInBits() - 1;
14451 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14452 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14453 SDValue NotX = DAG.getNOT(DL, X, VT);
14454 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14455 auto ShiftOpcode =
14456 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14457 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14458 }
14459 }
14460 return SDValue();
14461}
14462
14463SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14464 SDValue N0 = N->getOperand(0);
14465 if (N0.getOpcode() != ISD::SETCC)
14466 return SDValue();
14467
14468 SDValue N00 = N0.getOperand(0);
14469 SDValue N01 = N0.getOperand(1);
14470 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14471 EVT VT = N->getValueType(0);
14472 EVT N00VT = N00.getValueType();
14473 SDLoc DL(N);
14474
14475 // Propagate fast-math-flags.
14476 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14477
14478 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14479 // the same size as the compared operands. Try to optimize sext(setcc())
14480 // if this is the case.
14481 if (VT.isVector() && !LegalOperations &&
14482 TLI.getBooleanContents(N00VT) ==
14483 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14484 EVT SVT = getSetCCResultType(N00VT);
14485
14486 // If we already have the desired type, don't change it.
14487 if (SVT != N0.getValueType()) {
14488 // We know that the # elements of the results is the same as the
14489 // # elements of the compare (and the # elements of the compare result
14490 // for that matter). Check to see that they are the same size. If so,
14491 // we know that the element size of the sext'd result matches the
14492 // element size of the compare operands.
14493 if (VT.getSizeInBits() == SVT.getSizeInBits())
14494 return DAG.getSetCC(DL, VT, N00, N01, CC);
14495
14496 // If the desired elements are smaller or larger than the source
14497 // elements, we can use a matching integer vector type and then
14498 // truncate/sign extend.
14499 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14500 if (SVT == MatchingVecType) {
14501 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14502 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14503 }
14504 }
14505
14506 // Try to eliminate the sext of a setcc by zexting the compare operands.
14507 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14508 !TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {
14509 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14510 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14511 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14512
14513 // We have an unsupported narrow vector compare op that would be legal
14514 // if extended to the destination type. See if the compare operands
14515 // can be freely extended to the destination type.
14516 auto IsFreeToExtend = [&](SDValue V) {
14517 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14518 return true;
14519 // Match a simple, non-extended load that can be converted to a
14520 // legal {z/s}ext-load.
14521 // TODO: Allow widening of an existing {z/s}ext-load?
14522 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14523 ISD::isUNINDEXEDLoad(V.getNode()) &&
14524 cast<LoadSDNode>(V)->isSimple() &&
14525 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14526 return false;
14527
14528 // Non-chain users of this value must either be the setcc in this
14529 // sequence or extends that can be folded into the new {z/s}ext-load.
14530 for (SDUse &Use : V->uses()) {
14531 // Skip uses of the chain and the setcc.
14532 SDNode *User = Use.getUser();
14533 if (Use.getResNo() != 0 || User == N0.getNode())
14534 continue;
14535 // Extra users must have exactly the same cast we are about to create.
14536 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14537 // is enhanced similarly.
14538 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14539 return false;
14540 }
14541 return true;
14542 };
14543
14544 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14545 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14546 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14547 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14548 }
14549 }
14550 }
14551
14552 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14553 // Here, T can be 1 or -1, depending on the type of the setcc and
14554 // getBooleanContents().
14555 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14556
14557 // To determine the "true" side of the select, we need to know the high bit
14558 // of the value returned by the setcc if it evaluates to true.
14559 // If the type of the setcc is i1, then the true case of the select is just
14560 // sext(i1 1), that is, -1.
14561 // If the type of the setcc is larger (say, i8) then the value of the high
14562 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14563 // of the appropriate width.
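  // For example (illustrative):
  //   (sext (setcc i32 a, b, eq) to i32) --> (select (setcc a, b, eq), -1, 0)
  // since an i1 setcc result sign-extends to all ones in the true case.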
14564 SDValue ExtTrueVal = (SetCCWidth == 1)
14565 ? DAG.getAllOnesConstant(DL, VT)
14566 : DAG.getBoolConstant(true, DL, VT, N00VT);
14567 SDValue Zero = DAG.getConstant(0, DL, VT);
14568 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14569 return SCC;
14570
14571 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14572 EVT SetCCVT = getSetCCResultType(N00VT);
14573 // Don't do this transform for i1 because there's a select transform
14574 // that would reverse it.
14575 // TODO: We should not do this transform at all without a target hook
14576 // because a sext is likely cheaper than a select?
14577 if (SetCCVT.getScalarSizeInBits() != 1 &&
14578 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14579 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14580 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14581 }
14582 }
14583
14584 return SDValue();
14585}
14586
14587SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14588 SDValue N0 = N->getOperand(0);
14589 EVT VT = N->getValueType(0);
14590 SDLoc DL(N);
14591
14592 if (VT.isVector())
14593 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14594 return FoldedVOp;
14595
14596 // sext(undef) = 0 because the top bits will all be the same.
14597 if (N0.isUndef())
14598 return DAG.getConstant(0, DL, VT);
14599
14600 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14601 return Res;
14602
14603 // fold (sext (sext x)) -> (sext x)
14604 // fold (sext (aext x)) -> (sext x)
14605 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14606 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14607
14608 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14609 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14610 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
14611 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14612 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14613 N0.getOperand(0));
14614
14615 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14616 SDValue N00 = N0.getOperand(0);
14617 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14618 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14619 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14620 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14621 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14622 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14623 }
14624
14625 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14626 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14627 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14628 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14629 N0->getOperand(1));
14630 }
14631 }
14632 }
14633
14634 if (N0.getOpcode() == ISD::TRUNCATE) {
14635 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14636 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14637 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14638 SDNode *oye = N0.getOperand(0).getNode();
14639 if (NarrowLoad.getNode() != N0.getNode()) {
14640 CombineTo(N0.getNode(), NarrowLoad);
14641 // CombineTo deleted the truncate, if needed, but not what's under it.
14642 AddToWorklist(oye);
14643 }
14644 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14645 }
14646
14647 // See if the value being truncated is already sign extended. If so, just
14648 // eliminate the trunc/sext pair.
14649 SDValue Op = N0.getOperand(0);
14650 unsigned OpBits = Op.getScalarValueSizeInBits();
14651 unsigned MidBits = N0.getScalarValueSizeInBits();
14652 unsigned DestBits = VT.getScalarSizeInBits();
14653
14654 if (N0->getFlags().hasNoSignedWrap() ||
14655 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14656 if (OpBits == DestBits) {
14657 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14658 // bits, it is already the required value.
14659 return Op;
14660 }
14661
14662 if (OpBits < DestBits) {
14663 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14664 // bits, just sext from i32.
14665 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14666 }
14667
14668 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14669 // bits, just truncate to i32.
14670 SDNodeFlags Flags;
14671 Flags.setNoSignedWrap(true);
14672 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14673 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14674 }
14675
14676 // fold (sext (truncate x)) -> (sextinreg x).
14677 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14678 N0.getValueType())) {
14679 if (OpBits < DestBits)
14680 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14681 else if (OpBits > DestBits)
14682 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14683 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14684 DAG.getValueType(N0.getValueType()));
14685 }
14686 }
14687
14688 // Try to simplify (sext (load x)).
14689 if (SDValue foldedExt =
14690 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14691 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14692 return foldedExt;
14693
14694 if (SDValue foldedExt =
14695 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14696 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14697 return foldedExt;
14698
14699 // fold (sext (load x)) to multiple smaller sextloads.
14700 // Only on illegal but splittable vectors.
14701 if (SDValue ExtLoad = CombineExtLoad(N))
14702 return ExtLoad;
14703
14704 // Try to simplify (sext (sextload x)).
14705 if (SDValue foldedExt = tryToFoldExtOfExtload(
14706 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14707 return foldedExt;
14708
14709 // Try to simplify (sext (atomic_load x)).
14710 if (SDValue foldedExt =
14711 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14712 return foldedExt;
14713
14714 // fold (sext (and/or/xor (load x), cst)) ->
14715 // (and/or/xor (sextload x), (sext cst))
14716 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14717 isa<LoadSDNode>(N0.getOperand(0)) &&
14718 N0.getOperand(1).getOpcode() == ISD::Constant &&
14719 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14720 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14721 EVT MemVT = LN00->getMemoryVT();
14722 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14723 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14724 SmallVector<SDNode *, 4> SetCCs;
14725 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14726 ISD::SIGN_EXTEND, SetCCs, TLI);
14727 if (DoXform) {
14728 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14729 LN00->getChain(), LN00->getBasePtr(),
14730 LN00->getMemoryVT(),
14731 LN00->getMemOperand());
14732 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14733 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14734 ExtLoad, DAG.getConstant(Mask, DL, VT));
14735 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14736 bool NoReplaceTruncAnd = !N0.hasOneUse();
14737 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14738 CombineTo(N, And);
14739 // If N0 has multiple uses, change other uses as well.
14740 if (NoReplaceTruncAnd) {
14741 SDValue TruncAnd =
14742 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14743 CombineTo(N0.getNode(), TruncAnd);
14744 }
14745 if (NoReplaceTrunc) {
14746 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14747 } else {
14748 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14749 LN00->getValueType(0), ExtLoad);
14750 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14751 }
14752 return SDValue(N,0); // Return N so it doesn't get rechecked!
14753 }
14754 }
14755 }
14756
14757 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14758 return V;
14759
14760 if (SDValue V = foldSextSetcc(N))
14761 return V;
14762
14763 // fold (sext x) -> (zext x) if the sign bit is known zero.
14764 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14765 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14766 DAG.SignBitIsZero(N0))
14767 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14768
14769 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14770 return NewVSel;
14771
14772 // Eliminate this sign extend by doing a negation in the destination type:
14773 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14774 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14775 isNullOrNullSplat(N0.getOperand(0)) &&
14776 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14777 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14778 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14779 return DAG.getNegative(Zext, DL, VT);
14780 }
14781 // Eliminate this sign extend by doing a decrement in the destination type:
14782 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14783 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14784 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14785 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14786 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14787 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14788 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14789 }
14790
14791 // fold sext (not i1 X) -> add (zext i1 X), -1
14792 // TODO: This could be extended to handle bool vectors.
14793 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14794 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14795 TLI.isOperationLegal(ISD::ADD, VT)))) {
14796 // If we can eliminate the 'not', the sext form should be better
14797 if (SDValue NewXor = visitXOR(N0.getNode())) {
14798 // Returning N0 is a form of in-visit replacement that may have
14799 // invalidated N0.
14800 if (NewXor.getNode() == N0.getNode()) {
14801 // Return SDValue here as the xor should have already been replaced in
14802 // this sext.
14803 return SDValue();
14804 }
14805
14806 // Return a new sext with the new xor.
14807 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14808 }
14809
14810 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14811 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14812 }
14813
14814 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14815 return Res;
14816
14817 return SDValue();
14818}
14819
14820/// Given an extending node with a pop-count operand, if the target does not
14821/// support a pop-count in the narrow source type but does support it in the
14822/// destination type, widen the pop-count to the destination type.
14823static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14824 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14825 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14826 "Expected extend op");
14827
14828 SDValue CtPop = Extend->getOperand(0);
14829 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14830 return SDValue();
14831
14832 EVT VT = Extend->getValueType(0);
14833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14834 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14835 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14836 return SDValue();
14837
14838 // zext (ctpop X) --> ctpop (zext X)
14839 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14840 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14841}
14842
14843// If we have (zext (abs X)) where X is a type that will be promoted by type
14844// legalization, convert to (abs (sext X)). But don't extend past a legal type.
14845static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14846 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14847
14848 EVT VT = Extend->getValueType(0);
14849 if (VT.isVector())
14850 return SDValue();
14851
14852 SDValue Abs = Extend->getOperand(0);
14853 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14854 return SDValue();
14855
14856 EVT AbsVT = Abs.getValueType();
14857 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14858 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14859 TargetLowering::TypePromoteInteger)
14860 return SDValue();
14861
14862 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14863
14864 SDValue SExt =
14865 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14866 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14867 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14868}
14869
14870SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14871 SDValue N0 = N->getOperand(0);
14872 EVT VT = N->getValueType(0);
14873 SDLoc DL(N);
14874
14875 if (VT.isVector())
14876 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14877 return FoldedVOp;
14878
14879 // zext(undef) = 0
14880 if (N0.isUndef())
14881 return DAG.getConstant(0, DL, VT);
14882
14883 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14884 return Res;
14885
14886 // fold (zext (zext x)) -> (zext x)
14887 // fold (zext (aext x)) -> (zext x)
14888 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14889 SDNodeFlags Flags;
14890 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14891 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14892 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14893 }
14894
14895 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14896 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14897 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14898 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14899 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14900
14901 // fold (zext (truncate x)) -> (zext x) or
14902 // (zext (truncate x)) -> (truncate x)
14903 // This is valid when the truncated bits of x are already zero.
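  // For example (illustrative): if X is (and i32 Y, 255), then
  //   (zext (trunc X to i8) to i32) --> X
  // because the bits removed by the truncate are already known zero.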
14904 SDValue Op;
14905 KnownBits Known;
14906 if (isTruncateOf(DAG, N0, Op, Known)) {
14907 APInt TruncatedBits =
14908 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14909 APInt(Op.getScalarValueSizeInBits(), 0) :
14910 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14911 N0.getScalarValueSizeInBits(),
14912 std::min(Op.getScalarValueSizeInBits(),
14913 VT.getScalarSizeInBits()));
14914 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14915 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14916 DAG.salvageDebugInfo(*N0.getNode());
14917
14918 return ZExtOrTrunc;
14919 }
14920 }
14921
14922 // fold (zext (truncate x)) -> (and x, mask)
14923 if (N0.getOpcode() == ISD::TRUNCATE) {
14924 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14925 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14926 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14927 SDNode *oye = N0.getOperand(0).getNode();
14928 if (NarrowLoad.getNode() != N0.getNode()) {
14929 CombineTo(N0.getNode(), NarrowLoad);
14930 // CombineTo deleted the truncate, if needed, but not what's under it.
14931 AddToWorklist(oye);
14932 }
14933 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14934 }
14935
14936 EVT SrcVT = N0.getOperand(0).getValueType();
14937 EVT MinVT = N0.getValueType();
14938
14939 if (N->getFlags().hasNonNeg()) {
14940 SDValue Op = N0.getOperand(0);
14941 unsigned OpBits = SrcVT.getScalarSizeInBits();
14942 unsigned MidBits = MinVT.getScalarSizeInBits();
14943 unsigned DestBits = VT.getScalarSizeInBits();
14944
14945 if (N0->getFlags().hasNoSignedWrap() ||
14946 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14947 if (OpBits == DestBits) {
14948 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14949 // bits, it is already ready.
14950 // bits, it is already the required value.
14951 }
14952
14953 if (OpBits < DestBits) {
14954 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14955 // bits, just sext from i32.
14956 // FIXME: This can probably be ZERO_EXTEND nneg?
14957 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14958 }
14959
14960 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14961 // bits, just truncate to i32.
14962 SDNodeFlags Flags;
14963 Flags.setNoSignedWrap(true);
14964 Flags.setNoUnsignedWrap(true);
14965 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14966 }
14967 }
14968
14969 // Try to mask before the extension to avoid having to generate a larger mask,
14970 // possibly over several sub-vectors.
14971 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14972 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14974 SDValue Op = N0.getOperand(0);
14975 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14976 AddToWorklist(Op.getNode());
14977 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14978 // Transfer the debug info; the new node is equivalent to N0.
14979 DAG.transferDbgValues(N0, ZExtOrTrunc);
14980 return ZExtOrTrunc;
14981 }
14982 }
14983
14984 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14985 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14986 AddToWorklist(Op.getNode());
14987 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14988 // We may safely transfer the debug info describing the truncate node over
14989 // to the equivalent and operation.
14990 DAG.transferDbgValues(N0, And);
14991 return And;
14992 }
14993 }
14994
14995 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14996 // if either of the casts is not free.
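  // For example (illustrative):
  //   (zext i64 (and (trunc i64 X to i32), 7)) --> (and X, 7)
  // avoiding both casts when at least one of them is not free.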
14997 if (N0.getOpcode() == ISD::AND &&
14998 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14999 N0.getOperand(1).getOpcode() == ISD::Constant &&
15000 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
15001 !TLI.isZExtFree(N0.getValueType(), VT))) {
15002 SDValue X = N0.getOperand(0).getOperand(0);
15003 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
15004 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15005 return DAG.getNode(ISD::AND, DL, VT,
15006 X, DAG.getConstant(Mask, DL, VT));
15007 }
15008
15009 // Try to simplify (zext (load x)).
15010 if (SDValue foldedExt = tryToFoldExtOfLoad(
15011 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
15012 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
15013 return foldedExt;
15014
15015 if (SDValue foldedExt =
15016 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15017 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15018 return foldedExt;
15019
15020 // fold (zext (load x)) to multiple smaller zextloads.
15021 // Only on illegal but splittable vectors.
15022 if (SDValue ExtLoad = CombineExtLoad(N))
15023 return ExtLoad;
15024
15025 // Try to simplify (zext (atomic_load x)).
15026 if (SDValue foldedExt =
15027 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
15028 return foldedExt;
15029
15030 // fold (zext (and/or/xor (load x), cst)) ->
15031 // (and/or/xor (zextload x), (zext cst))
15032 // Unless (and (load x) cst) will match as a zextload already and has
15033 // additional users, or the zext is already free.
15034 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15035 isa<LoadSDNode>(N0.getOperand(0)) &&
15036 N0.getOperand(1).getOpcode() == ISD::Constant &&
15037 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15038 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15039 EVT MemVT = LN00->getMemoryVT();
15040 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
15041 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15042 bool DoXform = true;
15043 SmallVector<SDNode *, 4> SetCCs;
15044 if (!N0.hasOneUse()) {
15045 if (N0.getOpcode() == ISD::AND) {
15046 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15047 EVT LoadResultTy = AndC->getValueType(0);
15048 EVT ExtVT;
15049 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15050 DoXform = false;
15051 }
15052 }
15053 if (DoXform)
15054 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15055 ISD::ZERO_EXTEND, SetCCs, TLI);
15056 if (DoXform) {
15057 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15058 LN00->getChain(), LN00->getBasePtr(),
15059 LN00->getMemoryVT(),
15060 LN00->getMemOperand());
15061 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15062 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15063 ExtLoad, DAG.getConstant(Mask, DL, VT));
15064 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15065 bool NoReplaceTruncAnd = !N0.hasOneUse();
15066 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15067 CombineTo(N, And);
15068 // If N0 has multiple uses, change other uses as well.
15069 if (NoReplaceTruncAnd) {
15070 SDValue TruncAnd =
15071 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
15072 CombineTo(N0.getNode(), TruncAnd);
15073 }
15074 if (NoReplaceTrunc) {
15075 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15076 } else {
15077 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15078 LN00->getValueType(0), ExtLoad);
15079 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15080 }
15081 return SDValue(N,0); // Return N so it doesn't get rechecked!
15082 }
15083 }
15084 }
15085
15086 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15087 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15088 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15089 return ZExtLoad;
15090
15091 // Try to simplify (zext (zextload x)).
15092 if (SDValue foldedExt = tryToFoldExtOfExtload(
15093 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15094 return foldedExt;
15095
15096 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15097 return V;
15098
15099 if (N0.getOpcode() == ISD::SETCC) {
15100 // Propagate fast-math-flags.
15101 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15102
15103 // Only do this before legalize for now.
15104 if (!LegalOperations && VT.isVector() &&
15105 N0.getValueType().getVectorElementType() == MVT::i1) {
15106 EVT N00VT = N0.getOperand(0).getValueType();
15107 if (getSetCCResultType(N00VT) == N0.getValueType())
15108 return SDValue();
15109
15110 // We know that the # elements of the results is the same as the #
15111 // elements of the compare (and the # elements of the compare result for
15112 // that matter). Check to see that they are the same size. If so, we know
15113 // that the element size of the sext'd result matches the element size of
15114 // the compare operands.
15115 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15116 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15117 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15118 N0.getOperand(1), N0.getOperand(2));
15119 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15120 }
15121
15122 // If the desired elements are smaller or larger than the source
15123 // elements we can use a matching integer vector type and then
15124 // truncate/any extend followed by zext_in_reg.
15125 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15126 SDValue VsetCC =
15127 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15128 N0.getOperand(1), N0.getOperand(2));
15129 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15130 N0.getValueType());
15131 }
15132
15133 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15134 EVT N0VT = N0.getValueType();
15135 EVT N00VT = N0.getOperand(0).getValueType();
15136 if (SDValue SCC = SimplifySelectCC(
15137 DL, N0.getOperand(0), N0.getOperand(1),
15138 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15139 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15140 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15141 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15142 }
15143
15144 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
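  // For example (illustrative):
  //   (zext i64 (srl (zext i16 X to i32), 4)) --> (srl (zext i16 X to i64), 4)
  // The outer extension is pushed through the shift (for shl only when no set
  // bits can be shifted out).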
15145 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15146 !TLI.isZExtFree(N0, VT)) {
15147 SDValue ShVal = N0.getOperand(0);
15148 SDValue ShAmt = N0.getOperand(1);
15149 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15150 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15151 if (N0.getOpcode() == ISD::SHL) {
15152 // If the original shl may be shifting out bits, do not perform this
15153 // transformation.
15154 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15155 ShVal.getOperand(0).getValueSizeInBits();
15156 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15157 // If the shift is too large, then see if we can deduce that the
15158 // shift is safe anyway.
15159
15160 // Check if the bits being shifted out are known to be zero.
15161 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15162 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15163 return SDValue();
15164 }
15165 }
15166
15167 // Ensure that the shift amount is wide enough for the shifted value.
15168 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15169 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15170
15171 return DAG.getNode(N0.getOpcode(), DL, VT,
15172 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15173 }
15174 }
15175 }
15176
15177 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15178 return NewVSel;
15179
15180 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15181 return NewCtPop;
15182
15183 if (SDValue V = widenAbs(N, DAG))
15184 return V;
15185
15186 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15187 return Res;
15188
15189 // CSE zext nneg with sext if the zext is not free.
15190 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15191 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15192 if (CSENode)
15193 return SDValue(CSENode, 0);
15194 }
15195
15196 return SDValue();
15197}
15198
15199SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15200 SDValue N0 = N->getOperand(0);
15201 EVT VT = N->getValueType(0);
15202 SDLoc DL(N);
15203
15204 // aext(undef) = undef
15205 if (N0.isUndef())
15206 return DAG.getUNDEF(VT);
15207
15208 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15209 return Res;
15210
15211 // fold (aext (aext x)) -> (aext x)
15212 // fold (aext (zext x)) -> (zext x)
15213 // fold (aext (sext x)) -> (sext x)
15214 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15215 N0.getOpcode() == ISD::SIGN_EXTEND) {
15216 SDNodeFlags Flags;
15217 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15218 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15219 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15220 }
15221
15222 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15223 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15224 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15225 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15226 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15227 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
15228 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15229
15230 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15231 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15232 if (N0.getOpcode() == ISD::TRUNCATE) {
15233 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15234 SDNode *oye = N0.getOperand(0).getNode();
15235 if (NarrowLoad.getNode() != N0.getNode()) {
15236 CombineTo(N0.getNode(), NarrowLoad);
15237 // CombineTo deleted the truncate, if needed, but not what's under it.
15238 AddToWorklist(oye);
15239 }
15240 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15241 }
15242 }
15243
15244 // fold (aext (truncate x))
15245 if (N0.getOpcode() == ISD::TRUNCATE)
15246 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15247
15248 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15249 // if the trunc is not free.
15250 if (N0.getOpcode() == ISD::AND &&
15251 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15252 N0.getOperand(1).getOpcode() == ISD::Constant &&
15253 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15254 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15255 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15256 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15257 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15258 }
15259
15260 // fold (aext (load x)) -> (aext (truncate (extload x)))
15261 // None of the supported targets knows how to perform load and any_ext
15262 // on vectors in one instruction, so attempt to fold to zext instead.
15263 if (VT.isVector()) {
15264 // Try to simplify (zext (load x)).
15265 if (SDValue foldedExt =
15266 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15267 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15268 return foldedExt;
15269 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15270 ISD::isUNINDEXEDLoad(N0.getNode()) &&
15271 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
15272 bool DoXform = true;
15273 SmallVector<SDNode *, 4> SetCCs;
15274 if (!N0.hasOneUse())
15275 DoXform =
15276 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15277 if (DoXform) {
15278 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15279 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15280 LN0->getBasePtr(), N0.getValueType(),
15281 LN0->getMemOperand());
15282 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15283 // If the load value is used only by N, replace it via CombineTo N.
15284 bool NoReplaceTrunc = N0.hasOneUse();
15285 CombineTo(N, ExtLoad);
15286 if (NoReplaceTrunc) {
15287 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15288 recursivelyDeleteUnusedNodes(LN0);
15289 } else {
15290 SDValue Trunc =
15291 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15292 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15293 }
15294 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15295 }
15296 }
15297
15298 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15299 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15300 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15301 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15302 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15303 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15304 ISD::LoadExtType ExtType = LN0->getExtensionType();
15305 EVT MemVT = LN0->getMemoryVT();
15306 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15307 SDValue ExtLoad =
15308 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15309 MemVT, LN0->getMemOperand());
15310 CombineTo(N, ExtLoad);
15311 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15312 recursivelyDeleteUnusedNodes(LN0);
15313 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15314 }
15315 }
15316
15317 if (N0.getOpcode() == ISD::SETCC) {
15318 // Propagate fast-math-flags.
15319 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15320
15321 // For vectors:
15322 // aext(setcc) -> vsetcc
15323 // aext(setcc) -> truncate(vsetcc)
15324 // aext(setcc) -> aext(vsetcc)
15325 // Only do this before legalize for now.
15326 if (VT.isVector() && !LegalOperations) {
15327 EVT N00VT = N0.getOperand(0).getValueType();
15328 if (getSetCCResultType(N00VT) == N0.getValueType())
15329 return SDValue();
15330
15331 // We know that the # elements of the results is the same as the
15332 // # elements of the compare (and the # elements of the compare result
15333 // for that matter). Check to see that they are the same size. If so,
15334 // we know that the element size of the sext'd result matches the
15335 // element size of the compare operands.
15336 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15337 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15338 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15339
15340 // If the desired elements are smaller or larger than the source
15341 // elements we can use a matching integer vector type and then
15342 // truncate/any extend
15343 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15344 SDValue VsetCC = DAG.getSetCC(
15345 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15346 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15347 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15348 }
15349
15350 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15351 if (SDValue SCC = SimplifySelectCC(
15352 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15353 DAG.getConstant(0, DL, VT),
15354 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15355 return SCC;
15356 }
15357
15358 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15359 return NewCtPop;
15360
15361 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15362 return Res;
15363
15364 return SDValue();
15365}
15366
15367SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15368 unsigned Opcode = N->getOpcode();
15369 SDValue N0 = N->getOperand(0);
15370 SDValue N1 = N->getOperand(1);
15371 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15372
15373 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15374 if (N0.getOpcode() == Opcode &&
15375 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15376 return N0;
15377
15378 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15379 N0.getOperand(0).getOpcode() == Opcode) {
15380 // We have an assert, truncate, assert sandwich. Make one stronger assert
15381 // by asserting on the smallest asserted type to the larger source type.
15382 // This eliminates the later assert:
15383 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15384 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15385 SDLoc DL(N);
15386 SDValue BigA = N0.getOperand(0);
15387 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15388 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15389 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15390 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15391 BigA.getOperand(0), MinAssertVTVal);
15392 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15393 }
15394
15395 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15396 // than X. Just move the AssertZext in front of the truncate and drop the
15397 // AssertSExt.
15398 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15399 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
15400 Opcode == ISD::AssertZext) {
15401 SDValue BigA = N0.getOperand(0);
15402 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15403 if (AssertVT.bitsLT(BigA_AssertVT)) {
15404 SDLoc DL(N);
15405 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15406 BigA.getOperand(0), N1);
15407 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15408 }
15409 }
15410
15411 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15412 isa<ConstantSDNode>(N0.getOperand(1))) {
15413 const APInt &Mask = N0.getConstantOperandAPInt(1);
15414
15415 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15416 // than X, and the And doesn't change the lower iX bits, we can move the
15417 // AssertZext in front of the And and drop the AssertSext.
15418 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15419 SDValue BigA = N0.getOperand(0);
15420 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15421 if (AssertVT.bitsLT(BigA_AssertVT) &&
15422 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15423 SDLoc DL(N);
15424 SDValue NewAssert =
15425 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15426 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15427 N0.getOperand(1));
15428 }
15429 }
15430
15431 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15432 // fail.
15433 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
15434 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15435 return N0;
15436 }
15437
15438 return SDValue();
15439}
15440
15441SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15442 SDLoc DL(N);
15443
15444 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15445 SDValue N0 = N->getOperand(0);
15446
15447 // Fold (assertalign (assertalign x, AL0), AL1) ->
15448 // (assertalign x, max(AL0, AL1))
15449 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15450 return DAG.getAssertAlign(DL, N0.getOperand(0),
15451 std::max(AL, AAN->getAlign()));
15452
15453 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15454 // this assert down to source operands so that those arithmetic ops could be
15455 // exposed to the DAG combining.
15456 switch (N0.getOpcode()) {
15457 default:
15458 break;
15459 case ISD::ADD:
15460 case ISD::PTRADD:
15461 case ISD::SUB: {
15462 unsigned AlignShift = Log2(AL);
15463 SDValue LHS = N0.getOperand(0);
15464 SDValue RHS = N0.getOperand(1);
15465 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15466 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15467 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15468 if (LHSAlignShift < AlignShift)
15469 LHS = DAG.getAssertAlign(DL, LHS, AL);
15470 if (RHSAlignShift < AlignShift)
15471 RHS = DAG.getAssertAlign(DL, RHS, AL);
15472 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15473 }
15474 break;
15475 }
15476 }
15477
15478 return SDValue();
15479}
15480
15481/// If the result of a load is shifted/masked/truncated to an effectively
15482/// narrower type, try to transform the load to a narrower type and/or
15483/// use an extending load.
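/// As a rough illustration (not an exhaustive description of the cases handled
/// below), on a little-endian target a pattern such as
///   (i32 (truncate (i64 (srl (load x), 32))))
/// may be rewritten to load only the upper 32-bit half, i.e. a narrower i32
/// load from x+4, provided the narrow load is legal/profitable for the target.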
15484SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15485 unsigned Opc = N->getOpcode();
15486
15487 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15488 SDValue N0 = N->getOperand(0);
15489 EVT VT = N->getValueType(0);
15490 EVT ExtVT = VT;
15491
15492 // This transformation isn't valid for vector loads.
15493 if (VT.isVector())
15494 return SDValue();
15495
15496 // The ShAmt variable is used to indicate that we've consumed a right
15497 // shift. I.e. we want to narrow the width of the load by skipping the ShAmt
15498 // least significant bits.
15499 unsigned ShAmt = 0;
15500 // A special case is when the least significant bits from the load are masked
15501 // away, but using an AND rather than a right shift. ShiftedOffset is used
15502 // to indicate that the narrowed load should be left-shifted ShiftedOffset
15503 // bits to get the result.
15504 unsigned ShiftedOffset = 0;
15505 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15506 // extending to VT.
15507 if (Opc == ISD::SIGN_EXTEND_INREG) {
15508 ExtType = ISD::SEXTLOAD;
15509 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15510 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15511 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15512 // value, or it may be shifting a higher subword, half or byte into the
15513 // lowest bits.
15514
15515 // Only handle shift with constant shift amount, and the shiftee must be a
15516 // load.
15517 auto *LN = dyn_cast<LoadSDNode>(N0);
15518 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15519 if (!N1C || !LN)
15520 return SDValue();
15521 // If the shift amount is larger than the memory type then we're not
15522 // accessing any of the loaded bytes.
15523 ShAmt = N1C->getZExtValue();
15524 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15525 if (MemoryWidth <= ShAmt)
15526 return SDValue();
15527 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15528 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15529 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15530 // If original load is a SEXTLOAD then we can't simply replace it by a
15531 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
15532 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15533 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15534 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15535 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15536 LN->getExtensionType() != ExtType)
15537 return SDValue();
15538 } else if (Opc == ISD::AND) {
15539 // An AND with a constant mask is the same as a truncate + zero-extend.
15540 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15541 if (!AndC)
15542 return SDValue();
15543
15544 const APInt &Mask = AndC->getAPIntValue();
15545 unsigned ActiveBits = 0;
15546 if (Mask.isMask()) {
15547 ActiveBits = Mask.countr_one();
15548 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15549 ShiftedOffset = ShAmt;
15550 } else {
15551 return SDValue();
15552 }
15553
15554 ExtType = ISD::ZEXTLOAD;
15555 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15556 }
15557
15558 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15559 // a right shift. Here we redo some of those checks, to possibly adjust the
15560 // ExtVT even further based on "a masking AND". We could also end up here for
15561 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15562 // need to be done here as well.
15563 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15564 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15565 // Bail out when the SRL has more than one use. This is done for historical
15566 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15567 // check below? And maybe it could be non-profitable to do the transform in
15568 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15569 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
15570 if (!SRL.hasOneUse())
15571 return SDValue();
15572
15573 // Only handle shift with constant shift amount, and the shiftee must be a
15574 // load.
15575 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15576 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15577 if (!SRL1C || !LN)
15578 return SDValue();
15579
15580 // If the shift amount is larger than the input type then we're not
15581 // accessing any of the loaded bytes. If the load was a zextload/extload
15582 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15583 ShAmt = SRL1C->getZExtValue();
15584 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15585 if (ShAmt >= MemoryWidth)
15586 return SDValue();
15587
15588 // Because a SRL must be assumed to *need* to zero-extend the high bits
15589 // (as opposed to anyext the high bits), we can't combine the zextload
15590 // lowering of SRL and an sextload.
15591 if (LN->getExtensionType() == ISD::SEXTLOAD)
15592 return SDValue();
15593
15594 // Avoid reading outside the memory accessed by the original load (could
15595 // happen if we only adjust the load base pointer by ShAmt). Instead we
15596 // try to narrow the load even further. The typical scenario here is:
15597 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15598 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15599 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15600 // Don't replace sextload by zextload.
15601 if (ExtType == ISD::SEXTLOAD)
15602 return SDValue();
15603 // Narrow the load.
15604 ExtType = ISD::ZEXTLOAD;
15605 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15606 }
15607
15608 // If the SRL is only used by a masking AND, we may be able to adjust
15609 // the ExtVT to make the AND redundant.
15610 SDNode *Mask = *(SRL->user_begin());
15611 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15612 isa<ConstantSDNode>(Mask->getOperand(1))) {
15613 unsigned Offset, ActiveBits;
15614 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15615 if (ShiftMask.isMask()) {
15616 EVT MaskedVT =
15617 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15618 // If the mask is smaller, recompute the type.
15619 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15620 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15621 ExtVT = MaskedVT;
15622 } else if (ExtType == ISD::ZEXTLOAD &&
15623 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15624 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15625 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15626 // If the mask is shifted we can use a narrower load and a shl to insert
15627 // the trailing zeros.
15628 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15629 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15630 ExtVT = MaskedVT;
15631 ShAmt = Offset + ShAmt;
15632 ShiftedOffset = Offset;
15633 }
15634 }
15635 }
15636
15637 N0 = SRL.getOperand(0);
15638 }
15639
15640 // If the load is shifted left (and the result isn't shifted back right), we
15641 // can fold a truncate through the shift. The typical scenario is that N
15642 // points at a TRUNCATE here so the attempted fold is:
15643 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
15644 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15645 unsigned ShLeftAmt = 0;
15646 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15647 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15648 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15649 ShLeftAmt = N01->getZExtValue();
15650 N0 = N0.getOperand(0);
15651 }
15652 }
15653
15654 // If we haven't found a load, we can't narrow it.
15655 if (!isa<LoadSDNode>(N0))
15656 return SDValue();
15657
15658 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15659 // Reducing the width of a volatile load is illegal. For atomics, we may be
15660 // able to reduce the width provided we never widen again. (see D66309)
15661 if (!LN0->isSimple() ||
15662 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15663 return SDValue();
15664
15665 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15666 unsigned LVTStoreBits =
15667 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15668 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15669 return LVTStoreBits - EVTStoreBits - ShAmt;
15670 };
15671
15672 // We need to adjust the pointer to the load by ShAmt bits in order to load
15673 // the correct bytes.
15674 unsigned PtrAdjustmentInBits =
15675 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15676
15677 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15678 SDLoc DL(LN0);
15679 // The original load itself didn't wrap, so an offset within it doesn't.
15680 SDValue NewPtr =
15681 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
15682 DL, SDNodeFlags::NoUnsignedWrap);
15683 AddToWorklist(NewPtr.getNode());
15684
15685 SDValue Load;
15686 if (ExtType == ISD::NON_EXTLOAD) {
15687 const MDNode *OldRanges = LN0->getRanges();
15688 const MDNode *NewRanges = nullptr;
15689 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15690 // metadata is not the full-set for the new width then create a NewRanges
15691 // metadata for the truncated load
15692 if (ShAmt == 0 && OldRanges) {
15693 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15694 unsigned BitSize = VT.getScalarSizeInBits();
15695
15696 // It is possible for an 8-bit extending load with 8-bit range
15697 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15698 // ensure that truncation is strictly smaller.
15699 if (CR.getBitWidth() > BitSize) {
15700 ConstantRange TruncatedCR = CR.truncate(BitSize);
15701 if (!TruncatedCR.isFullSet()) {
15702 Metadata *Bounds[2] = {
15703 ConstantAsMetadata::get(
15704 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15705 ConstantAsMetadata::get(
15706 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15707 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15708 }
15709 } else if (CR.getBitWidth() == BitSize)
15710 NewRanges = OldRanges;
15711 }
15712 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15713 LN0->getPointerInfo().getWithOffset(PtrOff),
15714 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15715 LN0->getAAInfo(), NewRanges);
15716 } else
15717 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15718 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15719 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15720 LN0->getAAInfo());
15721
15722 // Replace the old load's chain with the new load's chain.
15723 WorklistRemover DeadNodes(*this);
15724 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15725
15726 // Shift the result left, if we've swallowed a left shift.
15727 SDValue Result = Load;
15728 if (ShLeftAmt != 0) {
15729 // If the shift amount is as large as the result size (but, presumably,
15730 // no larger than the source) then the useful bits of the result are
15731 // zero; we can't simply return the shortened shift, because the result
15732 // of that operation is undefined.
15733 if (ShLeftAmt >= VT.getScalarSizeInBits())
15734 Result = DAG.getConstant(0, DL, VT);
15735 else
15736 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15737 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15738 }
15739
15740 if (ShiftedOffset != 0) {
15741 // We're using a shifted mask, so the load now has an offset. This means
15742 // that data has been loaded into lower bytes than it would have been
15743 // before, so we need to shl the loaded data into the correct position in the
15744 // register.
15745 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15746 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15747 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15748 }
15749
15750 // Return the new loaded value.
15751 return Result;
15752}
15753
15754SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15755 SDValue N0 = N->getOperand(0);
15756 SDValue N1 = N->getOperand(1);
15757 EVT VT = N->getValueType(0);
15758 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15759 unsigned VTBits = VT.getScalarSizeInBits();
15760 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15761 SDLoc DL(N);
15762
15763 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
15764 if (N0.isUndef())
15765 return DAG.getConstant(0, DL, VT);
15766
15767 // fold (sext_in_reg c1) -> c1
15768 if (SDValue C =
15769 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15770 return C;
15771
15772 // If the input is already sign extended, just drop the extension.
15773 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15774 return N0;
15775
15776 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15777 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15778 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15779 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15780
15781 // fold (sext_in_reg (sext x)) -> (sext x)
15782 // fold (sext_in_reg (aext x)) -> (sext x)
15783 // if x is small enough or if we know that x has more than 1 sign bit and the
15784 // sign_extend_inreg is extending from one of them.
15785 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15786 SDValue N00 = N0.getOperand(0);
15787 unsigned N00Bits = N00.getScalarValueSizeInBits();
15788 if ((N00Bits <= ExtVTBits ||
15789 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15790 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15791 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15792 }
15793
15794 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15795 // if x is small enough or if we know that x has more than 1 sign bit and the
15796 // sign_extend_inreg is extending from one of them.
15797 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15798 SDValue N00 = N0.getOperand(0);
15799 unsigned N00Bits = N00.getScalarValueSizeInBits();
15800 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15801 if ((N00Bits == ExtVTBits ||
15802 (!IsZext && (N00Bits < ExtVTBits ||
15803 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15804 (!LegalOperations ||
15805 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15806 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15807 }
15808
15809 // fold (sext_in_reg (zext x)) -> (sext x)
15810 // iff we are extending the source sign bit.
15811 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15812 SDValue N00 = N0.getOperand(0);
15813 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15814 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15815 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15816 }
15817
15818 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15819 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15820 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15821
15822 // fold operands of sext_in_reg based on knowledge that the top bits are not
15823 // demanded.
15824 if (SimplifyDemandedBits(SDValue(N, 0)))
15825 return SDValue(N, 0);
15826
15827 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15828 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15829 if (SDValue NarrowLoad = reduceLoadWidth(N))
15830 return NarrowLoad;
15831
15832 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15833 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15834 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15835 if (N0.getOpcode() == ISD::SRL) {
15836 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15837 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15838 // We can turn this into an SRA iff the input to the SRL is already sign
15839 // extended enough.
15840 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15841 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15842 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15843 N0.getOperand(1));
15844 }
15845 }
15846
15847 // fold (sext_inreg (extload x)) -> (sextload x)
15848 // If sextload is not supported by target, we can only do the combine when
15849 // load has one use. Doing otherwise can block folding the extload with other
15850 // extends that the target does support.
15851 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15852 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15853 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15854 N0.hasOneUse()) ||
15855 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15856 auto *LN0 = cast<LoadSDNode>(N0);
15857 SDValue ExtLoad =
15858 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15859 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15860 CombineTo(N, ExtLoad);
15861 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15862 AddToWorklist(ExtLoad.getNode());
15863 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15864 }
15865
15866 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15867 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15868 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15869 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15870 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15871 auto *LN0 = cast<LoadSDNode>(N0);
15872 SDValue ExtLoad =
15873 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15874 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15875 CombineTo(N, ExtLoad);
15876 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15877 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15878 }
15879
15880 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15881 // ignore it if the masked load is already sign extended
15882 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15883 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15884 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15885 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15886 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15887 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15888 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15889 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15890 CombineTo(N, ExtMaskedLoad);
15891 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15892 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15893 }
15894 }
15895
15896 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15897 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15898 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15899 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
15900 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15901 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15902
15903 SDValue ExtLoad = DAG.getMaskedGather(
15904 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15905 GN0->getIndexType(), ISD::SEXTLOAD);
15906
15907 CombineTo(N, ExtLoad);
15908 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15909 AddToWorklist(ExtLoad.getNode());
15910 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15911 }
15912 }
15913
15914 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15915 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15916 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15917 N0.getOperand(1), false))
15918 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15919 }
15920
15921 // Fold (iM_signext_inreg
15922 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15923 // from iN)
15924 // -> (extract_subvector (signext iN_v to iM))
15925 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15926 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15927 SDValue InnerExt = N0.getOperand(0);
15928 EVT InnerExtVT = InnerExt->getValueType(0);
15929 SDValue Extendee = InnerExt->getOperand(0);
15930
15931 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15932 (!LegalOperations ||
15933 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15934 SDValue SignExtExtendee =
15935 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15936 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15937 N0.getOperand(1));
15938 }
15939 }
15940
15941 return SDValue();
15942}
15943
15944 static SDValue foldExtendVectorInregToExtendOfSubvector(
15945 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15946 bool LegalOperations) {
15947 unsigned InregOpcode = N->getOpcode();
15948 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15949
15950 SDValue Src = N->getOperand(0);
15951 EVT VT = N->getValueType(0);
15952 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15953 Src.getValueType().getVectorElementType(),
15954 VT.getVectorElementCount());
15955
15956 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15957 "Expected EXTEND_VECTOR_INREG dag node in input!");
15958
15959 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15960 // FIXME: one-use check may be overly restrictive
15961 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15962 return SDValue();
15963
15964 // Profitability check: we must be extending exactly one of its operands.
15965 // FIXME: this is probably overly restrictive.
15966 Src = Src.getOperand(0);
15967 if (Src.getValueType() != SrcVT)
15968 return SDValue();
15969
15970 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15971 return SDValue();
15972
15973 return DAG.getNode(Opcode, DL, VT, Src);
15974}
15975
15976SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15977 SDValue N0 = N->getOperand(0);
15978 EVT VT = N->getValueType(0);
15979 SDLoc DL(N);
15980
15981 if (N0.isUndef()) {
15982 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15983 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15984 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15985 ? DAG.getUNDEF(VT)
15986 : DAG.getConstant(0, DL, VT);
15987 }
15988
15989 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15990 return Res;
15991
15992 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15993 return SDValue(N, 0);
15994
15995 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15996 LegalOperations))
15997 return R;
15998
15999 return SDValue();
16000}
16001
16002SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
16003 EVT VT = N->getValueType(0);
16004 SDValue N0 = N->getOperand(0);
16005
16006 SDValue FPVal;
16007 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
16008 TLI.shouldConvertFpToSat(
16009 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
16010 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
16011 DAG.getValueType(VT.getScalarType()));
16012
16013 return SDValue();
16014}
16015
16016/// Detect patterns of truncation with unsigned saturation:
16017///
16018/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
16019/// Return the source value x to be truncated or SDValue() if the pattern was
16020/// not matched.
16021///
16022 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
16023 unsigned NumDstBits = VT.getScalarSizeInBits();
16024 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16025 // Saturation with truncation. We truncate from InVT to VT.
16026 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16027
16028 SDValue Min;
16029 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16030 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
16031 return Min;
16032
16033 return SDValue();
16034}
16035
16036/// Detect patterns of truncation with signed saturation:
16037/// (truncate (smin (smax (x, signed_min_of_dest_type),
16038/// signed_max_of_dest_type)) to dest_type)
16039/// or:
16040/// (truncate (smax (smin (x, signed_max_of_dest_type),
16041/// signed_min_of_dest_type)) to dest_type).
16042///
16043/// Return the source value to be truncated or SDValue() if the pattern was not
16044/// matched.
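/// For instance, for an i32 -> i8 truncate this matches
///   (truncate (smin (smax x, -128), 127))
/// and returns x (illustrative example; -128/127 are the signed bounds of i8).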
16045 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
16046 unsigned NumDstBits = VT.getScalarSizeInBits();
16047 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16048 // Saturation with truncation. We truncate from InVT to VT.
16049 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16050
16051 SDValue Val;
16052 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16053 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16054
16055 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
16056 m_SpecificInt(SignedMax))))
16057 return Val;
16058
16059 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
16060 m_SpecificInt(SignedMin))))
16061 return Val;
16062
16063 return SDValue();
16064}
16065
16066/// Detect patterns of truncation with unsigned saturation:
16067 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
16068 const SDLoc &DL) {
16069 unsigned NumDstBits = VT.getScalarSizeInBits();
16070 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16071 // Saturation with truncation. We truncate from InVT to VT.
16072 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16073
16074 SDValue Val;
16075 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16076 // Min == 0, Max is unsigned max of destination type.
16077 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16078 m_Zero())))
16079 return Val;
16080
16081 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16082 m_SpecificInt(UnsignedMax))))
16083 return Val;
16084
16085 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16086 m_SpecificInt(UnsignedMax))))
16087 return Val;
16088
16089 return SDValue();
16090}
16091
16092static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16093 SDLoc &DL, const TargetLowering &TLI,
16094 SelectionDAG &DAG) {
16095 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16096 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16097 TLI.isTypeDesirableForOp(Opc, VT));
16098 };
16099
16100 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16101 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16102 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16103 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16104 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16105 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16106 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16107 } else if (Src.getOpcode() == ISD::UMIN) {
16108 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16109 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16110 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16111 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16112 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16113 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16114 }
16115
16116 return SDValue();
16117}
16118
16119SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16120 SDValue N0 = N->getOperand(0);
16121 EVT VT = N->getValueType(0);
16122 EVT SrcVT = N0.getValueType();
16123 bool isLE = DAG.getDataLayout().isLittleEndian();
16124 SDLoc DL(N);
16125
16126 // trunc(undef) = undef
16127 if (N0.isUndef())
16128 return DAG.getUNDEF(VT);
16129
16130 // fold (truncate (truncate x)) -> (truncate x)
16131 if (N0.getOpcode() == ISD::TRUNCATE)
16132 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16133
16134 // fold saturated truncate
16135 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16136 return SaturatedTR;
16137
16138 // fold (truncate c1) -> c1
16139 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16140 return C;
16141
16142 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16143 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16144 N0.getOpcode() == ISD::SIGN_EXTEND ||
16145 N0.getOpcode() == ISD::ANY_EXTEND) {
16146 // if the source is smaller than the dest, we still need an extend.
16147 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16148 SDNodeFlags Flags;
16149 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16150 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16151 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16152 }
16153 // if the source is larger than the dest, then we just need the truncate.
16154 if (N0.getOperand(0).getValueType().bitsGT(VT))
16155 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16156 // if the source and dest are the same type, we can drop both the extend
16157 // and the truncate.
16158 return N0.getOperand(0);
16159 }
16160
16161 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16162 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16163 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16164 N0.hasOneUse()) {
16165 SDValue X = N0.getOperand(0);
16166 SDValue ExtVal = N0.getOperand(1);
16167 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16168 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16169 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16170 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16171 }
16172 }
16173
16174 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16175 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16176 return SDValue();
16177
16178 // Fold extract-and-trunc into a narrow extract. For example:
16179 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16180 // i32 y = TRUNCATE(i64 x)
16181 // -- becomes --
16182 // v16i8 b = BITCAST (v2i64 val)
16183 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16184 //
16185 // Note: We only run this optimization after type legalization (which often
16186 // creates this pattern) and before operation legalization after which
16187 // we need to be more careful about the vector instructions that we generate.
16188 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16189 N0->hasOneUse()) {
16190 EVT TrTy = N->getValueType(0);
16191 SDValue Src = N0;
16192
16193 // Check for cases where we shift down an upper element before truncation.
16194 int EltOffset = 0;
16195 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16196 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16197 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16198 Src = Src.getOperand(0);
16199 EltOffset = *ShAmt / TrTy.getSizeInBits();
16200 }
16201 }
16202 }
16203
16204 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16205 EVT VecTy = Src.getOperand(0).getValueType();
16206 EVT ExTy = Src.getValueType();
16207
16208 auto EltCnt = VecTy.getVectorElementCount();
16209 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16210 auto NewEltCnt = EltCnt * SizeRatio;
16211
16212 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16213 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16214
16215 SDValue EltNo = Src->getOperand(1);
16216 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16217 int Elt = EltNo->getAsZExtVal();
16218 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16219 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16220 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16221 DAG.getBitcast(NVT, Src.getOperand(0)),
16222 DAG.getVectorIdxConstant(Index, DL));
16223 }
16224 }
16225 }
16226
16227 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16228 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16229 TLI.isTruncateFree(SrcVT, VT)) {
16230 if (!LegalOperations ||
16231 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16232 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16233 SDLoc SL(N0);
16234 SDValue Cond = N0.getOperand(0);
16235 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16236 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16237 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16238 }
16239 }
16240
16241 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16242 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16243 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16244 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16245 SDValue Amt = N0.getOperand(1);
16246 KnownBits Known = DAG.computeKnownBits(Amt);
16247 unsigned Size = VT.getScalarSizeInBits();
16248 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16249 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16250 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16251 if (AmtVT != Amt.getValueType()) {
16252 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16253 AddToWorklist(Amt.getNode());
16254 }
16255 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16256 }
16257 }
16258
16259 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16260 return V;
16261
16262 if (SDValue ABD = foldABSToABD(N, DL))
16263 return ABD;
16264
16265 // Attempt to pre-truncate BUILD_VECTOR sources.
16266 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16267 N0.hasOneUse() &&
16268 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16269 // Avoid creating illegal types if running after type legalizer.
16270 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16271 EVT SVT = VT.getScalarType();
16272 SmallVector<SDValue, 8> TruncOps;
16273 for (const SDValue &Op : N0->op_values()) {
16274 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16275 TruncOps.push_back(TruncOp);
16276 }
16277 return DAG.getBuildVector(VT, DL, TruncOps);
16278 }
16279
16280 // trunc (splat_vector x) -> splat_vector (trunc x)
16281 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16282 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16283 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16284 EVT SVT = VT.getScalarType();
16285 return DAG.getSplatVector(
16286 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16287 }
16288
16289 // Fold a series of buildvector, bitcast, and truncate if possible.
16290 // For example fold
16291 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16292 // (2xi32 (buildvector x, y)).
16293 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16294 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16295 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
16296 N0.getOperand(0).hasOneUse()) {
16297 SDValue BuildVect = N0.getOperand(0);
16298 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16299 EVT TruncVecEltTy = VT.getVectorElementType();
16300
16301 // Check that the element types match.
16302 if (BuildVectEltTy == TruncVecEltTy) {
16303 // Now we only need to compute the offset of the truncated elements.
16304 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16305 unsigned TruncVecNumElts = VT.getVectorNumElements();
16306 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16307 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16308
16309 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16310 "Invalid number of elements");
16311
16312 SmallVector<SDValue, 8> Opnds;
16313 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16314 i += TruncEltOffset)
16315 Opnds.push_back(BuildVect.getOperand(i));
16316
16317 return DAG.getBuildVector(VT, DL, Opnds);
16318 }
16319 }
16320
16321 // fold (truncate (load x)) -> (smaller load x)
16322 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16323 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16324 if (SDValue Reduced = reduceLoadWidth(N))
16325 return Reduced;
16326
16327 // Handle the case where the truncated result is at least as wide as the
16328 // loaded type.
16329 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16330 auto *LN0 = cast<LoadSDNode>(N0);
16331 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16332 SDValue NewLoad = DAG.getExtLoad(
16333 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16334 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16335 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16336 return NewLoad;
16337 }
16338 }
16339 }
16340
16341 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16342 // where ... are all 'undef'.
16343 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16344 SmallVector<EVT, 8> VTs;
16345 SDValue V;
16346 unsigned Idx = 0;
16347 unsigned NumDefs = 0;
16348
16349 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16350 SDValue X = N0.getOperand(i);
16351 if (!X.isUndef()) {
16352 V = X;
16353 Idx = i;
16354 NumDefs++;
16355 }
16356 // Stop if more than one member is non-undef.
16357 if (NumDefs > 1)
16358 break;
16359
16360 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
16361 VT.getVectorElementType(),
16362 X.getValueType().getVectorElementCount()));
16363 }
16364
16365 if (NumDefs == 0)
16366 return DAG.getUNDEF(VT);
16367
16368 if (NumDefs == 1) {
16369 assert(V.getNode() && "The single defined operand is empty!");
16370 SmallVector<SDValue, 8> Opnds;
16371 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16372 if (i != Idx) {
16373 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16374 continue;
16375 }
16376 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16377 AddToWorklist(NV.getNode());
16378 Opnds.push_back(NV);
16379 }
16380 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16381 }
16382 }
16383
16384 // Fold truncate of a bitcast of a vector to an extract of the low vector
16385 // element.
16386 //
16387 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16388 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16389 SDValue VecSrc = N0.getOperand(0);
16390 EVT VecSrcVT = VecSrc.getValueType();
16391 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16392 (!LegalOperations ||
16393 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16394 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16395 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16396 DAG.getVectorIdxConstant(Idx, DL));
16397 }
16398 }
16399
16400 // Simplify the operands using demanded-bits information.
16401 if (SimplifyDemandedBits(SDValue(N, 0)))
16402 return SDValue(N, 0);
16403
16404 // fold (truncate (extract_subvector(ext x))) ->
16405 // (extract_subvector x)
16406 // TODO: This can be generalized to cover cases where the truncate and extract
16407 // do not fully cancel each other out.
16408 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16409 SDValue N00 = N0.getOperand(0);
16410 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16411 N00.getOpcode() == ISD::ZERO_EXTEND ||
16412 N00.getOpcode() == ISD::ANY_EXTEND) {
16413 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16414 VT.getVectorElementType())
16415 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16416 N00.getOperand(0), N0.getOperand(1));
16417 }
16418 }
16419
16420 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16421 return NewVSel;
16422
16423 // Narrow a suitable binary operation with a non-opaque constant operand by
16424 // moving it ahead of the truncate. This is limited to pre-legalization
16425 // because targets may prefer a wider type during later combines and invert
16426 // this transform.
16427 switch (N0.getOpcode()) {
16428 case ISD::ADD:
16429 case ISD::SUB:
16430 case ISD::MUL:
16431 case ISD::AND:
16432 case ISD::OR:
16433 case ISD::XOR:
16434 if (!LegalOperations && N0.hasOneUse() &&
16435 (isConstantOrConstantVector(N0.getOperand(0), true) ||
16436 isConstantOrConstantVector(N0.getOperand(1), true))) {
16437 // TODO: We already restricted this to pre-legalization, but for vectors
16438 // we are extra cautious to not create an unsupported operation.
16439 // Target-specific changes are likely needed to avoid regressions here.
16440 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16441 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16442 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16443 SDNodeFlags Flags;
16444 // Propagate nuw for sub.
16445 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16446 DAG.MaskedValueIsZero(
16447 N0->getOperand(0),
16448 APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
16449 VT.getScalarSizeInBits())))
16450 Flags.setNoUnsignedWrap(true);
16451 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16452 }
16453 }
16454 break;
16455 case ISD::ADDE:
16456 case ISD::UADDO_CARRY:
16457 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16458 // (trunc uaddo_carry(X, Y, Carry)) ->
16459 // (uaddo_carry trunc(X), trunc(Y), Carry)
16460 // When the adde's carry is not used.
16461 // We only do for uaddo_carry before legalize operation
16462 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16463 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16464 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16465 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16466 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16467 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16468 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16469 }
16470 break;
16471 case ISD::USUBSAT:
16472 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16473 // enough to know that the upper bits are zero, we must ensure that we don't
16474 // introduce an extra truncate.
16475 if (!LegalOperations && N0.hasOneUse() &&
16476 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
16477 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
16478 VT.getScalarSizeInBits() &&
16479 hasOperation(N0.getOpcode(), VT)) {
16480 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16481 DAG, DL);
16482 }
16483 break;
16484 case ISD::AVGFLOORS:
16485 case ISD::AVGFLOORU:
16486 case ISD::AVGCEILS:
16487 case ISD::AVGCEILU:
16488 case ISD::ABDS:
16489 case ISD::ABDU:
16490 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16491 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16492 if (!LegalOperations && N0.hasOneUse() &&
16493 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16494 EVT TruncVT = VT;
16495 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16496 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16497
16498 SDValue A = N0.getOperand(0);
16499 SDValue B = N0.getOperand(1);
16500 bool CanFold = false;
16501
16502 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16503 N0.getOpcode() == ISD::ABDU) {
16504 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16505 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16506 DAG.MaskedValueIsZero(A, UpperBits);
16507 } else {
16508 unsigned NeededBits = SrcBits - TruncBits;
16509 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16510 DAG.ComputeNumSignBits(A) > NeededBits;
16511 }
16512
16513 if (CanFold) {
16514 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16515 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16516 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16517 }
16518 }
16519 break;
16520 }
16521
16522 return SDValue();
16523}
16524
16525static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16526 SDValue Elt = N->getOperand(i);
16527 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16528 return Elt.getNode();
16529 return Elt.getOperand(Elt.getResNo()).getNode();
16530}
16531
16532/// build_pair (load, load) -> load
16533/// if load locations are consecutive.
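/// For example, on a little-endian target a BUILD_PAIR of two i32 loads from p
/// and p+4 can, subject to the legality and aliasing checks below, become a
/// single i64 load from p (illustrative sketch only).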
16534SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16535 assert(N->getOpcode() == ISD::BUILD_PAIR);
16536
16537 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16538 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16539
16540 // A BUILD_PAIR always has the least significant part in elt 0 and the
16541 // most significant part in elt 1. So when combining into one large load, we
16542 // need to consider the endianness.
16543 if (DAG.getDataLayout().isBigEndian())
16544 std::swap(LD1, LD2);
16545
16546 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16547 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16548 LD1->getAddressSpace() != LD2->getAddressSpace())
16549 return SDValue();
16550
16551 unsigned LD1Fast = 0;
16552 EVT LD1VT = LD1->getValueType(0);
16553 unsigned LD1Bytes = LD1VT.getStoreSize();
16554 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16555 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16556 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16557 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16558 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16559 LD1->getPointerInfo(), LD1->getAlign());
16560
16561 return SDValue();
16562}
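// [Editorial illustration] Example of the BUILD_PAIR combine above: on a
// little-endian target, (build_pair (i32 load [P]), (i32 load [P+4])) keeps
// the low half at the lower address, so it can be replaced by a single
// (i64 load [P]). On a big-endian target the wide load must start at the
// address of the most-significant half, which is why LD1 and LD2 are swapped
// before checking that the loads are consecutive.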
16563
16564static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16565 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16566 // and Lo parts; on big-endian machines it doesn't.
16567 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16568}
16569
16570SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16571 const TargetLowering &TLI) {
16572 // If this is not a bitcast to an FP type or if the target doesn't have
16573 // IEEE754-compliant FP logic, we're done.
16574 EVT VT = N->getValueType(0);
16575 SDValue N0 = N->getOperand(0);
16576 EVT SourceVT = N0.getValueType();
16577
16578 if (!VT.isFloatingPoint())
16579 return SDValue();
16580
16581 // TODO: Handle cases where the integer constant is a different scalar
16582 // bitwidth to the FP.
16583 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16584 return SDValue();
16585
16586 unsigned FPOpcode;
16587 APInt SignMask;
16588 switch (N0.getOpcode()) {
16589 case ISD::AND:
16590 FPOpcode = ISD::FABS;
16591 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16592 break;
16593 case ISD::XOR:
16594 FPOpcode = ISD::FNEG;
16595 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16596 break;
16597 case ISD::OR:
16598 FPOpcode = ISD::FABS;
16599 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16600 break;
16601 default:
16602 return SDValue();
16603 }
16604
16605 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16606 return SDValue();
16607
16608 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16609 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16610 // removing this would require more changes.
16611 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16612 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16613 return true;
16614
16615 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16616 };
16617
16618 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16619 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16620 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16621 // fneg (fabs X)
16622 SDValue LogicOp0 = N0.getOperand(0);
16623 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16624 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16625 IsBitCastOrFree(LogicOp0, VT)) {
16626 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16627 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16628 NumFPLogicOpsConv++;
16629 if (N0.getOpcode() == ISD::OR)
16630 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16631 return FPOp;
16632 }
16633
16634 return SDValue();
16635}
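// [Editorial illustration] Concrete f32 instance of the folds above, where the
// sign mask is 0x80000000:
//   (f32 (bitcast (and (i32 (bitcast X)), 0x7fffffff))) -> (fabs X)
//   (f32 (bitcast (xor (i32 (bitcast X)), 0x80000000))) -> (fneg X)
//   (f32 (bitcast (or  (i32 (bitcast X)), 0x80000000))) -> (fneg (fabs X))
// AND clears the sign bit, XOR flips it, and OR forces it to 1, which is
// exactly what fabs, fneg and fneg(fabs) do to an IEEE-754 value.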
16636
16637SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16638 SDValue N0 = N->getOperand(0);
16639 EVT VT = N->getValueType(0);
16640
16641 if (N0.isUndef())
16642 return DAG.getUNDEF(VT);
16643
16644 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16645 // Only do this before legalize types, unless both types are integer and the
16646 // scalar type is legal. Only do this before legalize ops, since the target
16647 // may be depending on the bitcast.
16648 // First check to see if this is all constant.
16649 // TODO: Support FP bitcasts after legalize types.
16650 if (VT.isVector() &&
16651 (!LegalTypes ||
16652 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16653 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16654 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16655 cast<BuildVectorSDNode>(N0)->isConstant())
16656 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16658
16659 // If the input is a constant, let getNode fold it.
16660 if (isIntOrFPConstant(N0)) {
16661 // If we can't allow illegal operations, we need to check that this is just
16662 // an fp -> int or int -> fp conversion and that the resulting operation will
16663 // be legal.
16664 if (!LegalOperations ||
16665 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16666 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16667 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16668 TLI.isOperationLegal(ISD::Constant, VT))) {
16669 SDValue C = DAG.getBitcast(VT, N0);
16670 if (C.getNode() != N)
16671 return C;
16672 }
16673 }
16674
16675 // (conv (conv x, t1), t2) -> (conv x, t2)
16676 if (N0.getOpcode() == ISD::BITCAST)
16677 return DAG.getBitcast(VT, N0.getOperand(0));
16678
16679 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16680 // iff the current bitwise logicop type isn't legal
16681 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16682 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16683 auto IsFreeBitcast = [VT](SDValue V) {
16684 return (V.getOpcode() == ISD::BITCAST &&
16685 V.getOperand(0).getValueType() == VT) ||
16686 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16687 V->hasOneUse());
16688 };
16689 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16690 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16691 DAG.getBitcast(VT, N0.getOperand(0)),
16692 DAG.getBitcast(VT, N0.getOperand(1)));
16693 }
16694
16695 // fold (conv (load x)) -> (load (conv*)x)
16696 // If the resultant load doesn't need a higher alignment than the original!
16697 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16698 // Do not remove the cast if the types differ in endian layout.
16699 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
16700 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
16701 // If the load is volatile, we only want to change the load type if the
16702 // resulting load is legal. Otherwise we might increase the number of
16703 // memory accesses. We don't care if the original type was legal or not
16704 // as we assume software couldn't rely on the number of accesses of an
16705 // illegal type.
16706 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
16707 TLI.isOperationLegal(ISD::LOAD, VT))) {
16708 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16709
16710 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16711 *LN0->getMemOperand())) {
16712 // If the range metadata type does not match the new memory
16713 // operation type, remove the range metadata.
16714 if (const MDNode *MD = LN0->getRanges()) {
16715 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16716 if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
16717 !VT.isInteger()) {
16718 LN0->getMemOperand()->clearRanges();
16719 }
16720 }
16721 SDValue Load =
16722 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16723 LN0->getMemOperand());
16724 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16725 return Load;
16726 }
16727 }
16728
16729 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16730 return V;
16731
16732 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16733 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16734 //
16735 // For ppc_fp128:
16736 // fold (bitcast (fneg x)) ->
16737 // flipbit = signbit
16738 // (xor (bitcast x) (build_pair flipbit, flipbit))
16739 //
16740 // fold (bitcast (fabs x)) ->
16741 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16742 // (xor (bitcast x) (build_pair flipbit, flipbit))
16743 // This often reduces constant pool loads.
16744 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16745 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16746 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16747 !N0.getValueType().isVector()) {
16748 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16749 AddToWorklist(NewConv.getNode());
16750
16751 SDLoc DL(N);
16752 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16753 assert(VT.getSizeInBits() == 128);
16754 SDValue SignBit = DAG.getConstant(
16755 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16756 SDValue FlipBit;
16757 if (N0.getOpcode() == ISD::FNEG) {
16758 FlipBit = SignBit;
16759 AddToWorklist(FlipBit.getNode());
16760 } else {
16761 assert(N0.getOpcode() == ISD::FABS);
16762 SDValue Hi =
16763 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16764 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16765 SDLoc(NewConv)));
16766 AddToWorklist(Hi.getNode());
16767 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16768 AddToWorklist(FlipBit.getNode());
16769 }
16770 SDValue FlipBits =
16771 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16772 AddToWorklist(FlipBits.getNode());
16773 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16774 }
16775 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16776 if (N0.getOpcode() == ISD::FNEG)
16777 return DAG.getNode(ISD::XOR, DL, VT,
16778 NewConv, DAG.getConstant(SignBit, DL, VT));
16779 assert(N0.getOpcode() == ISD::FABS);
16780 return DAG.getNode(ISD::AND, DL, VT,
16781 NewConv, DAG.getConstant(~SignBit, DL, VT));
16782 }
16783
16784 // fold (bitconvert (fcopysign cst, x)) ->
16785 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16786 // Note that we don't handle (copysign x, cst) because this can always be
16787 // folded to an fneg or fabs.
16788 //
16789 // For ppc_fp128:
16790 // fold (bitcast (fcopysign cst, x)) ->
16791 // flipbit = (and (extract_element
16792 // (xor (bitcast cst), (bitcast x)), 0),
16793 // signbit)
16794 // (xor (bitcast cst) (build_pair flipbit, flipbit))
16795 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16796 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16797 !VT.isVector()) {
16798 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16799 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16800 if (isTypeLegal(IntXVT)) {
16801 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16802 AddToWorklist(X.getNode());
16803
16804 // If X has a different width than the result/lhs, sext it or truncate it.
16805 unsigned VTWidth = VT.getSizeInBits();
16806 if (OrigXWidth < VTWidth) {
16807 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16808 AddToWorklist(X.getNode());
16809 } else if (OrigXWidth > VTWidth) {
16810 // To get the sign bit in the right place, we have to shift it right
16811 // before truncating.
16812 SDLoc DL(X);
16813 X = DAG.getNode(ISD::SRL, DL,
16814 X.getValueType(), X,
16815 DAG.getConstant(OrigXWidth-VTWidth, DL,
16816 X.getValueType()));
16817 AddToWorklist(X.getNode());
16818 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16819 AddToWorklist(X.getNode());
16820 }
16821
16822 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16823 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16824 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16825 AddToWorklist(Cst.getNode());
16826 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16827 AddToWorklist(X.getNode());
16828 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16829 AddToWorklist(XorResult.getNode());
16830 SDValue XorResult64 = DAG.getNode(
16831 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16832 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16833 SDLoc(XorResult)));
16834 AddToWorklist(XorResult64.getNode());
16835 SDValue FlipBit =
16836 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16837 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16838 AddToWorklist(FlipBit.getNode());
16839 SDValue FlipBits =
16840 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16841 AddToWorklist(FlipBits.getNode());
16842 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16843 }
16844 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16845 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16846 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16847 AddToWorklist(X.getNode());
16848
16849 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16850 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16851 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16852 AddToWorklist(Cst.getNode());
16853
16854 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16855 }
16856 }
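// [Editorial illustration] Example of the fcopysign fold above with VT = i32:
// (bitcast (fcopysign f32 1.0, f32 X)) becomes
//   (or (and (bitcast X), 0x80000000), (and 0x3f800000, 0x7fffffff))
// = (or (and (bitcast X), 0x80000000), 0x3f800000),
// i.e. the sign bit is taken from X and the magnitude bits from the constant
// 1.0, whose i32 pattern is 0x3f800000.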
16857
16858 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16859 if (N0.getOpcode() == ISD::BUILD_PAIR)
16860 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16861 return CombineLD;
16862
16863 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16864 // => int_vt (any_extend elt_vt:x)
16865 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16866 SDValue SrcScalar = N0.getOperand(0);
16867 if (SrcScalar.getValueType().isScalarInteger())
16868 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16869 }
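// [Editorial illustration] Example of the scalar_to_vector fold above: with
// x : i32,
//   (i64 (bitcast (v2i32 (scalar_to_vector x)))) -> (i64 (any_extend x))
// The lanes not written by scalar_to_vector are undefined, so only the bits
// originating from x carry meaning in the wide integer, and any_extend
// likewise leaves the remaining bits unspecified.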
16870
16871 // Remove double bitcasts from shuffles - this is often a legacy of
16872 // XformToShuffleWithZero being used to combine bitmaskings (of
16873 // float vectors bitcast to integer vectors) into shuffles.
16874 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16875 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16876 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16877 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16878 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16879 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16880
16881 // If operands are a bitcast, peek through if it casts the original VT.
16882 // If operands are a constant, just bitcast back to original VT.
16883 auto PeekThroughBitcast = [&](SDValue Op) {
16884 if (Op.getOpcode() == ISD::BITCAST &&
16885 Op.getOperand(0).getValueType() == VT)
16886 return SDValue(Op.getOperand(0));
16887 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16888 return DAG.getBitcast(VT, Op);
16889 return SDValue();
16890 };
16891
16892 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16893 // the result type of this bitcast. This would eliminate at least one
16894 // bitcast. See the transform in InstCombine.
16895 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16896 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16897 if (!(SV0 && SV1))
16898 return SDValue();
16899
16900 int MaskScale =
16901 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16902 SmallVector<int, 8> NewMask;
16903 for (int M : SVN->getMask())
16904 for (int i = 0; i != MaskScale; ++i)
16905 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16906
16907 SDValue LegalShuffle =
16908 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16909 if (LegalShuffle)
16910 return LegalShuffle;
16911 }
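// [Editorial illustration] Worked example of the mask widening above: for
// bitcast to v4i32 of a v2f64 shuffle with mask <1, 0>, MaskScale = 4 / 2 = 2
// and the rebuilt v4i32 mask is <2, 3, 0, 1>; an undef entry (-1) expands to
// MaskScale undef entries.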
16912
16913 return SDValue();
16914}
16915
16916SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16917 EVT VT = N->getValueType(0);
16918 return CombineConsecutiveLoads(N, VT);
16919}
16920
16921SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16922 SDValue N0 = N->getOperand(0);
16923
16924 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16925 return N0;
16926
16927 // If we have frozen and unfrozen users of N0, update so everything uses N.
16928 if (!N0.isUndef() && !N0.hasOneUse()) {
16929 SDValue FrozenN0(N, 0);
16930 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
16931 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
16932 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
16933 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
16934 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16935 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
16936 DAG.UpdateNodeOperands(N, N0);
16937 return FrozenN0;
16938 }
16939
16940 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16941 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16942 // example https://reviews.llvm.org/D136529#4120959.
16943 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16944 return SDValue();
16945
16946 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16947 // Try to push freeze through instructions that propagate but don't produce
16948 // poison as far as possible. If an operand of freeze follows three
16949 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
16950 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16951 // the freeze through to the operands that are not guaranteed non-poison.
16952 // NOTE: we will strip poison-generating flags, so ignore them here.
16953 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16954 /*ConsiderFlags*/ false) ||
16955 N0->getNumValues() != 1 || !N0->hasOneUse())
16956 return SDValue();
16957
16958 // TODO: we should always allow multiple operands. However, this increases
16959 // the likelihood of infinite loops: the ReplaceAllUsesOfValueWith call below
16960 // can cause later nodes that share frozen operands to fold again, and the
16961 // recursion depth limit on isGuaranteedNotToBeUndefOrPoison then prevents us
16962 // from confirming that the other operands are not poison.
16963 bool AllowMultipleMaybePoisonOperands =
16964 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16965 N0.getOpcode() == ISD::BUILD_VECTOR ||
16967 N0.getOpcode() == ISD::BUILD_PAIR ||
16970
16971 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16972 // ones" or "constant" into something that depends on FrozenUndef. We can
16973 // instead pick undef values to keep those properties, while at the same time
16974 // folding away the freeze.
16975 // If we implement a more general solution for folding away freeze(undef) in
16976 // the future, then this special handling can be removed.
16977 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16978 SDLoc DL(N0);
16979 EVT VT = N0.getValueType();
16980 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16981 return DAG.getAllOnesConstant(DL, VT);
16982 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16983 SmallVector<SDValue, 8> NewVecC;
16984 for (const SDValue &Op : N0->op_values())
16985 NewVecC.push_back(
16986 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16987 return DAG.getBuildVector(VT, DL, NewVecC);
16988 }
16989 }
16990
16991 SmallSet<SDValue, 8> MaybePoisonOperands;
16992 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16993 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16994 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
16995 continue;
16996 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16997 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16998 if (IsNewMaybePoisonOperand)
16999 MaybePoisonOperandNumbers.push_back(OpNo);
17000 if (!HadMaybePoisonOperands)
17001 continue;
17002 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
17003 // Multiple maybe-poison ops when not allowed - bail out.
17004 return SDValue();
17005 }
17006 }
17007 // NOTE: the whole op may still not be guaranteed to be free of undef or
17008 // poison, because it could create undef or poison due to its own
17009 // poison-generating flags. So not finding any maybe-poison operands is fine.
17010
17011 for (unsigned OpNo : MaybePoisonOperandNumbers) {
17012 // N0 can mutate during iteration, so make sure to refetch the maybe poison
17013 // operands via the operand numbers. The typical scenario is that we have
17014 // something like this
17015 // t262: i32 = freeze t181
17016 // t150: i32 = ctlz_zero_undef t262
17017 // t184: i32 = ctlz_zero_undef t181
17018 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
17019 // When freezing the t181 operand we get t262 back, and then the
17020 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
17021 // also recursively replace t184 by t150.
17022 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
17023 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
17024 if (MaybePoisonOperand.isUndef())
17025 continue;
17026 // First, freeze each offending operand.
17027 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
17028 // Then, change all other uses of unfrozen operand to use frozen operand.
17029 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
17030 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
17031 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
17032 // But, that also updated the use in the freeze we just created, thus
17033 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17034 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
17035 MaybePoisonOperand);
17036 }
17037
17038 // This node has been merged with another.
17039 if (N->getOpcode() == ISD::DELETED_NODE)
17040 return SDValue(N, 0);
17041 }
17042
17043 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17044
17045 // The whole node may have been updated, so the value we were holding
17046 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
17047 N0 = N->getOperand(0);
17048
17049 // Finally, recreate the node; its operands were updated to use
17050 // frozen operands, so we just need to use its "original" operands.
17051 SmallVector<SDValue> Ops(N0->ops());
17052 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
17053 // leave for a future patch.
17054 for (SDValue &Op : Ops) {
17055 if (Op.isUndef())
17056 Op = DAG.getFreeze(Op);
17057 }
17058
17059 SDLoc DL(N0);
17060
17061 // Special case handling for ShuffleVectorSDNode nodes.
17062 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
17063 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
17064 SVN->getMask());
17065
17066 // NOTE: this strips poison generating flags.
17067 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
17068 // ninf, nsz, or fast.
17069 // However, contract, reassoc, afn, and arcp should be preserved,
17070 // as these fast-math flags do not introduce poison values.
17071 SDNodeFlags SrcFlags = N0->getFlags();
17072 SDNodeFlags SafeFlags;
17073 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17074 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17075 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17076 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17077 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
17078}
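// [Editorial illustration] Example of the freeze push-through above:
//   freeze (or x, 7) -> or (freeze x), 7
// OR propagates poison but never creates it, and the constant operand is
// trivially non-poison, so freezing x alone is enough; any poison-generating
// flags on the original node are stripped when it is rebuilt.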
17079
17080// Returns true if floating point contraction is allowed on the FMUL-SDValue
17081// `N`
17082 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
17083 assert(N.getOpcode() == ISD::FMUL);
17084
17085 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17086 N->getFlags().hasAllowContract();
17087}
17088
17089/// Try to perform FMA combining on a given FADD node.
17090template <class MatchContextClass>
17091SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17092 SDValue N0 = N->getOperand(0);
17093 SDValue N1 = N->getOperand(1);
17094 EVT VT = N->getValueType(0);
17095 SDLoc SL(N);
17096 MatchContextClass matcher(DAG, TLI, N);
17097 const TargetOptions &Options = DAG.getTarget().Options;
17098
17099 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17100
17101 // Floating-point multiply-add with intermediate rounding.
17102 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17103 // FIXME: Add VP_FMAD opcode.
17104 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17105
17106 // Floating-point multiply-add without intermediate rounding.
17107 bool HasFMA =
17108 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17109 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17110
17111 // No valid opcode, do not combine.
17112 if (!HasFMAD && !HasFMA)
17113 return SDValue();
17114
17115 bool AllowFusionGlobally =
17116 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17117 // If the addition is not contractable, do not combine.
17118 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17119 return SDValue();
17120
17121 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17122 // beneficial. It does not reduce latency. It increases register pressure. It
17123 // replaces an fadd with an fma which is a more complex instruction, so is
17124 // likely to have a larger encoding, use more functional units, etc.
17125 if (N0 == N1)
17126 return SDValue();
17127
17128 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17129 return SDValue();
17130
17131 // Always prefer FMAD to FMA for precision.
17132 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17133 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17134
17135 auto isFusedOp = [&](SDValue N) {
17136 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17137 };
17138
17139 // Is the node an FMUL and contractable either due to global flags or
17140 // SDNodeFlags.
17141 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17142 if (!matcher.match(N, ISD::FMUL))
17143 return false;
17144 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17145 };
17146 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17147 // prefer to fold the multiply with fewer uses.
17148 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
17149 if (N0->use_size() > N1->use_size())
17150 std::swap(N0, N1);
17151 }
17152
17153 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17154 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17155 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17156 N0.getOperand(1), N1);
17157 }
17158
17159 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17160 // Note: Commutes FADD operands.
17161 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17162 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17163 N1.getOperand(1), N0);
17164 }
17165
17166 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17167 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17168 // This also works with nested fma instructions:
17169 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
17170 // fma A, B, (fma C, D, fma (E, F, G))
17171 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
17172 // fma A, B, (fma C, D, fma (E, F, G)).
17173 // This requires reassociation because it changes the order of operations.
17174 bool CanReassociate = N->getFlags().hasAllowReassociation();
17175 if (CanReassociate) {
17176 SDValue FMA, E;
17177 if (isFusedOp(N0) && N0.hasOneUse()) {
17178 FMA = N0;
17179 E = N1;
17180 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17181 FMA = N1;
17182 E = N0;
17183 }
17184
17185 SDValue TmpFMA = FMA;
17186 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17187 SDValue FMul = TmpFMA->getOperand(2);
17188 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17189 SDValue C = FMul.getOperand(0);
17190 SDValue D = FMul.getOperand(1);
17191 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17192 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17193 // Replacing the inner FMul could cause the outer FMA to be simplified
17194 // away.
17195 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17196 }
17197
17198 TmpFMA = TmpFMA->getOperand(2);
17199 }
17200 }
17201
17202 // Look through FP_EXTEND nodes to do more combining.
17203
17204 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17205 if (matcher.match(N0, ISD::FP_EXTEND)) {
17206 SDValue N00 = N0.getOperand(0);
17207 if (isContractableFMUL(N00) &&
17208 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17209 N00.getValueType())) {
17210 return matcher.getNode(
17211 PreferredFusedOpcode, SL, VT,
17212 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17213 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17214 }
17215 }
17216
17217 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17218 // Note: Commutes FADD operands.
17219 if (matcher.match(N1, ISD::FP_EXTEND)) {
17220 SDValue N10 = N1.getOperand(0);
17221 if (isContractableFMUL(N10) &&
17222 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17223 N10.getValueType())) {
17224 return matcher.getNode(
17225 PreferredFusedOpcode, SL, VT,
17226 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17227 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17228 }
17229 }
17230
17231 // More folding opportunities when target permits.
17232 if (Aggressive) {
17233 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17234 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17235 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17236 SDValue Z) {
17237 return matcher.getNode(
17238 PreferredFusedOpcode, SL, VT, X, Y,
17239 matcher.getNode(PreferredFusedOpcode, SL, VT,
17240 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17241 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17242 };
17243 if (isFusedOp(N0)) {
17244 SDValue N02 = N0.getOperand(2);
17245 if (matcher.match(N02, ISD::FP_EXTEND)) {
17246 SDValue N020 = N02.getOperand(0);
17247 if (isContractableFMUL(N020) &&
17248 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17249 N020.getValueType())) {
17250 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17251 N020.getOperand(0), N020.getOperand(1),
17252 N1);
17253 }
17254 }
17255 }
17256
17257 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17258 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17259 // FIXME: This turns two single-precision and one double-precision
17260 // operation into two double-precision operations, which might not be
17261 // interesting for all targets, especially GPUs.
17262 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17263 SDValue Z) {
17264 return matcher.getNode(
17265 PreferredFusedOpcode, SL, VT,
17266 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17267 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17268 matcher.getNode(PreferredFusedOpcode, SL, VT,
17269 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17270 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17271 };
17272 if (N0.getOpcode() == ISD::FP_EXTEND) {
17273 SDValue N00 = N0.getOperand(0);
17274 if (isFusedOp(N00)) {
17275 SDValue N002 = N00.getOperand(2);
17276 if (isContractableFMUL(N002) &&
17277 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17278 N00.getValueType())) {
17279 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17280 N002.getOperand(0), N002.getOperand(1),
17281 N1);
17282 }
17283 }
17284 }
17285
17286 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17287 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17288 if (isFusedOp(N1)) {
17289 SDValue N12 = N1.getOperand(2);
17290 if (N12.getOpcode() == ISD::FP_EXTEND) {
17291 SDValue N120 = N12.getOperand(0);
17292 if (isContractableFMUL(N120) &&
17293 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17294 N120.getValueType())) {
17295 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17296 N120.getOperand(0), N120.getOperand(1),
17297 N0);
17298 }
17299 }
17300 }
17301
17302 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17303 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17304 // FIXME: This turns two single-precision and one double-precision
17305 // operation into two double-precision operations, which might not be
17306 // interesting for all targets, especially GPUs.
17307 if (N1.getOpcode() == ISD::FP_EXTEND) {
17308 SDValue N10 = N1.getOperand(0);
17309 if (isFusedOp(N10)) {
17310 SDValue N102 = N10.getOperand(2);
17311 if (isContractableFMUL(N102) &&
17312 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17313 N10.getValueType())) {
17314 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17315 N102.getOperand(0), N102.getOperand(1),
17316 N0);
17317 }
17318 }
17319 }
17320 }
17321
17322 return SDValue();
17323}
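// [Editorial illustration] Basic shape of the contractions above:
//   fadd (fmul a, b), c -> fma a, b, c
// removes the intermediate rounding of the multiply (hence the contract /
// fp-contract=fast requirement). The nested form
//   fadd (fma a, b, (fmul c, d)), e -> fma a, b, (fma c, d, e)
// additionally reorders the additions, which is why it also requires the
// reassoc flag.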
17324
17325/// Try to perform FMA combining on a given FSUB node.
17326template <class MatchContextClass>
17327SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17328 SDValue N0 = N->getOperand(0);
17329 SDValue N1 = N->getOperand(1);
17330 EVT VT = N->getValueType(0);
17331 SDLoc SL(N);
17332 MatchContextClass matcher(DAG, TLI, N);
17333 const TargetOptions &Options = DAG.getTarget().Options;
17334
17335 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17336
17337 // Floating-point multiply-add with intermediate rounding.
17338 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17339 // FIXME: Add VP_FMAD opcode.
17340 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17341
17342 // Floating-point multiply-add without intermediate rounding.
17343 bool HasFMA =
17344 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17345 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17346
17347 // No valid opcode, do not combine.
17348 if (!HasFMAD && !HasFMA)
17349 return SDValue();
17350
17351 const SDNodeFlags Flags = N->getFlags();
17352 bool AllowFusionGlobally =
17353 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17354
17355 // If the subtraction is not contractable, do not combine.
17356 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17357 return SDValue();
17358
17359 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17360 return SDValue();
17361
17362 // Always prefer FMAD to FMA for precision.
17363 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17365 bool NoSignedZero = Flags.hasNoSignedZeros();
17366
17367 // Is the node an FMUL and contractable either due to global flags or
17368 // SDNodeFlags.
17369 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17370 if (!matcher.match(N, ISD::FMUL))
17371 return false;
17372 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17373 };
17374
17375 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17376 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17377 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17378 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17379 XY.getOperand(1),
17380 matcher.getNode(ISD::FNEG, SL, VT, Z));
17381 }
17382 return SDValue();
17383 };
17384
17385 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17386 // Note: Commutes FSUB operands.
17387 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17388 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17389 return matcher.getNode(
17390 PreferredFusedOpcode, SL, VT,
17391 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17392 YZ.getOperand(1), X);
17393 }
17394 return SDValue();
17395 };
17396
17397 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17398 // prefer to fold the multiply with fewer uses.
17399 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17400 (N0->use_size() > N1->use_size())) {
17401 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17402 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17403 return V;
17404 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17405 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17406 return V;
17407 } else {
17408 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17409 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17410 return V;
17411 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17412 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17413 return V;
17414 }
17415
17416 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
17417 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17418 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17419 SDValue N00 = N0.getOperand(0).getOperand(0);
17420 SDValue N01 = N0.getOperand(0).getOperand(1);
17421 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17422 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17423 matcher.getNode(ISD::FNEG, SL, VT, N1));
17424 }
17425
17426 // Look through FP_EXTEND nodes to do more combining.
17427
17428 // fold (fsub (fpext (fmul x, y)), z)
17429 // -> (fma (fpext x), (fpext y), (fneg z))
17430 if (matcher.match(N0, ISD::FP_EXTEND)) {
17431 SDValue N00 = N0.getOperand(0);
17432 if (isContractableFMUL(N00) &&
17433 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17434 N00.getValueType())) {
17435 return matcher.getNode(
17436 PreferredFusedOpcode, SL, VT,
17437 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17438 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17439 matcher.getNode(ISD::FNEG, SL, VT, N1));
17440 }
17441 }
17442
17443 // fold (fsub x, (fpext (fmul y, z)))
17444 // -> (fma (fneg (fpext y)), (fpext z), x)
17445 // Note: Commutes FSUB operands.
17446 if (matcher.match(N1, ISD::FP_EXTEND)) {
17447 SDValue N10 = N1.getOperand(0);
17448 if (isContractableFMUL(N10) &&
17449 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17450 N10.getValueType())) {
17451 return matcher.getNode(
17452 PreferredFusedOpcode, SL, VT,
17453 matcher.getNode(
17454 ISD::FNEG, SL, VT,
17455 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17456 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17457 }
17458 }
17459
17460 // fold (fsub (fpext (fneg (fmul x, y))), z)
17461 // -> (fneg (fma (fpext x), (fpext y), z))
17462 // Note: This could be removed with appropriate canonicalization of the
17463 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17464 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17465 // from implementing the canonicalization in visitFSUB.
17466 if (matcher.match(N0, ISD::FP_EXTEND)) {
17467 SDValue N00 = N0.getOperand(0);
17468 if (matcher.match(N00, ISD::FNEG)) {
17469 SDValue N000 = N00.getOperand(0);
17470 if (isContractableFMUL(N000) &&
17471 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17472 N00.getValueType())) {
17473 return matcher.getNode(
17474 ISD::FNEG, SL, VT,
17475 matcher.getNode(
17476 PreferredFusedOpcode, SL, VT,
17477 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17478 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17479 N1));
17480 }
17481 }
17482 }
17483
17484 // fold (fsub (fneg (fpext (fmul x, y))), z)
17485 // -> (fneg (fma (fpext x), (fpext y), z))
17486 // Note: This could be removed with appropriate canonicalization of the
17487 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17488 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17489 // from implementing the canonicalization in visitFSUB.
17490 if (matcher.match(N0, ISD::FNEG)) {
17491 SDValue N00 = N0.getOperand(0);
17492 if (matcher.match(N00, ISD::FP_EXTEND)) {
17493 SDValue N000 = N00.getOperand(0);
17494 if (isContractableFMUL(N000) &&
17495 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17496 N000.getValueType())) {
17497 return matcher.getNode(
17498 ISD::FNEG, SL, VT,
17499 matcher.getNode(
17500 PreferredFusedOpcode, SL, VT,
17501 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17502 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17503 N1));
17504 }
17505 }
17506 }
17507
17508 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17509 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17510 };
17511
17512 auto isFusedOp = [&](SDValue N) {
17513 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17514 };
17515
17516 // More folding opportunities when target permits.
17517 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17518 bool CanFuse = N->getFlags().hasAllowContract();
17519 // fold (fsub (fma x, y, (fmul u, v)), z)
17520 // -> (fma x, y (fma u, v, (fneg z)))
17521 if (CanFuse && isFusedOp(N0) &&
17522 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17523 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17524 return matcher.getNode(
17525 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17526 matcher.getNode(PreferredFusedOpcode, SL, VT,
17527 N0.getOperand(2).getOperand(0),
17528 N0.getOperand(2).getOperand(1),
17529 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17530 }
17531
17532 // fold (fsub x, (fma y, z, (fmul u, v)))
17533 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17534 if (CanFuse && isFusedOp(N1) &&
17535 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17536 N1->hasOneUse() && NoSignedZero) {
17537 SDValue N20 = N1.getOperand(2).getOperand(0);
17538 SDValue N21 = N1.getOperand(2).getOperand(1);
17539 return matcher.getNode(
17540 PreferredFusedOpcode, SL, VT,
17541 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17542 N1.getOperand(1),
17543 matcher.getNode(PreferredFusedOpcode, SL, VT,
17544 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17545 }
17546
17547 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17548 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17549 if (isFusedOp(N0) && N0->hasOneUse()) {
17550 SDValue N02 = N0.getOperand(2);
17551 if (matcher.match(N02, ISD::FP_EXTEND)) {
17552 SDValue N020 = N02.getOperand(0);
17553 if (isContractableAndReassociableFMUL(N020) &&
17554 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17555 N020.getValueType())) {
17556 return matcher.getNode(
17557 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17558 matcher.getNode(
17559 PreferredFusedOpcode, SL, VT,
17560 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17561 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17562 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17563 }
17564 }
17565 }
17566
17567 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17568 // -> (fma (fpext x), (fpext y),
17569 // (fma (fpext u), (fpext v), (fneg z)))
17570 // FIXME: This turns two single-precision and one double-precision
17571 // operation into two double-precision operations, which might not be
17572 // interesting for all targets, especially GPUs.
17573 if (matcher.match(N0, ISD::FP_EXTEND)) {
17574 SDValue N00 = N0.getOperand(0);
17575 if (isFusedOp(N00)) {
17576 SDValue N002 = N00.getOperand(2);
17577 if (isContractableAndReassociableFMUL(N002) &&
17578 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17579 N00.getValueType())) {
17580 return matcher.getNode(
17581 PreferredFusedOpcode, SL, VT,
17582 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17583 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17584 matcher.getNode(
17585 PreferredFusedOpcode, SL, VT,
17586 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17587 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17588 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17589 }
17590 }
17591 }
17592
17593 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17594 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17595 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17596 N1->hasOneUse()) {
17597 SDValue N120 = N1.getOperand(2).getOperand(0);
17598 if (isContractableAndReassociableFMUL(N120) &&
17599 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17600 N120.getValueType())) {
17601 SDValue N1200 = N120.getOperand(0);
17602 SDValue N1201 = N120.getOperand(1);
17603 return matcher.getNode(
17604 PreferredFusedOpcode, SL, VT,
17605 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17606 N1.getOperand(1),
17607 matcher.getNode(
17608 PreferredFusedOpcode, SL, VT,
17609 matcher.getNode(ISD::FNEG, SL, VT,
17610 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17611 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17612 }
17613 }
17614
17615 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17616 // -> (fma (fneg (fpext y)), (fpext z),
17617 // (fma (fneg (fpext u)), (fpext v), x))
17618 // FIXME: This turns two single-precision and one double-precision
17619 // operation into two double-precision operations, which might not be
17620 // interesting for all targets, especially GPUs.
17621 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17622 SDValue CvtSrc = N1.getOperand(0);
17623 SDValue N100 = CvtSrc.getOperand(0);
17624 SDValue N101 = CvtSrc.getOperand(1);
17625 SDValue N102 = CvtSrc.getOperand(2);
17626 if (isContractableAndReassociableFMUL(N102) &&
17627 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17628 CvtSrc.getValueType())) {
17629 SDValue N1020 = N102.getOperand(0);
17630 SDValue N1021 = N102.getOperand(1);
17631 return matcher.getNode(
17632 PreferredFusedOpcode, SL, VT,
17633 matcher.getNode(ISD::FNEG, SL, VT,
17634 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17635 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17636 matcher.getNode(
17637 PreferredFusedOpcode, SL, VT,
17638 matcher.getNode(ISD::FNEG, SL, VT,
17639 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17640 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17641 }
17642 }
17643 }
17644
17645 return SDValue();
17646}
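// [Editorial illustration] The two base cases handled above:
//   fsub (fmul a, b), c -> fma a, b, (fneg c)      since a*b - c = a*b + (-c)
//   fsub c, (fmul a, b) -> fma (fneg a), b, c      since c - a*b = (-a)*b + c
// In the second form the negation must go on a multiplicand, not on the
// addend, because the FSUB operands are commuted into the FMA.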
17647
17648/// Try to perform FMA combining on a given FMUL node based on the distributive
17649/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17650/// subtraction instead of addition).
17651SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17652 SDValue N0 = N->getOperand(0);
17653 SDValue N1 = N->getOperand(1);
17654 EVT VT = N->getValueType(0);
17655 SDLoc SL(N);
17656
17657 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17658
17659 const TargetOptions &Options = DAG.getTarget().Options;
17660
17661 // The transforms below are incorrect when x == 0 and y == inf, because the
17662 // intermediate multiplication produces a nan.
17663 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17664 if (!FAdd->getFlags().hasNoInfs())
17665 return SDValue();
17666
17667 // Floating-point multiply-add without intermediate rounding.
17668 bool HasFMA =
17669 isContractableFMUL(Options, SDValue(N, 0)) &&
17670 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17671 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17672
17673 // Floating-point multiply-add with intermediate rounding. This can result
17674 // in a less precise result due to the changed rounding order.
17675 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17676
17677 // No valid opcode, do not combine.
17678 if (!HasFMAD && !HasFMA)
17679 return SDValue();
17680
17681 // Always prefer FMAD to FMA for precision.
17682 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17683 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17684
17685 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17686 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
17687 auto FuseFADD = [&](SDValue X, SDValue Y) {
17688 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17689 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17690 if (C->isExactlyValue(+1.0))
17691 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17692 Y);
17693 if (C->isExactlyValue(-1.0))
17694 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17695 DAG.getNode(ISD::FNEG, SL, VT, Y));
17696 }
17697 }
17698 return SDValue();
17699 };
17700
17701 if (SDValue FMA = FuseFADD(N0, N1))
17702 return FMA;
17703 if (SDValue FMA = FuseFADD(N1, N0))
17704 return FMA;
17705
17706 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17707 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17708 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17709 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17710 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17711 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17712 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17713 if (C0->isExactlyValue(+1.0))
17714 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17715 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17716 Y);
17717 if (C0->isExactlyValue(-1.0))
17718 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17719 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17720 DAG.getNode(ISD::FNEG, SL, VT, Y));
17721 }
17722 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17723 if (C1->isExactlyValue(+1.0))
17724 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17725 DAG.getNode(ISD::FNEG, SL, VT, Y));
17726 if (C1->isExactlyValue(-1.0))
17727 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17728 Y);
17729 }
17730 }
17731 return SDValue();
17732 };
17733
17734 if (SDValue FMA = FuseFSUB(N0, N1))
17735 return FMA;
17736 if (SDValue FMA = FuseFSUB(N1, N0))
17737 return FMA;
17738
17739 return SDValue();
17740}
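// [Editorial illustration] The distributive rewrites above in concrete form:
//   fmul (fadd x, 1.0), y  =  x*y + y  ->  fma x, y, y
//   fmul (fadd x, -1.0), y =  x*y - y  ->  fma x, y, (fneg y)
//   fmul (fsub 1.0, x), y  =  y - x*y  ->  fma (fneg x), y, y
// The no-infs guard above exists because, e.g., x == 0.0 and y == inf gives
// (0.0 + 1.0) * inf == inf for the original expression, but the expanded form
// contains 0.0 * inf == nan.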
17741
17742SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17743 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17744
17745 // FADD -> FMA combines:
17746 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17747 if (Fused.getOpcode() != ISD::DELETED_NODE)
17748 AddToWorklist(Fused.getNode());
17749 return Fused;
17750 }
17751 return SDValue();
17752}
17753
17754SDValue DAGCombiner::visitFADD(SDNode *N) {
17755 SDValue N0 = N->getOperand(0);
17756 SDValue N1 = N->getOperand(1);
17757 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17758 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17759 EVT VT = N->getValueType(0);
17760 SDLoc DL(N);
17761 const TargetOptions &Options = DAG.getTarget().Options;
17762 SDNodeFlags Flags = N->getFlags();
17763 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17764
17765 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17766 return R;
17767
17768 // fold (fadd c1, c2) -> c1 + c2
17769 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17770 return C;
17771
17772 // canonicalize constant to RHS
17773 if (N0CFP && !N1CFP)
17774 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17775
17776 // fold vector ops
17777 if (VT.isVector())
17778 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17779 return FoldedVOp;
17780
17781 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
17782 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17783 if (N1C && N1C->isZero())
17784 if (N1C->isNegative() || Flags.hasNoSignedZeros())
17785 return N0;
17786
17787 if (SDValue NewSel = foldBinOpIntoSelect(N))
17788 return NewSel;
17789
17790 // fold (fadd A, (fneg B)) -> (fsub A, B)
17791 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17792 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17793 N1, DAG, LegalOperations, ForCodeSize))
17794 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17795
17796 // fold (fadd (fneg A), B) -> (fsub B, A)
17797 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17798 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17799 N0, DAG, LegalOperations, ForCodeSize))
17800 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17801
17802 auto isFMulNegTwo = [](SDValue FMul) {
17803 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17804 return false;
17805 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17806 return C && C->isExactlyValue(-2.0);
17807 };
17808
17809 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17810 if (isFMulNegTwo(N0)) {
17811 SDValue B = N0.getOperand(0);
17812 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17813 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17814 }
17815 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17816 if (isFMulNegTwo(N1)) {
17817 SDValue B = N1.getOperand(0);
17818 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17819 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17820 }
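// [Editorial illustration] The -2.0 folds above in concrete terms:
//   fadd (fmul B, -2.0), A  computes  A - 2*B, and so does
//   fsub A, (fadd B, B);
// the rewrite replaces a multiply by a constant with an add of B to itself and
// drops the -2.0 constant entirely.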
17821
17822 // No FP constant should be created after legalization as the Instruction
17823 // Selection pass has a hard time dealing with FP constants.
17824 bool AllowNewConst = (Level < AfterLegalizeDAG);
17825
17826 // If nnan is enabled, fold lots of things.
17827 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17828 // If allowed, fold (fadd (fneg x), x) -> 0.0
17829 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17830 return DAG.getConstantFP(0.0, DL, VT);
17831
17832 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17833 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17834 return DAG.getConstantFP(0.0, DL, VT);
17835 }
17836
17837 // If reassoc and nsz, fold lots of things.
17838 // TODO: break out portions of the transformations below for which Unsafe is
17839 // considered and which do not require both nsz and reassoc
17840 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
17841 AllowNewConst) {
17842 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17843 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17844 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17845 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17846 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17847 }
17848
17849 // We can fold chains of FADD's of the same value into multiplications.
17850 // This transform is not safe in general because we are reducing the number
17851 // of rounding steps.
17852 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17853 if (N0.getOpcode() == ISD::FMUL) {
17854 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17855 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17856
17857 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17858 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17859 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17860 DAG.getConstantFP(1.0, DL, VT));
17861 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17862 }
17863
17864 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17865 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17866 N1.getOperand(0) == N1.getOperand(1) &&
17867 N0.getOperand(0) == N1.getOperand(0)) {
17868 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17869 DAG.getConstantFP(2.0, DL, VT));
17870 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17871 }
17872 }
17873
17874 if (N1.getOpcode() == ISD::FMUL) {
17875 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17876 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17877
17878 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17879 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17880 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17881 DAG.getConstantFP(1.0, DL, VT));
17882 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17883 }
17884
17885 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17886 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17887 N0.getOperand(0) == N0.getOperand(1) &&
17888 N1.getOperand(0) == N0.getOperand(0)) {
17889 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17890 DAG.getConstantFP(2.0, DL, VT));
17891 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17892 }
17893 }
17894
17895 if (N0.getOpcode() == ISD::FADD) {
17896 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17897 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17898 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17899 (N0.getOperand(0) == N1)) {
17900 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17901 DAG.getConstantFP(3.0, DL, VT));
17902 }
17903 }
17904
17905 if (N1.getOpcode() == ISD::FADD) {
17906 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17907 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17908 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17909 N1.getOperand(0) == N0) {
17910 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17911 DAG.getConstantFP(3.0, DL, VT));
17912 }
17913 }
17914
17915 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17916 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17917 N0.getOperand(0) == N0.getOperand(1) &&
17918 N1.getOperand(0) == N1.getOperand(1) &&
17919 N0.getOperand(0) == N1.getOperand(0)) {
17920 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17921 DAG.getConstantFP(4.0, DL, VT));
17922 }
17923 }
17924 } // reassoc && nsz && AllowNewConst
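// [Editorial illustration] Examples of the reassoc+nsz folds above:
//   fadd (fmul x, c), x           -> fmul x, (c + 1.0)
//   fadd (fadd x, x), x           -> fmul x, 3.0
//   fadd (fadd x, x), (fadd x, x) -> fmul x, 4.0
// Each one collapses a chain of adds into a single multiply; the rounding can
// differ, which is why the reassociation flag is required.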
17925
17926 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
17927 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17928 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17929 VT, N0, N1, Flags))
17930 return SD;
17931 }
17932
17933 // FADD -> FMA combines:
17934 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17935 if (Fused.getOpcode() != ISD::DELETED_NODE)
17936 AddToWorklist(Fused.getNode());
17937 return Fused;
17938 }
17939 return SDValue();
17940}
17941
17942SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17943 SDValue Chain = N->getOperand(0);
17944 SDValue N0 = N->getOperand(1);
17945 SDValue N1 = N->getOperand(2);
17946 EVT VT = N->getValueType(0);
17947 EVT ChainVT = N->getValueType(1);
17948 SDLoc DL(N);
17949 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17950
17951 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17952 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17953 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17954 N1, DAG, LegalOperations, ForCodeSize)) {
17955 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17956 {Chain, N0, NegN1});
17957 }
17958
17959 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17960 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17961 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17962 N0, DAG, LegalOperations, ForCodeSize)) {
17963 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17964 {Chain, N1, NegN0});
17965 }
17966 return SDValue();
17967}
17968
17969SDValue DAGCombiner::visitFSUB(SDNode *N) {
17970 SDValue N0 = N->getOperand(0);
17971 SDValue N1 = N->getOperand(1);
17972 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17973 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17974 EVT VT = N->getValueType(0);
17975 SDLoc DL(N);
17976 const TargetOptions &Options = DAG.getTarget().Options;
17977 const SDNodeFlags Flags = N->getFlags();
17978 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17979
17980 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17981 return R;
17982
17983 // fold (fsub c1, c2) -> c1-c2
17984 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17985 return C;
17986
17987 // fold vector ops
17988 if (VT.isVector())
17989 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17990 return FoldedVOp;
17991
17992 if (SDValue NewSel = foldBinOpIntoSelect(N))
17993 return NewSel;
17994
17995 // (fsub A, 0) -> A
17996 if (N1CFP && N1CFP->isZero()) {
17997 if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
17998 return N0;
17999 }
18000 }
18001
18002 if (N0 == N1) {
18003 // (fsub x, x) -> 0.0
18004 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
18005 return DAG.getConstantFP(0.0f, DL, VT);
18006 }
18007
18008 // (fsub -0.0, N1) -> -N1
18009 if (N0CFP && N0CFP->isZero()) {
18010 if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
18011 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
18012 // flushed to zero, unless all users treat denorms as zero (DAZ).
18013 // FIXME: This transform will change the sign of a NaN and the behavior
18014 // of a signaling NaN. It is only valid when a NoNaN flag is present.
18015 DenormalMode DenormMode = DAG.getDenormalMode(VT);
18016 if (DenormMode == DenormalMode::getIEEE()) {
18017 if (SDValue NegN1 =
18018 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18019 return NegN1;
18020 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18021 return DAG.getNode(ISD::FNEG, DL, VT, N1);
18022 }
18023 }
18024 }
18025
18026 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18027 N1.getOpcode() == ISD::FADD) {
18028 // X - (X + Y) -> -Y
18029 if (N0 == N1->getOperand(0))
18030 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
18031 // X - (Y + X) -> -Y
18032 if (N0 == N1->getOperand(1))
18033 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
18034 }
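// For example, with X == Y == +0.0 the original expression X - (X + Y)
// evaluates to +0.0 while -Y is -0.0, which is why the nsz flag is required
// for these folds.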
18035
18036 // fold (fsub A, (fneg B)) -> (fadd A, B)
18037 if (SDValue NegN1 =
18038 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18039 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
18040
18041 // FSUB -> FMA combines:
18042 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18043 AddToWorklist(Fused.getNode());
18044 return Fused;
18045 }
18046
18047 return SDValue();
18048}
18049
18050// Transform IEEE Floats:
18051// (fmul C, (uitofp Pow2))
18052// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
18053// (fdiv C, (uitofp Pow2))
18054// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
18055//
18056// The rationale is fmul/fdiv by a power of 2 is just change the exponent, so
18057// there is no need for more than an add/sub.
18058//
18059// This is valid under the following circumstances:
18060// 1) We are dealing with IEEE floats
18061// 2) C is normal
18062// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
18063// TODO: Much of this could also be used for generating `ldexp` on targets the
18064// prefer it.
18065SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18066 EVT VT = N->getValueType(0);
18068 return SDValue();
18069
18070 SDValue ConstOp, Pow2Op;
18071
18072 std::optional<int> Mantissa;
18073 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
18074 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18075 return false;
18076
18077 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
18078 Pow2Op = N->getOperand(1 - ConstOpIdx);
18079 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18080 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18081 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
18082 return false;
18083
18084 Pow2Op = Pow2Op.getOperand(0);
18085
18086 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18087 // TODO: We could use knownbits to make this bound more precise.
18088 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18089
18090 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18091 if (CFP == nullptr)
18092 return false;
18093
18094 const APFloat &APF = CFP->getValueAPF();
18095
18096 // Make sure we have a normal constant.
18097 if (!APF.isNormal())
18098 return false;
18099
18100 // Make sure the float's exponent is within the bounds for which this
18101 // transform produces a bitwise-identical value.
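// For example, with f32 (exponent range [-126, 127]) and an i32 Pow2Op,
// dividing a constant whose ilogb is 0 can lower the exponent by at most 32,
// which stays comfortably inside the representable range.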
18102 int CurExp = ilogb(APF);
18103 // FMul by pow2 will only increase exponent.
18104 int MinExp =
18105 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18106 // FDiv by pow2 will only decrease exponent.
18107 int MaxExp =
18108 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
18109 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18110 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18111 return false;
18112
18113 // Finally make sure we actually know the mantissa for the float type.
18114 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18115 if (!Mantissa)
18116 Mantissa = ThisMantissa;
18117
18118 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18119 };
18120
18121 // TODO: We may be able to include undefs.
18122 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18123 };
18124
18125 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18126 return SDValue();
18127
18128 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18129 return SDValue();
18130
18131 // Get log2 after all other checks have taken place. This is because
18132 // BuildLogBase2 may create a new node.
18133 SDLoc DL(N);
18134 // Get Log2 type with same bitwidth as the float type (VT).
18135 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18136 if (VT.isVector())
18137 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18138 VT.getVectorElementCount());
18139
18140 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18141 /*InexpensiveOnly*/ true, NewIntVT);
18142 if (!Log2)
18143 return SDValue();
18144
18145 // Perform actual transform.
18146 SDValue MantissaShiftCnt =
18147 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18148 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
18149 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
18150 // cast. We could implement that by handling the casts here.
18151 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18152 SDValue ResAsInt =
18153 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18154 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18155 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18156 return ResAsFP;
18157}
18158
18159SDValue DAGCombiner::visitFMUL(SDNode *N) {
18160 SDValue N0 = N->getOperand(0);
18161 SDValue N1 = N->getOperand(1);
18162 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18163 EVT VT = N->getValueType(0);
18164 SDLoc DL(N);
18165 const SDNodeFlags Flags = N->getFlags();
18166 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18167
18168 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18169 return R;
18170
18171 // fold (fmul c1, c2) -> c1*c2
18172 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18173 return C;
18174
18175 // canonicalize constant to RHS
18176 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18177 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18178 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18179
18180 // fold vector ops
18181 if (VT.isVector())
18182 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18183 return FoldedVOp;
18184
18185 if (SDValue NewSel = foldBinOpIntoSelect(N))
18186 return NewSel;
18187
18188 if (Flags.hasAllowReassociation()) {
18189 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18190 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18191 N0.getOpcode() == ISD::FMUL) {
18192 SDValue N00 = N0.getOperand(0);
18193 SDValue N01 = N0.getOperand(1);
18194 // Avoid an infinite loop by making sure that N00 is not a constant
18195 // (the inner multiply has not been constant folded yet).
18196 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
18197 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
18198 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18199 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18200 }
18201 }
18202
18203 // Match a special case: (fadd X, X) is the canonical form of X * 2.0.
18204 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18205 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18206 N0.getOperand(0) == N0.getOperand(1)) {
18207 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18208 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18209 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18210 }
18211
18212 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18213 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18214 VT, N0, N1, Flags))
18215 return SD;
18216 }
18217
18218 // fold (fmul X, 2.0) -> (fadd X, X)
18219 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18220 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18221
18222 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18223 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18224 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18225 return DAG.getNode(ISD::FSUB, DL, VT,
18226 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18227 }
18228 }
18229
18230 // -N0 * -N1 --> N0 * N1
18231 TargetLowering::NegatibleCost CostN0 =
18232 TargetLowering::NegatibleCost::Expensive;
18233 TargetLowering::NegatibleCost CostN1 =
18234 TargetLowering::NegatibleCost::Expensive;
18235 SDValue NegN0 =
18236 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18237 if (NegN0) {
18238 HandleSDNode NegN0Handle(NegN0);
18239 SDValue NegN1 =
18240 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18241 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18242 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18243 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18244 }
18245
18246 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18247 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18248 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18249 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18250 TLI.isOperationLegal(ISD::FABS, VT)) {
18251 SDValue Select = N0, X = N1;
18252 if (Select.getOpcode() != ISD::SELECT)
18253 std::swap(Select, X);
18254
18255 SDValue Cond = Select.getOperand(0);
18256 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18257 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18258
18259 if (TrueOpnd && FalseOpnd &&
18260 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18261 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18262 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18263 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18264 switch (CC) {
18265 default: break;
18266 case ISD::SETOLT:
18267 case ISD::SETULT:
18268 case ISD::SETOLE:
18269 case ISD::SETULE:
18270 case ISD::SETLT:
18271 case ISD::SETLE:
18272 std::swap(TrueOpnd, FalseOpnd);
18273 [[fallthrough]];
18274 case ISD::SETOGT:
18275 case ISD::SETUGT:
18276 case ISD::SETOGE:
18277 case ISD::SETUGE:
18278 case ISD::SETGT:
18279 case ISD::SETGE:
18280 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18281 TLI.isOperationLegal(ISD::FNEG, VT))
18282 return DAG.getNode(ISD::FNEG, DL, VT,
18283 DAG.getNode(ISD::FABS, DL, VT, X));
18284 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18285 return DAG.getNode(ISD::FABS, DL, VT, X);
18286
18287 break;
18288 }
18289 }
18290 }
18291
18292 // FMUL -> FMA combines:
18293 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18294 AddToWorklist(Fused.getNode());
18295 return Fused;
18296 }
18297
18298 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18299 // able to run.
18300 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18301 return R;
18302
18303 return SDValue();
18304}
18305
18306template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18307 SDValue N0 = N->getOperand(0);
18308 SDValue N1 = N->getOperand(1);
18309 SDValue N2 = N->getOperand(2);
18310 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18311 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18312 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18313 EVT VT = N->getValueType(0);
18314 SDLoc DL(N);
18315 const TargetOptions &Options = DAG.getTarget().Options;
18316 // FMA nodes have flags that propagate to the created nodes.
18317 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18318 MatchContextClass matcher(DAG, TLI, N);
18319
18320 // Constant fold FMA.
18321 if (SDValue C =
18322 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18323 return C;
18324
18325 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18326 TargetLowering::NegatibleCost CostN0 =
18327 TargetLowering::NegatibleCost::Expensive;
18328 TargetLowering::NegatibleCost CostN1 =
18329 TargetLowering::NegatibleCost::Expensive;
18330 SDValue NegN0 =
18331 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18332 if (NegN0) {
18333 HandleSDNode NegN0Handle(NegN0);
18334 SDValue NegN1 =
18335 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18336 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18337 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18338 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18339 }
18340
18341 if ((Options.NoNaNsFPMath && N->getFlags().hasNoInfs()) ||
18342 (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
18343 if (N->getFlags().hasNoSignedZeros() ||
18344 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18345 if (N0CFP && N0CFP->isZero())
18346 return N2;
18347 if (N1CFP && N1CFP->isZero())
18348 return N2;
18349 }
18350 }
18351
18352 // FIXME: Support splat of constant.
18353 if (N0CFP && N0CFP->isExactlyValue(1.0))
18354 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18355 if (N1CFP && N1CFP->isExactlyValue(1.0))
18356 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18357
18358 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18359 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18360 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18361 return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18362
18363 bool CanReassociate = N->getFlags().hasAllowReassociation();
18364 if (CanReassociate) {
18365 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18366 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18369 return matcher.getNode(
18370 ISD::FMUL, DL, VT, N0,
18371 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18372 }
18373
18374 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18375 if (matcher.match(N0, ISD::FMUL) &&
18378 return matcher.getNode(
18379 ISD::FMA, DL, VT, N0.getOperand(0),
18380 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18381 }
18382 }
18383
18384 // (fma x, -1, y) -> (fadd (fneg x), y)
18385 // FIXME: Support splat of constant.
18386 if (N1CFP) {
18387 if (N1CFP->isExactlyValue(1.0))
18388 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18389
18390 if (N1CFP->isExactlyValue(-1.0) &&
18391 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18392 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18393 AddToWorklist(RHSNeg.getNode());
18394 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18395 }
18396
18397 // fma (fneg x), K, y -> fma x, -K, y
18398 if (matcher.match(N0, ISD::FNEG) &&
18400 (N1.hasOneUse() &&
18401 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18402 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18403 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18404 }
18405 }
18406
18407 // FIXME: Support splat of constant.
18408 if (CanReassociate) {
18409 // (fma x, c, x) -> (fmul x, (c+1))
18410 if (N1CFP && N0 == N2) {
18411 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18412 matcher.getNode(ISD::FADD, DL, VT, N1,
18413 DAG.getConstantFP(1.0, DL, VT)));
18414 }
18415
18416 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18417 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18418 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18419 matcher.getNode(ISD::FADD, DL, VT, N1,
18420 DAG.getConstantFP(-1.0, DL, VT)));
18421 }
18422 }
18423
18424 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
18425 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
18426 if (!TLI.isFNegFree(VT))
18427 if (SDValue Neg = TLI.getCheaperNegatedExpression(
18428 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18429 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18430 return SDValue();
18431}
18432
18433SDValue DAGCombiner::visitFMAD(SDNode *N) {
18434 SDValue N0 = N->getOperand(0);
18435 SDValue N1 = N->getOperand(1);
18436 SDValue N2 = N->getOperand(2);
18437 EVT VT = N->getValueType(0);
18438 SDLoc DL(N);
18439
18440 // Constant fold FMAD.
18441 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18442 return C;
18443
18444 return SDValue();
18445}
18446
18447// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18448// reciprocal.
18449// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18450// Notice that this is not always beneficial. One reason is different targets
18451// may have different costs for FDIV and FMUL, so sometimes the cost of two
18452// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18453// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
18454SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18455 // TODO: Limit this transform based on optsize/minsize - it always creates at
18456 // least 1 extra instruction. But the perf win may be substantial enough
18457 // that only minsize should restrict this.
18458 const SDNodeFlags Flags = N->getFlags();
18459 if (LegalDAG || !Flags.hasAllowReciprocal())
18460 return SDValue();
18461
18462 // Skip if current node is a reciprocal/fneg-reciprocal.
18463 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18464 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18465 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18466 return SDValue();
18467
18468 // Exit early if the target does not want this transform or if there can't
18469 // possibly be enough uses of the divisor to make the transform worthwhile.
18470 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18471
18472 // For splat vectors, scale the number of uses by the splat factor. If we can
18473 // convert the division into a scalar op, that will likely be much faster.
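// For example, a single <4 x float> FDIV by a splat divisor is counted as 4
// potential scalar divides when compared against the target's minimum-use
// threshold.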
18474 unsigned NumElts = 1;
18475 EVT VT = N->getValueType(0);
18476 if (VT.isVector() && DAG.isSplatValue(N1))
18477 NumElts = VT.getVectorMinNumElements();
18478
18479 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18480 return SDValue();
18481
18482 // Find all FDIV users of the same divisor.
18483 // Use a set because duplicates may be present in the user list.
18484 SetVector<SDNode *> Users;
18485 for (auto *U : N1->users()) {
18486 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18487 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18488 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18489 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18490 U->getFlags().hasAllowReassociation() &&
18491 U->getFlags().hasNoSignedZeros())
18492 continue;
18493
18494 // This division is eligible for the transform only if it allows reciprocal
18495 // formation (it carries the 'arcp' fast-math flag).
18496 if (U->getFlags().hasAllowReciprocal())
18497 Users.insert(U);
18498 }
18499 }
18500
18501 // Now that we have the actual number of divisor uses, make sure it meets
18502 // the minimum threshold specified by the target.
18503 if ((Users.size() * NumElts) < MinUses)
18504 return SDValue();
18505
18506 SDLoc DL(N);
18507 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18508 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18509
18510 // Dividend / Divisor -> Dividend * Reciprocal
18511 for (auto *U : Users) {
18512 SDValue Dividend = U->getOperand(0);
18513 if (Dividend != FPOne) {
18514 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18515 Reciprocal, Flags);
18516 CombineTo(U, NewNode);
18517 } else if (U != Reciprocal.getNode()) {
18518 // In the absence of fast-math-flags, this user node is always the
18519 // same node as Reciprocal, but with FMF they may be different nodes.
18520 CombineTo(U, Reciprocal);
18521 }
18522 }
18523 return SDValue(N, 0); // N was replaced.
18524}
18525
18526SDValue DAGCombiner::visitFDIV(SDNode *N) {
18527 SDValue N0 = N->getOperand(0);
18528 SDValue N1 = N->getOperand(1);
18529 EVT VT = N->getValueType(0);
18530 SDLoc DL(N);
18531 SDNodeFlags Flags = N->getFlags();
18532 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18533
18534 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18535 return R;
18536
18537 // fold (fdiv c1, c2) -> c1/c2
18538 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18539 return C;
18540
18541 // fold vector ops
18542 if (VT.isVector())
18543 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18544 return FoldedVOp;
18545
18546 if (SDValue NewSel = foldBinOpIntoSelect(N))
18547 return NewSel;
18548
18549 if (SDValue V = combineRepeatedFPDivisors(N))
18550 return V;
18551
18552 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18553 // the loss is acceptable with AllowReciprocal.
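// For example, (fdiv X, 4.0) becomes (fmul X, 0.25) unconditionally because
// 1/4 is exact, whereas (fdiv X, 3.0) becomes a multiply by the rounded
// reciprocal only when the 'arcp' (allow-reciprocal) flag is present.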
18554 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18555 // Compute the reciprocal 1.0 / c2.
18556 const APFloat &N1APF = N1CFP->getValueAPF();
18557 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18558 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
18559 // Only do the transform if the reciprocal is a legal fp immediate that
18560 // isn't too nasty (eg NaN, denormal, ...).
18561 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18562 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18563 (!LegalOperations ||
18564 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18565 // backend)... we should handle this gracefully after Legalize.
18566 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18567 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18568 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18569 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18570 DAG.getConstantFP(Recip, DL, VT));
18571 }
18572
18573 if (Flags.hasAllowReciprocal()) {
18574 // If this FDIV is part of a reciprocal square root, it may be folded
18575 // into a target-specific square root estimate instruction.
18576 if (N1.getOpcode() == ISD::FSQRT) {
18577 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
18578 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18579 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18580 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18581 if (SDValue RV =
18582 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18583 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18584 AddToWorklist(RV.getNode());
18585 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18586 }
18587 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18588 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18589 if (SDValue RV =
18590 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18591 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18592 AddToWorklist(RV.getNode());
18593 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18594 }
18595 } else if (N1.getOpcode() == ISD::FMUL) {
18596 // Look through an FMUL. Even though this won't remove the FDIV directly,
18597 // it's still worthwhile to get rid of the FSQRT if possible.
18598 SDValue Sqrt, Y;
18599 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18600 Sqrt = N1.getOperand(0);
18601 Y = N1.getOperand(1);
18602 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18603 Sqrt = N1.getOperand(1);
18604 Y = N1.getOperand(0);
18605 }
18606 if (Sqrt.getNode()) {
18607 // If the other multiply operand is known positive, pull it into the
18608 // sqrt. That will eliminate the division if we convert to an estimate.
18609 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18610 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18611 SDValue A;
18612 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18613 A = Y.getOperand(0);
18614 else if (Y == Sqrt.getOperand(0))
18615 A = Y;
18616 if (A) {
18617 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18618 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18619 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18620 SDValue AAZ =
18621 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18622 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
18623 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18624
18625 // Estimate creation failed. Clean up speculatively created nodes.
18626 recursivelyDeleteUnusedNodes(AAZ.getNode());
18627 }
18628 }
18629
18630 // We found a FSQRT, so try to make this fold:
18631 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18632 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
18633 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18634 AddToWorklist(Div.getNode());
18635 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18636 }
18637 }
18638 }
18639
18640 // Fold into a reciprocal estimate and multiply instead of a real divide.
18641 if (Flags.hasNoInfs())
18642 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18643 return RV;
18644 }
18645
18646 // Fold X/Sqrt(X) -> Sqrt(X)
18647 if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation())
18648 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18649 return N1;
18650
18651 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18652 TargetLowering::NegatibleCost CostN0 =
18653 TargetLowering::NegatibleCost::Expensive;
18654 TargetLowering::NegatibleCost CostN1 =
18655 TargetLowering::NegatibleCost::Expensive;
18656 SDValue NegN0 =
18657 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18658 if (NegN0) {
18659 HandleSDNode NegN0Handle(NegN0);
18660 SDValue NegN1 =
18661 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18662 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18663 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18664 return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18665 }
18666
18667 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18668 return R;
18669
18670 return SDValue();
18671}
18672
18673SDValue DAGCombiner::visitFREM(SDNode *N) {
18674 SDValue N0 = N->getOperand(0);
18675 SDValue N1 = N->getOperand(1);
18676 EVT VT = N->getValueType(0);
18677 SDNodeFlags Flags = N->getFlags();
18678 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18679 SDLoc DL(N);
18680
18681 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18682 return R;
18683
18684 // fold (frem c1, c2) -> fmod(c1,c2)
18685 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18686 return C;
18687
18688 if (SDValue NewSel = foldBinOpIntoSelect(N))
18689 return NewSel;
18690
18691 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, providing N1 is an integer
18692 // power of 2.
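// For example, frem(7.5, 2.0) -> 7.5 - trunc(3.75) * 2.0 = 7.5 - 6.0 = 1.5.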
18693 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18696 TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
18697 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18698 bool NeedsCopySign =
18699 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
18700 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18701 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18702 SDValue MLA;
18704 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18705 N1, N0);
18706 } else {
18707 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18708 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18709 }
18710 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18711 }
18712
18713 return SDValue();
18714}
18715
18716SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18717 SDNodeFlags Flags = N->getFlags();
18718
18719 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18720 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18721 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
18722 return SDValue();
18723
18724 SDValue N0 = N->getOperand(0);
18725 if (TLI.isFsqrtCheap(N0, DAG))
18726 return SDValue();
18727
18728 // FSQRT nodes have flags that propagate to the created nodes.
18729 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18730 // transform the fdiv, we may produce a sub-optimal estimate sequence
18731 // because the reciprocal calculation may not have to filter out a
18732 // 0.0 input.
18733 return buildSqrtEstimate(N0, Flags);
18734}
18735
18736/// copysign(x, fp_extend(y)) -> copysign(x, y)
18737/// copysign(x, fp_round(y)) -> copysign(x, y)
18738/// Operands to the functions are the type of X and Y respectively.
18739static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18740 // Always fold no-op FP casts.
18741 if (XTy == YTy)
18742 return true;
18743
18744 // Do not optimize out type conversion of f128 type yet.
18745 // For some targets like x86_64, configuration is changed to keep one f128
18746 // value in one SSE register, but instruction selection cannot handle
18747 // FCOPYSIGN on SSE registers yet.
18748 if (YTy == MVT::f128)
18749 return false;
18750
18751 // Avoid mismatched vector operand types, for better instruction selection.
18752 return !YTy.isVector();
18753}
18754
18755 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
18756 SDValue N1 = N->getOperand(1);
18757 if (N1.getOpcode() != ISD::FP_EXTEND &&
18758 N1.getOpcode() != ISD::FP_ROUND)
18759 return false;
18760 EVT N1VT = N1->getValueType(0);
18761 EVT N1Op0VT = N1->getOperand(0).getValueType();
18762 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18763}
18764
18765SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18766 SDValue N0 = N->getOperand(0);
18767 SDValue N1 = N->getOperand(1);
18768 EVT VT = N->getValueType(0);
18769 SDLoc DL(N);
18770
18771 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18772 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18773 return C;
18774
18775 // copysign(x, fp_extend(y)) -> copysign(x, y)
18776 // copysign(x, fp_round(y)) -> copysign(x, y)
18777 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18778 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18779
18781 return SDValue(N, 0);
18782
18783 return SDValue();
18784}
18785
18786SDValue DAGCombiner::visitFPOW(SDNode *N) {
18787 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18788 if (!ExponentC)
18789 return SDValue();
18790 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18791
18792 // Try to convert x ** (1/3) into cube root.
18793 // TODO: Handle the various flavors of long double.
18794 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18795 // Some range near 1/3 should be fine.
18796 EVT VT = N->getValueType(0);
18797 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18798 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18799 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18800 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18801 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
18802 // For regular numbers, rounding may cause the results to differ.
18803 // Therefore, we require { nsz ninf nnan afn } for this transform.
18804 // TODO: We could select out the special cases if we don't have nsz/ninf.
18805 SDNodeFlags Flags = N->getFlags();
18806 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18807 !Flags.hasApproximateFuncs())
18808 return SDValue();
18809
18810 // Do not create a cbrt() libcall if the target does not have it, and do not
18811 // turn a pow that has lowering support into a cbrt() libcall.
18812 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18813 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18814 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18815 return SDValue();
18816
18817 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18818 }
18819
18820 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18821 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18822 // TODO: This could be extended (using a target hook) to handle smaller
18823 // power-of-2 fractional exponents.
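// For example, pow(16.0, 0.25) = 2.0 = sqrt(sqrt(16.0)), and
// pow(16.0, 0.75) = 8.0 = sqrt(16.0) * sqrt(sqrt(16.0)).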
18824 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18825 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18826 if (ExponentIs025 || ExponentIs075) {
18827 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18828 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18829 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18830 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18831 // For regular numbers, rounding may cause the results to differ.
18832 // Therefore, we require { nsz ninf afn } for this transform.
18833 // TODO: We could select out the special cases if we don't have nsz/ninf.
18834 SDNodeFlags Flags = N->getFlags();
18835
18836 // We only need no signed zeros for the 0.25 case.
18837 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18838 !Flags.hasApproximateFuncs())
18839 return SDValue();
18840
18841 // Don't double the number of libcalls. We are trying to inline fast code.
18842 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18843 return SDValue();
18844
18845 // Assume that libcalls are the smallest code.
18846 // TODO: This restriction should probably be lifted for vectors.
18847 if (ForCodeSize)
18848 return SDValue();
18849
18850 // pow(X, 0.25) --> sqrt(sqrt(X))
18851 SDLoc DL(N);
18852 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18853 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18854 if (ExponentIs025)
18855 return SqrtSqrt;
18856 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18857 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18858 }
18859
18860 return SDValue();
18861}
18862
18863 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18864 const TargetLowering &TLI) {
18865 // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
18866 // If NoSignedZerosFPMath is enabled, this is a direct replacement.
18867 // Otherwise, for strict math, we must handle edge cases:
18868 // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
18869 // as an example: it first becomes integer 0 and is converted back to +0.0.
18870 // FTRUNC on its own could produce -0.0.
18871
18872 // FIXME: We should be able to use node-level FMF here.
18873 EVT VT = N->getValueType(0);
18874 if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
18875 return SDValue();
18876
18877 // fptosi/fptoui round towards zero, so converting from FP to integer and
18878 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18879 SDValue N0 = N->getOperand(0);
18880 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18881 N0.getOperand(0).getValueType() == VT) {
18883 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18884 }
18885
18886 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18887 N0.getOperand(0).getValueType() == VT) {
18889 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18890
18891 // Strict math: use FABS to handle negative inputs correctly.
18892 if (TLI.isFAbsFree(VT)) {
18893 SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
18894 return DAG.getNode(ISD::FTRUNC, DL, VT, Abs);
18895 }
18896 }
18897
18898 return SDValue();
18899}
18900
18901SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18902 SDValue N0 = N->getOperand(0);
18903 EVT VT = N->getValueType(0);
18904 EVT OpVT = N0.getValueType();
18905 SDLoc DL(N);
18906
18907 // [us]itofp(undef) = 0, because the result value is bounded.
18908 if (N0.isUndef())
18909 return DAG.getConstantFP(0.0, DL, VT);
18910
18911 // fold (sint_to_fp c1) -> c1fp
18912 // ...but only if the target supports immediate floating-point values
18913 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18914 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18915 return C;
18916
18917 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18918 // but UINT_TO_FP is legal on this target, try to convert.
18919 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18920 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18921 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18922 if (DAG.SignBitIsZero(N0))
18923 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18924 }
18925
18926 // The next optimizations are desirable only if SELECT_CC can be lowered.
18927 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18928 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18929 !VT.isVector() &&
18930 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18931 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18932 DAG.getConstantFP(0.0, DL, VT));
18933
18934 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18935 // (select (setcc x, y, cc), 1.0, 0.0)
18936 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18937 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18938 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18939 return DAG.getSelect(DL, VT, N0.getOperand(0),
18940 DAG.getConstantFP(1.0, DL, VT),
18941 DAG.getConstantFP(0.0, DL, VT));
18942
18943 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18944 return FTrunc;
18945
18946 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18947 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18949 N0.getOperand(0).getValueType()))
18950 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18951
18952 return SDValue();
18953}
18954
18955SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18956 SDValue N0 = N->getOperand(0);
18957 EVT VT = N->getValueType(0);
18958 EVT OpVT = N0.getValueType();
18959 SDLoc DL(N);
18960
18961 // [us]itofp(undef) = 0, because the result value is bounded.
18962 if (N0.isUndef())
18963 return DAG.getConstantFP(0.0, DL, VT);
18964
18965 // fold (uint_to_fp c1) -> c1fp
18966 // ...but only if the target supports immediate floating-point values
18967 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18968 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18969 return C;
18970
18971 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18972 // but SINT_TO_FP is legal on this target, try to convert.
18973 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18974 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18975 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18976 if (DAG.SignBitIsZero(N0))
18977 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18978 }
18979
18980 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18981 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18982 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18983 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18984 DAG.getConstantFP(0.0, DL, VT));
18985
18986 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18987 return FTrunc;
18988
18989 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18990 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18992 N0.getOperand(0).getValueType()))
18993 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18994
18995 return SDValue();
18996}
18997
18998// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
18999 static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
19000 SDValue N0 = N->getOperand(0);
19001 EVT VT = N->getValueType(0);
19002
19003 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
19004 return SDValue();
19005
19006 SDValue Src = N0.getOperand(0);
19007 EVT SrcVT = Src.getValueType();
19008 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
19009 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
19010
19011 // We can safely assume the conversion won't overflow the output range,
19012 // because (for example) (uint8_t)18293.f is undefined behavior.
19013
19014 // Since we can assume the conversion won't overflow, our decision as to
19015 // whether the input will fit in the float should depend on the minimum
19016 // of the input range and output range.
19017
19018 // This means this is also safe for a signed input and unsigned output, since
19019 // a negative input would lead to undefined behavior.
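// For example, (fp_to_sint (sint_to_fp i16 X)) going through f32 folds away
// the float round-trip entirely, because i16's 15 value bits always fit in
// f32's 24-bit significand.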
19020 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
19021 unsigned OutputSize = (int)VT.getScalarSizeInBits();
19022 unsigned ActualSize = std::min(InputSize, OutputSize);
19023 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
19024
19025 // We can only fold away the float conversion if the input range can be
19026 // represented exactly in the float range.
19027 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
19028 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
19029 unsigned ExtOp =
19030 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
19031 return DAG.getNode(ExtOp, DL, VT, Src);
19032 }
19033 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
19034 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
19035 return DAG.getBitcast(VT, Src);
19036 }
19037 return SDValue();
19038}
19039
19040SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19041 SDValue N0 = N->getOperand(0);
19042 EVT VT = N->getValueType(0);
19043 SDLoc DL(N);
19044
19045 // fold (fp_to_sint undef) -> undef
19046 if (N0.isUndef())
19047 return DAG.getUNDEF(VT);
19048
19049 // fold (fp_to_sint c1fp) -> c1
19050 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
19051 return C;
19052
19053 return FoldIntToFPToInt(N, DL, DAG);
19054}
19055
19056SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19057 SDValue N0 = N->getOperand(0);
19058 EVT VT = N->getValueType(0);
19059 SDLoc DL(N);
19060
19061 // fold (fp_to_uint undef) -> undef
19062 if (N0.isUndef())
19063 return DAG.getUNDEF(VT);
19064
19065 // fold (fp_to_uint c1fp) -> c1
19066 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19067 return C;
19068
19069 return FoldIntToFPToInt(N, DL, DAG);
19070}
19071
19072SDValue DAGCombiner::visitXROUND(SDNode *N) {
19073 SDValue N0 = N->getOperand(0);
19074 EVT VT = N->getValueType(0);
19075
19076 // fold (lrint|llrint undef) -> undef
19077 // fold (lround|llround undef) -> undef
19078 if (N0.isUndef())
19079 return DAG.getUNDEF(VT);
19080
19081 // fold (lrint|llrint c1fp) -> c1
19082 // fold (lround|llround c1fp) -> c1
19083 if (SDValue C =
19084 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19085 return C;
19086
19087 return SDValue();
19088}
19089
19090SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
19091 SDValue N0 = N->getOperand(0);
19092 SDValue N1 = N->getOperand(1);
19093 EVT VT = N->getValueType(0);
19094 SDLoc DL(N);
19095
19096 // fold (fp_round c1fp) -> c1fp
19097 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
19098 return C;
19099
19100 // fold (fp_round (fp_extend x)) -> x
19101 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
19102 return N0.getOperand(0);
19103
19104 // fold (fp_round (fp_round x)) -> (fp_round x)
19105 if (N0.getOpcode() == ISD::FP_ROUND) {
19106 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
19107 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
19108
19109 // Avoid folding legal fp_rounds into non-legal ones.
19110 if (!hasOperation(ISD::FP_ROUND, VT))
19111 return SDValue();
19112
19113 // Skip this folding if it results in an fp_round from f80 to f16.
19114 //
19115 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19116 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19117 // instructions from f32 or f64. Moreover, the first (value-preserving)
19118 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19119 // x86.
19120 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19121 return SDValue();
19122
19123 // If the first fp_round isn't a value preserving truncation, it might
19124 // introduce a tie in the second fp_round that wouldn't occur in the
19125 // single-step fp_round we want to fold to.
19126 // In other words, double rounding isn't the same as rounding.
19127 // Also, this is a value preserving truncation iff both fp_round's are.
19128 if ((N->getFlags().hasAllowContract() &&
19129 N0->getFlags().hasAllowContract()) ||
19130 N0IsTrunc)
19131 return DAG.getNode(
19132 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19133 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19134 }
19135
19136 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19137 // Note: From a legality perspective, this is a two step transform. First,
19138 // we duplicate the fp_round to the arguments of the copysign, then we
19139 // eliminate the fp_round on Y. The second step requires an additional
19140 // predicate to match the implementation above.
19141 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19142 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
19143 N0.getValueType())) {
19144 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19145 N0.getOperand(0), N1);
19146 AddToWorklist(Tmp.getNode());
19147 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19148 }
19149
19150 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19151 return NewVSel;
19152
19153 return SDValue();
19154}
19155
19156// Eliminate a floating-point widening of a narrowed value if the fast math
19157// flags allow it.
19159 SDValue N0 = N->getOperand(0);
19160 EVT VT = N->getValueType(0);
19161
19162 unsigned NarrowingOp;
19163 switch (N->getOpcode()) {
19164 case ISD::FP16_TO_FP:
19165 NarrowingOp = ISD::FP_TO_FP16;
19166 break;
19167 case ISD::BF16_TO_FP:
19168 NarrowingOp = ISD::FP_TO_BF16;
19169 break;
19170 case ISD::FP_EXTEND:
19171 NarrowingOp = ISD::FP_ROUND;
19172 break;
19173 default:
19174 llvm_unreachable("Expected widening FP cast");
19175 }
19176
19177 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19178 const SDNodeFlags NarrowFlags = N0->getFlags();
19179 const SDNodeFlags WidenFlags = N->getFlags();
19180 // Narrowing can introduce inf and change the encoding of a nan, so the
19181 // widen must have the nnan and ninf flags to indicate that we don't need to
19182 // care about that. We are also removing a rounding step, and that requires
19183 // both the narrow and widen to allow contraction.
19184 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19185 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19186 return N0.getOperand(0);
19187 }
19188 }
19189
19190 return SDValue();
19191}
19192
19193SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19194 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19195 SDValue N0 = N->getOperand(0);
19196 EVT VT = N->getValueType(0);
19197 SDLoc DL(N);
19198
19199 if (VT.isVector())
19200 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19201 return FoldedVOp;
19202
19203 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19204 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19205 return SDValue();
19206
19207 // fold (fp_extend c1fp) -> c1fp
19208 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19209 return C;
19210
19211 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19212 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19213 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19214 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19215
19216 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19217 // value of X.
19218 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19219 SDValue In = N0.getOperand(0);
19220 if (In.getValueType() == VT) return In;
19221 if (VT.bitsLT(In.getValueType()))
19222 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19223 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19224 }
19225
19226 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19227 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19228 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
19229 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19230 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19231 LN0->getChain(),
19232 LN0->getBasePtr(), N0.getValueType(),
19233 LN0->getMemOperand());
19234 CombineTo(N, ExtLoad);
19235 CombineTo(
19236 N0.getNode(),
19237 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19238 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19239 ExtLoad.getValue(1));
19240 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19241 }
19242
19243 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19244 return NewVSel;
19245
19246 if (SDValue CastEliminated = eliminateFPCastPair(N))
19247 return CastEliminated;
19248
19249 return SDValue();
19250}
19251
19252SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19253 SDValue N0 = N->getOperand(0);
19254 EVT VT = N->getValueType(0);
19255
19256 // fold (fceil c1) -> fceil(c1)
19257 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19258 return C;
19259
19260 return SDValue();
19261}
19262
19263SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19264 SDValue N0 = N->getOperand(0);
19265 EVT VT = N->getValueType(0);
19266
19267 // fold (ftrunc c1) -> ftrunc(c1)
19268 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19269 return C;
19270
19271 // fold ftrunc (known rounded int x) -> x
19272 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19273 // likely to be generated to extract integer from a rounded floating value.
19274 switch (N0.getOpcode()) {
19275 default: break;
19276 case ISD::FRINT:
19277 case ISD::FTRUNC:
19278 case ISD::FNEARBYINT:
19279 case ISD::FROUNDEVEN:
19280 case ISD::FFLOOR:
19281 case ISD::FCEIL:
19282 return N0;
19283 }
19284
19285 return SDValue();
19286}
19287
19288SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19289 SDValue N0 = N->getOperand(0);
19290
19291 // fold (ffrexp c1) -> ffrexp(c1)
19292 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
19293 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19294 return SDValue();
19295}
19296
19297SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19298 SDValue N0 = N->getOperand(0);
19299 EVT VT = N->getValueType(0);
19300
19301 // fold (ffloor c1) -> ffloor(c1)
19302 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19303 return C;
19304
19305 return SDValue();
19306}
19307
19308SDValue DAGCombiner::visitFNEG(SDNode *N) {
19309 SDValue N0 = N->getOperand(0);
19310 EVT VT = N->getValueType(0);
19311 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19312
19313 // Constant fold FNEG.
19314 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19315 return C;
19316
19317 if (SDValue NegN0 =
19318 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19319 return NegN0;
19320
19321 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19322 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19323 // know it was called from a context with a nsz flag if the input fsub does
19324 // not.
19325 if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
19326 N0.hasOneUse()) {
19327 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19328 N0.getOperand(0));
19329 }
19330
19332 return SDValue(N, 0);
19333
19334 if (SDValue Cast = foldSignChangeInBitcast(N))
19335 return Cast;
19336
19337 return SDValue();
19338}
19339
19340SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19341 SDValue N0 = N->getOperand(0);
19342 SDValue N1 = N->getOperand(1);
19343 EVT VT = N->getValueType(0);
19344 const SDNodeFlags Flags = N->getFlags();
19345 unsigned Opc = N->getOpcode();
19346 bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19347 bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM;
19348 bool IsMin =
19349 Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
19350 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19351
19352 // Constant fold.
19353 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19354 return C;
19355
19356 // Canonicalize to constant on RHS.
19357 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
19358 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
19359 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19360
19361 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19362 const APFloat &AF = N1CFP->getValueAPF();
19363
19364 // minnum(X, qnan) -> X
19365 // maxnum(X, qnan) -> X
19366 // minnum(X, snan) -> qnan
19367 // maxnum(X, snan) -> qnan
19368 // minimum(X, nan) -> qnan
19369 // maximum(X, nan) -> qnan
19370 // minimumnum(X, nan) -> X
19371 // maximumnum(X, nan) -> X
19372 if (AF.isNaN()) {
19373 if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) {
19374 if (AF.isSignaling())
19375 return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
19376 return N->getOperand(1);
19377 }
19378 return N->getOperand(0);
19379 }
19380
19381 // In the following folds, inf can be replaced with the largest finite
19382 // float, if the ninf flag is set.
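// For example, with ninf set, minnum(X, -FLT_MAX) folds exactly like
// minnum(X, -inf) and simply returns -FLT_MAX.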
19383 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19384 // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
19385 // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
19386 // minimum(X, -inf) -> -inf if nnan
19387 // maximum(X, +inf) -> +inf if nnan
19388 // minimumnum(X, -inf) -> -inf
19389 // maximumnum(X, +inf) -> +inf
19390 if (IsMin == AF.isNegative() &&
19391 (!PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19392 return N->getOperand(1);
19393
19394 // minnum(X, +inf) -> X if nnan
19395 // maxnum(X, -inf) -> X if nnan
19396 // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
19397 // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
19398 // minimumnum(X, +inf) -> X if nnan
19399 // maximumnum(X, -inf) -> X if nnan
19400 if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19401 return N->getOperand(0);
19402 }
19403 }
19404
19405 // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
19406 if (Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM)
19407 return SDValue();
19408
19409 if (SDValue SD = reassociateReduction(
19410 PropAllNaNsToQNaNs
19411 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
19412 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
19413 Opc, SDLoc(N), VT, N0, N1, Flags))
19414 return SD;
19415
19416 return SDValue();
19417}
19418
19419SDValue DAGCombiner::visitFABS(SDNode *N) {
19420 SDValue N0 = N->getOperand(0);
19421 EVT VT = N->getValueType(0);
19422 SDLoc DL(N);
19423
19424 // fold (fabs c1) -> fabs(c1)
19425 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19426 return C;
19427
19429 return SDValue(N, 0);
19430
19431 if (SDValue Cast = foldSignChangeInBitcast(N))
19432 return Cast;
19433
19434 return SDValue();
19435}
19436
19437SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19438 SDValue Chain = N->getOperand(0);
19439 SDValue N1 = N->getOperand(1);
19440 SDValue N2 = N->getOperand(2);
19441
19442 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19443 // nondeterministic jumps).
19444 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19445 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19446 N1->getOperand(0), N2, N->getFlags());
19447 }
19448
19449 // Variant of the previous fold where there is a SETCC in between:
19450 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19451 // =>
19452 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19453 // =>
19454 // BRCOND(SETCC(X, CONST, Cond))
19455 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19456 // isn't equivalent to true or false.
19457 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19458 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19459 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19460 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19461 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
19462 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19463 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19464 bool Updated = false;
19465
19466 // Is 'X Cond C' always true or false?
19467 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19468 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19469 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19470 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19471 (Cond == ISD::SETGT && C->isMaxSignedValue());
19472 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19473 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19474 (Cond == ISD::SETUGE && C->isZero()) ||
19475 (Cond == ISD::SETGE && C->isMinSignedValue());
19476 return True || False;
19477 };
19478
19479 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19480 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19481 S0 = S0->getOperand(0);
19482 Updated = true;
19483 }
19484 }
19485 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19486 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19487 S1 = S1->getOperand(0);
19488 Updated = true;
19489 }
19490 }
19491
19492 if (Updated)
19493 return DAG.getNode(
19494 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19495 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19496 N->getFlags());
19497 }
19498
19499 // If N is a constant we could fold this into a fallthrough or unconditional
19500 // branch. However that doesn't happen very often in normal code, because
19501 // Instcombine/SimplifyCFG should have handled the available opportunities.
19502 // If we did this folding here, it would be necessary to update the
19503 // MachineBasicBlock CFG, which is awkward.
19504
19505 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19506 // on the target, also copy fast math flags.
19507 if (N1.getOpcode() == ISD::SETCC &&
19508 TLI.isOperationLegalOrCustom(ISD::BR_CC,
19509 N1.getOperand(0).getValueType())) {
19510 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19511 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19512 N1->getFlags());
19513 }
19514
19515 if (N1.hasOneUse()) {
19516 // rebuildSetCC calls visitXor which may change the Chain when there is a
19517 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19518 HandleSDNode ChainHandle(Chain);
19519 if (SDValue NewN1 = rebuildSetCC(N1))
19520 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19521 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19522 }
19523
19524 return SDValue();
19525}
19526
19527SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19528 if (N.getOpcode() == ISD::SRL ||
19529 (N.getOpcode() == ISD::TRUNCATE &&
19530 (N.getOperand(0).hasOneUse() &&
19531 N.getOperand(0).getOpcode() == ISD::SRL))) {
19532 // Look past the truncate.
19533 if (N.getOpcode() == ISD::TRUNCATE)
19534 N = N.getOperand(0);
19535
19536 // Match this pattern so that we can generate simpler code:
19537 //
19538 // %a = ...
19539 // %b = and i32 %a, 2
19540 // %c = srl i32 %b, 1
19541 // brcond i32 %c ...
19542 //
19543 // into
19544 //
19545 // %a = ...
19546 // %b = and i32 %a, 2
19547 // %c = setcc eq %b, 0
19548 // brcond %c ...
19549 //
19550 // This applies only when the AND constant value has one bit set and the
19551 // SRL constant is equal to the log2 of the AND constant. The back-end is
19552 // smart enough to convert the result into a TEST/JMP sequence.
19553 SDValue Op0 = N.getOperand(0);
19554 SDValue Op1 = N.getOperand(1);
19555
19556 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19557 SDValue AndOp1 = Op0.getOperand(1);
19558
19559 if (AndOp1.getOpcode() == ISD::Constant) {
19560 const APInt &AndConst = AndOp1->getAsAPIntVal();
19561
19562 if (AndConst.isPowerOf2() &&
19563 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19564 SDLoc DL(N);
19565 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19566 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19567 ISD::SETNE);
19568 }
19569 }
19570 }
19571 }
19572
19573 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19574 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19575 if (N.getOpcode() == ISD::XOR) {
19576 // Because we may call this on a speculatively constructed
19577 // SimplifiedSetCC Node, we need to simplify this node first.
19578 // Ideally this should be folded into SimplifySetCC and not
19579 // here. For now, grab a handle to N so we don't lose it from
19580 // replacements internal to the visit.
19581 HandleSDNode XORHandle(N);
19582 while (N.getOpcode() == ISD::XOR) {
19583 SDValue Tmp = visitXOR(N.getNode());
19584 // No simplification done.
19585 if (!Tmp.getNode())
19586 break;
19587 // Returning N is a form of in-visit replacement that may invalidate
19588 // N. Grab the value from the handle.
19589 if (Tmp.getNode() == N.getNode())
19590 N = XORHandle.getValue();
19591 else // Node simplified. Try simplifying again.
19592 N = Tmp;
19593 }
19594
19595 if (N.getOpcode() != ISD::XOR)
19596 return N;
19597
19598 SDValue Op0 = N->getOperand(0);
19599 SDValue Op1 = N->getOperand(1);
19600
19601 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19602 bool Equal = false;
19603 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19604 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19605 Op0.getValueType() == MVT::i1) {
19606 N = Op0;
19607 Op0 = N->getOperand(0);
19608 Op1 = N->getOperand(1);
19609 Equal = true;
19610 }
19611
19612 EVT SetCCVT = N.getValueType();
19613 if (LegalTypes)
19614 SetCCVT = getSetCCResultType(SetCCVT);
19615 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19616 // it would introduce illegal operations post-legalization as this can
19617 // result in infinite looping between converting xor->setcc here, and
19618 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19619 ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
19620 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19621 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19622 }
19623 }
19624
19625 return SDValue();
19626}
19627
19628// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19629//
19630SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19631 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19632 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19633
19634 // If N is a constant we could fold this into a fallthrough or unconditional
19635 // branch. However that doesn't happen very often in normal code, because
19636 // Instcombine/SimplifyCFG should have handled the available opportunities.
19637 // If we did this folding here, it would be necessary to update the
19638 // MachineBasicBlock CFG, which is awkward.
19639
19640 // Use SimplifySetCC to simplify SETCC's.
19641 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
19642 CondLHS, CondRHS, CC->get(), SDLoc(N),
19643 false);
19644 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19645
19646 // fold to a simpler setcc
19647 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19648 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19649 N->getOperand(0), Simp.getOperand(2),
19650 Simp.getOperand(0), Simp.getOperand(1),
19651 N->getOperand(4));
19652
19653 return SDValue();
19654}
19655
19656static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19657 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19658 const TargetLowering &TLI) {
19659 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19660 if (LD->isIndexed())
19661 return false;
19662 EVT VT = LD->getMemoryVT();
19663 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19664 return false;
19665 Ptr = LD->getBasePtr();
19666 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19667 if (ST->isIndexed())
19668 return false;
19669 EVT VT = ST->getMemoryVT();
19670 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19671 return false;
19672 Ptr = ST->getBasePtr();
19673 IsLoad = false;
19674 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19675 if (LD->isIndexed())
19676 return false;
19677 EVT VT = LD->getMemoryVT();
19678 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19679 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19680 return false;
19681 Ptr = LD->getBasePtr();
19682 IsMasked = true;
19683 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19684 if (ST->isIndexed())
19685 return false;
19686 EVT VT = ST->getMemoryVT();
19687 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19688 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19689 return false;
19690 Ptr = ST->getBasePtr();
19691 IsLoad = false;
19692 IsMasked = true;
19693 } else {
19694 return false;
19695 }
19696 return true;
19697}
19698
19699/// Try turning a load/store into a pre-indexed load/store when the base
19700/// pointer is an add or subtract and it has other uses besides the load/store.
19701/// After the transformation, the new indexed load/store has effectively folded
19702/// the add/subtract in and all of its other uses are redirected to the
19703/// new load/store.
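/// An illustrative shape of the transformation (subject to the target's
/// supported addressing modes):
///   t1 = add t0, 4
///   t2 = load t1
///   ... other users of t1 ...
/// becomes a pre-indexed load of t0 with offset 4 that produces both the
/// loaded value and the updated pointer, and the other users of t1 are
/// rewired to that pointer result.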
19704bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19705 if (Level < AfterLegalizeDAG)
19706 return false;
19707
19708 bool IsLoad = true;
19709 bool IsMasked = false;
19710 SDValue Ptr;
19711 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19712 Ptr, TLI))
19713 return false;
19714
19715 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19716 // out. There is no reason to make this a preinc/predec.
19717 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19718 Ptr->hasOneUse())
19719 return false;
19720
19721 // Ask the target to do addressing mode selection.
19722 SDValue BasePtr;
19723 SDValue Offset;
19724 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19725 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19726 return false;
19727
19728 // Backends without true r+i pre-indexed forms may need to pass a
19729 // constant base with a variable offset so that constant coercion
19730 // will work with the patterns in canonical form.
19731 bool Swapped = false;
19732 if (isa<ConstantSDNode>(BasePtr)) {
19733 std::swap(BasePtr, Offset);
19734 Swapped = true;
19735 }
19736
19737 // Don't create an indexed load / store with zero offset.
19738 if (isNullConstant(Offset))
19739 return false;
19740
19741 // Try turning it into a pre-indexed load / store except when:
19742 // 1) The new base ptr is a frame index.
19743 // 2) If N is a store and the new base ptr is either the same as or is a
19744 // predecessor of the value being stored.
19745 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19746 // that would create a cycle.
19747 // 4) All uses are load / store ops that use it as old base ptr.
19748
19749 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19750 // (plus the implicit offset) to a register to preinc anyway.
19751 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19752 return false;
19753
19754 // Check #2.
19755 if (!IsLoad) {
19756 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19757 : cast<StoreSDNode>(N)->getValue();
19758
19759 // Would require a copy.
19760 if (Val == BasePtr)
19761 return false;
19762
19763 // Would create a cycle.
19764 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19765 return false;
19766 }
19767
19768 // Caches for hasPredecessorHelper.
19769 SmallPtrSet<const SDNode *, 32> Visited;
19770 SmallVector<const SDNode *, 16> Worklist;
19771 Worklist.push_back(N);
19772
19773 // If the offset is a constant, there may be other adds of constants that
19774 // can be folded with this one. We should do this to avoid having to keep
19775 // a copy of the original base pointer.
19776 SmallVector<SDNode *, 16> OtherUses;
19777 constexpr unsigned int MaxSteps = 8192;
19778 if (isa<ConstantSDNode>(Offset))
19779 for (SDUse &Use : BasePtr->uses()) {
19780 // Skip the use that is Ptr and uses of other results from BasePtr's
19781 // node (important for nodes that return multiple results).
19782 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19783 continue;
19784
19785 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19786 MaxSteps))
19787 continue;
19788
19789 if (Use.getUser()->getOpcode() != ISD::ADD &&
19790 Use.getUser()->getOpcode() != ISD::SUB) {
19791 OtherUses.clear();
19792 break;
19793 }
19794
19795 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19796 if (!isa<ConstantSDNode>(Op1)) {
19797 OtherUses.clear();
19798 break;
19799 }
19800
19801 // FIXME: In some cases, we can be smarter about this.
19802 if (Op1.getValueType() != Offset.getValueType()) {
19803 OtherUses.clear();
19804 break;
19805 }
19806
19807 OtherUses.push_back(Use.getUser());
19808 }
19809
19810 if (Swapped)
19811 std::swap(BasePtr, Offset);
19812
19813 // Now check for #3 and #4.
19814 bool RealUse = false;
19815
19816 for (SDNode *User : Ptr->users()) {
19817 if (User == N)
19818 continue;
19819 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
19820 return false;
19821
19822 // If Ptr may be folded in addressing mode of other use, then it's
19823 // not profitable to do this transformation.
19824 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
19825 RealUse = true;
19826 }
19827
19828 if (!RealUse)
19829 return false;
19830
19831 SDValue Result;
19832 if (!IsMasked) {
19833 if (IsLoad)
19834 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19835 else
19836 Result =
19837 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19838 } else {
19839 if (IsLoad)
19840 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19841 Offset, AM);
19842 else
19843 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19844 Offset, AM);
19845 }
19846 ++PreIndexedNodes;
19847 ++NodesCombined;
19848 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19849 Result.dump(&DAG); dbgs() << '\n');
19850 WorklistRemover DeadNodes(*this);
19851 if (IsLoad) {
19852 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19853 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19854 } else {
19855 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19856 }
19857
19858 // Finally, since the node is now dead, remove it from the graph.
19859 deleteAndRecombine(N);
19860
19861 if (Swapped)
19862 std::swap(BasePtr, Offset);
19863
19864 // Replace other uses of BasePtr that can be updated to use Ptr
19865 for (SDNode *OtherUse : OtherUses) {
19866 unsigned OffsetIdx = 1;
19867 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19868 OffsetIdx = 0;
19869 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
19870 "Expected BasePtr operand");
19871
19872 // We need to replace ptr0 in the following expression:
19873 // x0 * offset0 + y0 * ptr0 = t0
19874 // knowing that
19875 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19876 //
19877 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19878 // indexed load/store and the expression that needs to be re-written.
19879 //
19880 // Therefore, we have:
19881 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
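// Worked example (illustrative): if t0 = ptr0 + 8 (x0 = y0 = 1, offset0 = 8)
// and the pre-indexed access computes t1 = ptr0 + 4 (x1 = y1 = 1,
// offset1 = 4), then t0 = (8 - 4) + t1, i.e. the other use becomes t1 + 4.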
19882
19883 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
19884 const APInt &Offset0 = CN->getAPIntValue();
19885 const APInt &Offset1 = Offset->getAsAPIntVal();
19886 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19887 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19888 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19889 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19890
19891 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19892
19893 APInt CNV = Offset0;
19894 if (X0 < 0) CNV = -CNV;
19895 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19896 else CNV = CNV - Offset1;
19897
19898 SDLoc DL(OtherUse);
19899
19900 // We can now generate the new expression.
19901 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19902 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19903
19904 SDValue NewUse =
19905 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
19906 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
19907 deleteAndRecombine(OtherUse);
19908 }
19909
19910 // Replace the uses of Ptr with uses of the updated base value.
19911 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19912 deleteAndRecombine(Ptr.getNode());
19913 AddToWorklist(Result.getNode());
19914
19915 return true;
19916}
19917
19918 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19919 SDValue &BasePtr, SDValue &Offset,
19920 ISD::MemIndexedMode &AM,
19921 SelectionDAG &DAG,
19922 const TargetLowering &TLI) {
19923 if (PtrUse == N ||
19924 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19925 return false;
19926
19927 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19928 return false;
19929
19930 // Don't create an indexed load / store with zero offset.
19931 if (isNullConstant(Offset))
19932 return false;
19933
19934 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19935 return false;
19936
19937 SmallPtrSet<const SDNode *, 32> Visited;
19938 constexpr unsigned int MaxSteps = 8192;
19939 for (SDNode *User : BasePtr->users()) {
19940 if (User == Ptr.getNode())
19941 continue;
19942
19943 // No if there's a later user which could perform the index instead.
19944 if (isa<MemSDNode>(User)) {
19945 bool IsLoad = true;
19946 bool IsMasked = false;
19947 SDValue OtherPtr;
19948 if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19949 IsMasked, OtherPtr, TLI)) {
19950 SmallVector<const SDNode *, 2> Worklist;
19951 Worklist.push_back(User);
19952 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19953 return false;
19954 }
19955 }
19956
19957 // If all the uses are load / store addresses, then don't do the
19958 // transformation.
19959 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19960 for (SDNode *UserUser : User->users())
19961 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19962 return false;
19963 }
19964 }
19965 return true;
19966}
19967
19968 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19969 bool &IsMasked, SDValue &Ptr,
19970 SDValue &BasePtr, SDValue &Offset,
19971 ISD::MemIndexedMode &AM,
19972 SelectionDAG &DAG,
19973 const TargetLowering &TLI) {
19974 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19975 IsMasked, Ptr, TLI) ||
19976 Ptr->hasOneUse())
19977 return nullptr;
19978
19979 // Try turning it into a post-indexed load / store except when
19980 // 1) All uses are load / store ops that use it as base ptr (and
19981 // it may be folded as an addressing mode).
19982 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19983 // nor a successor of N. Otherwise, if Op is folded that would
19984 // create a cycle.
19985 constexpr unsigned int MaxSteps = 8192;
19986 for (SDNode *Op : Ptr->users()) {
19987 // Check for #1.
19988 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19989 continue;
19990
19991 // Check for #2.
19992 SmallPtrSet<const SDNode *, 32> Visited;
19993 SmallVector<const SDNode *, 8> Worklist;
19994 // Ptr is predecessor to both N and Op.
19995 Visited.insert(Ptr.getNode());
19996 Worklist.push_back(N);
19997 Worklist.push_back(Op);
19998 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19999 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
20000 return Op;
20001 }
20002 return nullptr;
20003}
20004
20005 /// Try to combine a load/store with an add/sub of the base pointer node into a
20006 /// post-indexed load/store. The transformation effectively folds the add/subtract
20007 /// into the new indexed load/store, and all uses of the add/subtract are
20008 /// redirected to the new load/store.
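/// An illustrative shape of the transformation (subject to the target's
/// supported addressing modes):
///   t1 = load t0
///   t2 = add t0, 4
/// becomes a post-indexed load of t0 that also produces the incremented
/// pointer, and the users of t2 are rewired to that pointer result.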
20009bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
20010 if (Level < AfterLegalizeDAG)
20011 return false;
20012
20013 bool IsLoad = true;
20014 bool IsMasked = false;
20015 SDValue Ptr;
20016 SDValue BasePtr;
20017 SDValue Offset;
20018 ISD::MemIndexedMode AM = ISD::UNINDEXED;
20019 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
20020 Offset, AM, DAG, TLI);
20021 if (!Op)
20022 return false;
20023
20024 SDValue Result;
20025 if (!IsMasked)
20026 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
20027 Offset, AM)
20028 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
20029 BasePtr, Offset, AM);
20030 else
20031 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
20032 BasePtr, Offset, AM)
20033 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
20034 BasePtr, Offset, AM);
20035 ++PostIndexedNodes;
20036 ++NodesCombined;
20037 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
20038 Result.dump(&DAG); dbgs() << '\n');
20039 WorklistRemover DeadNodes(*this);
20040 if (IsLoad) {
20041 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
20042 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
20043 } else {
20044 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
20045 }
20046
20047 // Finally, since the node is now dead, remove it from the graph.
20048 deleteAndRecombine(N);
20049
20050 // Replace the uses of Op with uses of the updated base value.
20051 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
20052 Result.getValue(IsLoad ? 1 : 0));
20053 deleteAndRecombine(Op);
20054 return true;
20055}
20056
20057/// Return the base-pointer arithmetic from an indexed \p LD.
20058SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
20059 ISD::MemIndexedMode AM = LD->getAddressingMode();
20060 assert(AM != ISD::UNINDEXED);
20061 SDValue BP = LD->getOperand(1);
20062 SDValue Inc = LD->getOperand(2);
20063
20064 // Some backends use TargetConstants for load offsets, but don't expect
20065 // TargetConstants in general ADD nodes. We can convert these constants into
20066 // regular Constants (if the constant is not opaque).
20067 assert((Inc.getOpcode() != ISD::TargetConstant ||
20068 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
20069 "Cannot split out indexing using opaque target constants");
20070 if (Inc.getOpcode() == ISD::TargetConstant) {
20071 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
20072 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
20073 ConstInc->getValueType(0));
20074 }
20075
20076 unsigned Opc =
20077 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
20078 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
20079}
20080
20081 static inline ElementCount numVectorEltsOrZero(EVT T) {
20082 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
20083}
20084
20085bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
20086 EVT STType = Val.getValueType();
20087 EVT STMemType = ST->getMemoryVT();
20088 if (STType == STMemType)
20089 return true;
20090 if (isTypeLegal(STMemType))
20091 return false; // fail.
20092 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
20093 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
20094 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
20095 return true;
20096 }
20097 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
20098 STType.isInteger() && STMemType.isInteger()) {
20099 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
20100 return true;
20101 }
20102 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
20103 Val = DAG.getBitcast(STMemType, Val);
20104 return true;
20105 }
20106 return false; // fail.
20107}
20108
20109bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
20110 EVT LDMemType = LD->getMemoryVT();
20111 EVT LDType = LD->getValueType(0);
20112 assert(Val.getValueType() == LDMemType &&
20113 "Attempting to extend value of non-matching type");
20114 if (LDType == LDMemType)
20115 return true;
20116 if (LDMemType.isInteger() && LDType.isInteger()) {
20117 switch (LD->getExtensionType()) {
20118 case ISD::NON_EXTLOAD:
20119 Val = DAG.getBitcast(LDType, Val);
20120 return true;
20121 case ISD::EXTLOAD:
20122 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
20123 return true;
20124 case ISD::SEXTLOAD:
20125 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
20126 return true;
20127 case ISD::ZEXTLOAD:
20128 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
20129 return true;
20130 }
20131 }
20132 return false;
20133}
20134
20135StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20136 int64_t &Offset) {
20137 SDValue Chain = LD->getOperand(0);
20138
20139 // Look through CALLSEQ_START.
20140 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20141 Chain = Chain->getOperand(0);
20142
20143 StoreSDNode *ST = nullptr;
20144 SmallVector<SDValue, 8> Aliases;
20145 if (Chain.getOpcode() == ISD::TokenFactor) {
20146 // Look for unique store within the TokenFactor.
20147 for (SDValue Op : Chain->ops()) {
20148 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20149 if (!Store)
20150 continue;
20151 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20152 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20153 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20154 continue;
20155 // Make sure the store is not aliased with any nodes in TokenFactor.
20156 GatherAllAliases(Store, Chain, Aliases);
20157 if (Aliases.empty() ||
20158 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20159 ST = Store;
20160 break;
20161 }
20162 } else {
20163 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20164 if (Store) {
20165 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20166 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20167 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20168 ST = Store;
20169 }
20170 }
20171
20172 return ST;
20173}
20174
20175SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20176 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20177 return SDValue();
20178 SDValue Chain = LD->getOperand(0);
20179 int64_t Offset;
20180
20181 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20182 // TODO: Relax this restriction for unordered atomics (see D66309)
20183 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20184 return SDValue();
20185
20186 EVT LDType = LD->getValueType(0);
20187 EVT LDMemType = LD->getMemoryVT();
20188 EVT STMemType = ST->getMemoryVT();
20189 EVT STType = ST->getValue().getValueType();
20190
20191 // There are two cases to consider here:
20192 // 1. The store is fixed width and the load is scalable. In this case we
20193 // don't know at compile time if the store completely envelops the load
20194 // so we abandon the optimisation.
20195 // 2. The store is scalable and the load is fixed width. We could
20196 // potentially support a limited number of cases here, but there has been
20197 // no cost-benefit analysis to prove it's worth it.
20198 bool LdStScalable = LDMemType.isScalableVT();
20199 if (LdStScalable != STMemType.isScalableVT())
20200 return SDValue();
20201
20202 // If we are dealing with scalable vectors on a big endian platform the
20203 // calculation of offsets below becomes trickier, since we do not know at
20204 // compile time the absolute size of the vector. Until we've done more
20205 // analysis on big-endian platforms it seems better to bail out for now.
20206 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20207 return SDValue();
20208
20209 // Normalize for endianness. After this, Offset=0 will denote that the least
20210 // significant bit in the loaded value maps to the least significant bit in
20211 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20212 // n:th least significant byte of the stored value.
20213 int64_t OrigOffset = Offset;
20214 if (DAG.getDataLayout().isBigEndian())
20215 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20216 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20217 8 -
20218 Offset;
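// Illustrative example: for an 8-byte store feeding a 2-byte load at the
// same address (OrigOffset == 0) on a big-endian target, Offset becomes
// (64 - 16) / 8 - 0 == 6, i.e. the load reads the two most significant
// bytes of the stored value.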
20219
20220 // Check that the stored value covers all bits that are loaded.
20221 bool STCoversLD;
20222
20223 TypeSize LdMemSize = LDMemType.getSizeInBits();
20224 TypeSize StMemSize = STMemType.getSizeInBits();
20225 if (LdStScalable)
20226 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20227 else
20228 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20229 StMemSize.getFixedValue());
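// For example (illustrative): a 2-byte load at byte Offset 3 of an 8-byte
// store is covered, since 3 * 8 + 16 <= 64.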
20230
20231 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20232 if (LD->isIndexed()) {
20233 // Cannot handle opaque target constants and we must respect the user's
20234 // request not to split indexes from loads.
20235 if (!canSplitIdx(LD))
20236 return SDValue();
20237 SDValue Idx = SplitIndexingFromLoad(LD);
20238 SDValue Ops[] = {Val, Idx, Chain};
20239 return CombineTo(LD, Ops, 3);
20240 }
20241 return CombineTo(LD, Val, Chain);
20242 };
20243
20244 if (!STCoversLD)
20245 return SDValue();
20246
20247 // Memory as copy space (potentially masked).
20248 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20249 // Simple case: Direct non-truncating forwarding
20250 if (LDType.getSizeInBits() == LdMemSize)
20251 return ReplaceLd(LD, ST->getValue(), Chain);
20252 // Can we model the truncate and extension with an and mask?
20253 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20254 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20255 // Mask to size of LDMemType
20256 auto Mask =
20257 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20258 StMemSize.getFixedValue()),
20259 SDLoc(ST), STType);
20260 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20261 return ReplaceLd(LD, Val, Chain);
20262 }
20263 }
20264
20265 // Handle some cases for big-endian that would be Offset 0 and handled for
20266 // little-endian.
20267 SDValue Val = ST->getValue();
20268 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20269 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20270 !LDType.isVector() && isTypeLegal(STType) &&
20271 TLI.isOperationLegal(ISD::SRL, STType)) {
20272 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20273 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20274 Offset = 0;
20275 }
20276 }
20277
20278 // TODO: Deal with nonzero offset.
20279 if (LD->getBasePtr().isUndef() || Offset != 0)
20280 return SDValue();
20281 // Model necessary truncations / extensions.
20282 // Truncate Value To Stored Memory Size.
20283 do {
20284 if (!getTruncatedStoreValue(ST, Val))
20285 break;
20286 if (!isTypeLegal(LDMemType))
20287 break;
20288 if (STMemType != LDMemType) {
20289 // TODO: Support vectors? This requires extract_subvector/bitcast.
20290 if (!STMemType.isVector() && !LDMemType.isVector() &&
20291 STMemType.isInteger() && LDMemType.isInteger())
20292 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20293 else
20294 break;
20295 }
20296 if (!extendLoadedValueToExtension(LD, Val))
20297 break;
20298 return ReplaceLd(LD, Val, Chain);
20299 } while (false);
20300
20301 // On failure, cleanup dead nodes we may have created.
20302 if (Val->use_empty())
20303 deleteAndRecombine(Val.getNode());
20304 return SDValue();
20305}
20306
20307SDValue DAGCombiner::visitLOAD(SDNode *N) {
20308 LoadSDNode *LD = cast<LoadSDNode>(N);
20309 SDValue Chain = LD->getChain();
20310 SDValue Ptr = LD->getBasePtr();
20311
20312 // If load is not volatile and there are no uses of the loaded value (and
20313 // the updated indexed value in case of indexed loads), change uses of the
20314 // chain value into uses of the chain input (i.e. delete the dead load).
20315 // TODO: Allow this for unordered atomics (see D66309)
20316 if (LD->isSimple()) {
20317 if (N->getValueType(1) == MVT::Other) {
20318 // Unindexed loads.
20319 if (!N->hasAnyUseOfValue(0)) {
20320 // It's not safe to use the two value CombineTo variant here. e.g.
20321 // v1, chain2 = load chain1, loc
20322 // v2, chain3 = load chain2, loc
20323 // v3 = add v2, c
20324 // Now we replace use of chain2 with chain1. This makes the second load
20325 // isomorphic to the one we are deleting, and thus makes this load live.
20326 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20327 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20328 dbgs() << "\n");
20329 WorklistRemover DeadNodes(*this);
20330 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20331 AddUsersToWorklist(Chain.getNode());
20332 if (N->use_empty())
20333 deleteAndRecombine(N);
20334
20335 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20336 }
20337 } else {
20338 // Indexed loads.
20339 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20340
20341 // If this load has an opaque TargetConstant offset, then we cannot split
20342 // the indexing into an add/sub directly (that TargetConstant may not be
20343 // valid for a different type of node, and we cannot convert an opaque
20344 // target constant into a regular constant).
20345 bool CanSplitIdx = canSplitIdx(LD);
20346
20347 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20348 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20349 SDValue Index;
20350 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20351 Index = SplitIndexingFromLoad(LD);
20352 // Try to fold the base pointer arithmetic into subsequent loads and
20353 // stores.
20354 AddUsersToWorklist(N);
20355 } else
20356 Index = DAG.getUNDEF(N->getValueType(1));
20357 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20358 dbgs() << "\nWith: "; Undef.dump(&DAG);
20359 dbgs() << " and 2 other values\n");
20360 WorklistRemover DeadNodes(*this);
20361 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20362 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20363 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20364 deleteAndRecombine(N);
20365 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20366 }
20367 }
20368 }
20369
20370 // If this load is directly stored, replace the load value with the stored
20371 // value.
20372 if (auto V = ForwardStoreValueToDirectLoad(LD))
20373 return V;
20374
20375 // Try to infer better alignment information than the load already has.
20376 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20377 !LD->isAtomic()) {
20378 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20379 if (*Alignment > LD->getAlign() &&
20380 isAligned(*Alignment, LD->getSrcValueOffset())) {
20381 SDValue NewLoad = DAG.getExtLoad(
20382 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20383 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20384 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20385 // NewLoad will always be N as we are only refining the alignment
20386 assert(NewLoad.getNode() == N);
20387 (void)NewLoad;
20388 }
20389 }
20390 }
20391
20392 if (LD->isUnindexed()) {
20393 // Walk up chain skipping non-aliasing memory nodes.
20394 SDValue BetterChain = FindBetterChain(LD, Chain);
20395
20396 // If there is a better chain.
20397 if (Chain != BetterChain) {
20398 SDValue ReplLoad;
20399
20400 // Replace the chain to avoid dependency.
20401 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20402 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20403 BetterChain, Ptr, LD->getMemOperand());
20404 } else {
20405 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20406 LD->getValueType(0),
20407 BetterChain, Ptr, LD->getMemoryVT(),
20408 LD->getMemOperand());
20409 }
20410
20411 // Create token factor to keep old chain connected.
20412 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20413 MVT::Other, Chain, ReplLoad.getValue(1));
20414
20415 // Replace uses with load result and token factor
20416 return CombineTo(N, ReplLoad.getValue(0), Token);
20417 }
20418 }
20419
20420 // Try transforming N to an indexed load.
20421 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20422 return SDValue(N, 0);
20423
20424 // Try to slice up N to more direct loads if the slices are mapped to
20425 // different register banks or pairing can take place.
20426 if (SliceUpLoad(N))
20427 return SDValue(N, 0);
20428
20429 return SDValue();
20430}
20431
20432namespace {
20433
20434/// Helper structure used to slice a load in smaller loads.
20435/// Basically a slice is obtained from the following sequence:
20436/// Origin = load Ty1, Base
20437/// Shift = srl Ty1 Origin, CstTy Amount
20438/// Inst = trunc Shift to Ty2
20439///
20440/// Then, it will be rewritten into:
20441/// Slice = load SliceTy, Base + SliceOffset
20442/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20443///
20444/// SliceTy is deduced from the number of bits that are actually used to
20445/// build Inst.
20446struct LoadedSlice {
20447 /// Helper structure used to compute the cost of a slice.
20448 struct Cost {
20449 /// Are we optimizing for code size.
20450 bool ForCodeSize = false;
20451
20452 /// Various cost.
20453 unsigned Loads = 0;
20454 unsigned Truncates = 0;
20455 unsigned CrossRegisterBanksCopies = 0;
20456 unsigned ZExts = 0;
20457 unsigned Shift = 0;
20458
20459 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20460
20461 /// Get the cost of one isolated slice.
20462 Cost(const LoadedSlice &LS, bool ForCodeSize)
20463 : ForCodeSize(ForCodeSize), Loads(1) {
20464 EVT TruncType = LS.Inst->getValueType(0);
20465 EVT LoadedType = LS.getLoadedType();
20466 if (TruncType != LoadedType &&
20467 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20468 ZExts = 1;
20469 }
20470
20471 /// Account for slicing gain in the current cost.
20472 /// Slicing provides a few gains, like removing a shift or a
20473 /// truncate. This method allows growing the cost of the original
20474 /// load with the gain from this slice.
20475 void addSliceGain(const LoadedSlice &LS) {
20476 // Each slice saves a truncate.
20477 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20478 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20479 ++Truncates;
20480 // If there is a shift amount, this slice gets rid of it.
20481 if (LS.Shift)
20482 ++Shift;
20483 // If this slice can merge a cross register bank copy, account for it.
20484 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20485 ++CrossRegisterBanksCopies;
20486 }
20487
20488 Cost &operator+=(const Cost &RHS) {
20489 Loads += RHS.Loads;
20490 Truncates += RHS.Truncates;
20491 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20492 ZExts += RHS.ZExts;
20493 Shift += RHS.Shift;
20494 return *this;
20495 }
20496
20497 bool operator==(const Cost &RHS) const {
20498 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20499 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20500 ZExts == RHS.ZExts && Shift == RHS.Shift;
20501 }
20502
20503 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20504
20505 bool operator<(const Cost &RHS) const {
20506 // Assume cross register banks copies are as expensive as loads.
20507 // FIXME: Do we want some more target hooks?
20508 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20509 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20510 // Unless we are optimizing for code size, consider the
20511 // expensive operation first.
20512 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20513 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20514 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20515 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20516 }
20517
20518 bool operator>(const Cost &RHS) const { return RHS < *this; }
20519
20520 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20521
20522 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20523 };
20524
20525 // The last instruction that represents the slice. This should be a
20526 // truncate instruction.
20527 SDNode *Inst;
20528
20529 // The original load instruction.
20530 LoadSDNode *Origin;
20531
20532 // The right shift amount in bits from the original load.
20533 unsigned Shift;
20534
20535 // The DAG from which Origin came.
20536 // This is used to get some contextual information about legal types, etc.
20537 SelectionDAG *DAG;
20538
20539 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20540 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20541 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20542
20543 /// Get the bits used in a chunk of bits \p BitWidth large.
20544 /// \return Result is \p BitWidth and has used bits set to 1 and
20545 /// not used bits set to 0.
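/// For example (illustrative): an i8 truncate of an i32 load with Shift == 16
/// uses bits [16, 24), i.e. the mask 0x00ff0000.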
20546 APInt getUsedBits() const {
20547 // Reproduce the trunc(lshr) sequence:
20548 // - Start from the truncated value.
20549 // - Zero extend to the desired bit width.
20550 // - Shift left.
20551 assert(Origin && "No original load to compare against.");
20552 unsigned BitWidth = Origin->getValueSizeInBits(0);
20553 assert(Inst && "This slice is not bound to an instruction");
20554 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20555 "Extracted slice is bigger than the whole type!");
20556 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20557 UsedBits.setAllBits();
20558 UsedBits = UsedBits.zext(BitWidth);
20559 UsedBits <<= Shift;
20560 return UsedBits;
20561 }
20562
20563 /// Get the size of the slice to be loaded in bytes.
20564 unsigned getLoadedSize() const {
20565 unsigned SliceSize = getUsedBits().popcount();
20566 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20567 return SliceSize / 8;
20568 }
20569
20570 /// Get the type that will be loaded for this slice.
20571 /// Note: This may not be the final type for the slice.
20572 EVT getLoadedType() const {
20573 assert(DAG && "Missing context");
20574 LLVMContext &Ctxt = *DAG->getContext();
20575 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20576 }
20577
20578 /// Get the alignment of the load used for this slice.
20579 Align getAlign() const {
20580 Align Alignment = Origin->getAlign();
20581 uint64_t Offset = getOffsetFromBase();
20582 if (Offset != 0)
20583 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20584 return Alignment;
20585 }
20586
20587 /// Check if this slice can be rewritten with legal operations.
20588 bool isLegal() const {
20589 // An invalid slice is not legal.
20590 if (!Origin || !Inst || !DAG)
20591 return false;
20592
20593 // Offsets are for indexed load only, we do not handle that.
20594 if (!Origin->getOffset().isUndef())
20595 return false;
20596
20597 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20598
20599 // Check that the type is legal.
20600 EVT SliceType = getLoadedType();
20601 if (!TLI.isTypeLegal(SliceType))
20602 return false;
20603
20604 // Check that the load is legal for this type.
20605 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20606 return false;
20607
20608 // Check that the offset can be computed.
20609 // 1. Check its type.
20610 EVT PtrType = Origin->getBasePtr().getValueType();
20611 if (PtrType == MVT::Untyped || PtrType.isExtended())
20612 return false;
20613
20614 // 2. Check that it fits in the immediate.
20615 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20616 return false;
20617
20618 // 3. Check that the computation is legal.
20619 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20620 return false;
20621
20622 // Check that the zext is legal if it needs one.
20623 EVT TruncateType = Inst->getValueType(0);
20624 if (TruncateType != SliceType &&
20625 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20626 return false;
20627
20628 return true;
20629 }
20630
20631 /// Get the offset in bytes of this slice in the original chunk of
20632 /// bits.
20633 /// \pre DAG != nullptr.
20634 uint64_t getOffsetFromBase() const {
20635 assert(DAG && "Missing context.");
20636 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20637 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20638 uint64_t Offset = Shift / 8;
20639 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20640 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20641 "The size of the original loaded type is not a multiple of a"
20642 " byte.");
20643 // If Offset is bigger than TySizeInBytes, it means we are loading all
20644 // zeros. This should have been optimized before in the process.
20645 assert(TySizeInBytes > Offset &&
20646 "Invalid shift amount for given loaded size");
20647 if (IsBigEndian)
20648 Offset = TySizeInBytes - Offset - getLoadedSize();
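// Illustrative example: a one-byte slice of an i32 load with Shift == 16 is
// at byte offset 2 from the base on little-endian, and at 4 - 2 - 1 == 1 on
// big-endian.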
20649 return Offset;
20650 }
20651
20652 /// Generate the sequence of instructions to load the slice
20653 /// represented by this object and redirect the uses of this slice to
20654 /// this new sequence of instructions.
20655 /// \pre this->Inst && this->Origin are valid Instructions and this
20656 /// object passed the legal check: LoadedSlice::isLegal returned true.
20657 /// \return The last instruction of the sequence used to load the slice.
20658 SDValue loadSlice() const {
20659 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20660 const SDValue &OldBaseAddr = Origin->getBasePtr();
20661 SDValue BaseAddr = OldBaseAddr;
20662 // Get the offset in that chunk of bytes w.r.t. the endianness.
20663 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20664 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20665 if (Offset) {
20666 // BaseAddr = BaseAddr + Offset.
20667 EVT ArithType = BaseAddr.getValueType();
20668 SDLoc DL(Origin);
20669 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20670 DAG->getConstant(Offset, DL, ArithType));
20671 }
20672
20673 // Create the type of the loaded slice according to its size.
20674 EVT SliceType = getLoadedType();
20675
20676 // Create the load for the slice.
20677 SDValue LastInst =
20678 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20679 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
20680 Origin->getMemOperand()->getFlags());
20681 // If the final type is not the same as the loaded type, this means that
20682 // we have to pad with zero. Create a zero extend for that.
20683 EVT FinalType = Inst->getValueType(0);
20684 if (SliceType != FinalType)
20685 LastInst =
20686 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20687 return LastInst;
20688 }
20689
20690 /// Check if this slice can be merged with an expensive cross register
20691 /// bank copy. E.g.,
20692 /// i = load i32
20693 /// f = bitcast i32 i to float
20694 bool canMergeExpensiveCrossRegisterBankCopy() const {
20695 if (!Inst || !Inst->hasOneUse())
20696 return false;
20697 SDNode *User = *Inst->user_begin();
20698 if (User->getOpcode() != ISD::BITCAST)
20699 return false;
20700 assert(DAG && "Missing context");
20701 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20702 EVT ResVT = User->getValueType(0);
20703 const TargetRegisterClass *ResRC =
20704 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20705 const TargetRegisterClass *ArgRC =
20706 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20707 User->getOperand(0)->isDivergent());
20708 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20709 return false;
20710
20711 // At this point, we know that we perform a cross-register-bank copy.
20712 // Check if it is expensive.
20713 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20714 // Assume bitcasts are cheap, unless both register classes do not
20715 // explicitly share a common sub class.
20716 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20717 return false;
20718
20719 // Check if it will be merged with the load.
20720 // 1. Check the alignment / fast memory access constraint.
20721 unsigned IsFast = 0;
20722 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20723 Origin->getAddressSpace(), getAlign(),
20724 Origin->getMemOperand()->getFlags(), &IsFast) ||
20725 !IsFast)
20726 return false;
20727
20728 // 2. Check that the load is a legal operation for that type.
20729 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20730 return false;
20731
20732 // 3. Check that we do not have a zext in the way.
20733 if (Inst->getValueType(0) != getLoadedType())
20734 return false;
20735
20736 return true;
20737 }
20738};
20739
20740} // end anonymous namespace
20741
20742/// Check that all bits set in \p UsedBits form a dense region, i.e.,
20743/// \p UsedBits looks like 0..0 1..1 0..0.
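/// For example (illustrative): 0x00fff000 is dense, while 0x00ff00f0 is not.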
20744static bool areUsedBitsDense(const APInt &UsedBits) {
20745 // If all the bits are one, this is dense!
20746 if (UsedBits.isAllOnes())
20747 return true;
20748
20749 // Get rid of the unused bits on the right.
20750 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
20751 // Get rid of the unused bits on the left.
20752 if (NarrowedUsedBits.countl_zero())
20753 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
20754 // Check that the chunk of bits is completely used.
20755 return NarrowedUsedBits.isAllOnes();
20756}
20757
20758/// Check whether or not \p First and \p Second are next to each other
20759/// in memory. This means that there is no hole between the bits loaded
20760/// by \p First and the bits loaded by \p Second.
20761static bool areSlicesNextToEachOther(const LoadedSlice &First,
20762 const LoadedSlice &Second) {
20763 assert(First.Origin == Second.Origin && First.Origin &&
20764 "Unable to match different memory origins.");
20765 APInt UsedBits = First.getUsedBits();
20766 assert((UsedBits & Second.getUsedBits()) == 0 &&
20767 "Slices are not supposed to overlap.");
20768 UsedBits |= Second.getUsedBits();
20769 return areUsedBitsDense(UsedBits);
20770}
20771
20772/// Adjust the \p GlobalLSCost according to the target
20773 /// pairing capabilities and the layout of the slices.
20774 /// \pre \p GlobalLSCost should account for at least as many loads as
20775 /// there are in the slices in \p LoadedSlices.
20776 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20777 LoadedSlice::Cost &GlobalLSCost) {
20778 unsigned NumberOfSlices = LoadedSlices.size();
20779 // If there are fewer than 2 elements, no pairing is possible.
20780 if (NumberOfSlices < 2)
20781 return;
20782
20783 // Sort the slices so that elements that are likely to be next to each
20784 // other in memory are next to each other in the list.
20785 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
20786 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
20787 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
20788 });
20789 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
20790 // First (resp. Second) is the first (resp. Second) potentially candidate
20791 // to be placed in a paired load.
20792 const LoadedSlice *First = nullptr;
20793 const LoadedSlice *Second = nullptr;
20794 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
20795 // Set the beginning of the pair.
20796 First = Second) {
20797 Second = &LoadedSlices[CurrSlice];
20798
20799 // If First is NULL, it means we start a new pair.
20800 // Get to the next slice.
20801 if (!First)
20802 continue;
20803
20804 EVT LoadedType = First->getLoadedType();
20805
20806 // If the types of the slices are different, we cannot pair them.
20807 if (LoadedType != Second->getLoadedType())
20808 continue;
20809
20810 // Check if the target supplies paired loads for this type.
20811 Align RequiredAlignment;
20812 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
20813 // move to the next pair, this type is hopeless.
20814 Second = nullptr;
20815 continue;
20816 }
20817 // Check if we meet the alignment requirement.
20818 if (First->getAlign() < RequiredAlignment)
20819 continue;
20820
20821 // Check that both loads are next to each other in memory.
20822 if (!areSlicesNextToEachOther(*First, *Second))
20823 continue;
20824
20825 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
20826 --GlobalLSCost.Loads;
20827 // Move to the next pair.
20828 Second = nullptr;
20829 }
20830}
20831
20832/// Check the profitability of all involved LoadedSlice.
20833 /// Currently, it is considered profitable if there are exactly two
20834/// involved slices (1) which are (2) next to each other in memory, and
20835/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20836///
20837/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20838/// the elements themselves.
20839///
20840/// FIXME: When the cost model will be mature enough, we can relax
20841/// constraints (1) and (2).
20842 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20843 const APInt &UsedBits, bool ForCodeSize) {
20844 unsigned NumberOfSlices = LoadedSlices.size();
20845 if (StressLoadSlicing)
20846 return NumberOfSlices > 1;
20847
20848 // Check (1).
20849 if (NumberOfSlices != 2)
20850 return false;
20851
20852 // Check (2).
20853 if (!areUsedBitsDense(UsedBits))
20854 return false;
20855
20856 // Check (3).
20857 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20858 // The original code has one big load.
20859 OrigCost.Loads = 1;
20860 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20861 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20862 // Accumulate the cost of all the slices.
20863 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20864 GlobalSlicingCost += SliceCost;
20865
20866 // Account as cost in the original configuration the gain obtained
20867 // with the current slices.
20868 OrigCost.addSliceGain(LS);
20869 }
20870
20871 // If the target supports paired load, adjust the cost accordingly.
20872 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20873 return OrigCost > GlobalSlicingCost;
20874}
20875
20876/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20877/// operations, split it in the various pieces being extracted.
20878///
20879/// This sort of thing is introduced by SROA.
20880/// This slicing takes care not to insert overlapping loads.
20881/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20882bool DAGCombiner::SliceUpLoad(SDNode *N) {
20883 if (Level < AfterLegalizeDAG)
20884 return false;
20885
20886 LoadSDNode *LD = cast<LoadSDNode>(N);
20887 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20888 !LD->getValueType(0).isInteger())
20889 return false;
20890
20891 // The algorithm to split up a load of a scalable vector into individual
20892 // elements currently requires knowing the length of the loaded type,
20893 // so will need adjusting to work on scalable vectors.
20894 if (LD->getValueType(0).isScalableVector())
20895 return false;
20896
20897 // Keep track of already used bits to detect overlapping values.
20898 // In that case, we will just abort the transformation.
20899 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20900
20901 SmallVector<LoadedSlice, 4> LoadedSlices;
20902
20903 // Check if this load is used as several smaller chunks of bits.
20904 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20905 // of computation for each trunc.
20906 for (SDUse &U : LD->uses()) {
20907 // Skip the uses of the chain.
20908 if (U.getResNo() != 0)
20909 continue;
20910
20911 SDNode *User = U.getUser();
20912 unsigned Shift = 0;
20913
20914 // Check if this is a trunc(lshr).
20915 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20916 isa<ConstantSDNode>(User->getOperand(1))) {
20917 Shift = User->getConstantOperandVal(1);
20918 User = *User->user_begin();
20919 }
20920
20921 // At this point, User is a truncate iff we encountered trunc or
20922 // trunc(lshr).
20923 if (User->getOpcode() != ISD::TRUNCATE)
20924 return false;
20925
20926 // The width of the type must be a power of 2 and greater than 8-bits.
20927 // Otherwise the load cannot be represented in LLVM IR.
20928 // Moreover, if we shifted with a non-8-bits multiple, the slice
20929 // will be across several bytes. We do not support that.
20930 unsigned Width = User->getValueSizeInBits(0);
20931 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20932 return false;
20933
20934 // Build the slice for this chain of computations.
20935 LoadedSlice LS(User, LD, Shift, &DAG);
20936 APInt CurrentUsedBits = LS.getUsedBits();
20937
20938 // Check if this slice overlaps with another.
20939 if ((CurrentUsedBits & UsedBits) != 0)
20940 return false;
20941 // Update the bits used globally.
20942 UsedBits |= CurrentUsedBits;
20943
20944 // Check if the new slice would be legal.
20945 if (!LS.isLegal())
20946 return false;
20947
20948 // Record the slice.
20949 LoadedSlices.push_back(LS);
20950 }
20951
20952 // Abort slicing if it does not seem to be profitable.
20953 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20954 return false;
20955
20956 ++SlicedLoads;
20957
20958 // Rewrite each chain to use an independent load.
20959 // By construction, each chain can be represented by a unique load.
20960
20961 // Prepare the argument for the new token factor for all the slices.
20962 SmallVector<SDValue, 8> ArgChains;
20963 for (const LoadedSlice &LS : LoadedSlices) {
20964 SDValue SliceInst = LS.loadSlice();
20965 CombineTo(LS.Inst, SliceInst, true);
20966 if (SliceInst.getOpcode() != ISD::LOAD)
20967 SliceInst = SliceInst.getOperand(0);
20968 assert(SliceInst->getOpcode() == ISD::LOAD &&
20969 "It takes more than a zext to get to the loaded slice!!");
20970 ArgChains.push_back(SliceInst.getValue(1));
20971 }
20972
20973 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20974 ArgChains);
20975 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20976 AddToWorklist(Chain.getNode());
20977 return true;
20978}
20979
20980 /// Check to see if V is (and load (ptr), imm), where the load has
20981/// specific bytes cleared out. If so, return the byte size being masked out
20982/// and the shift amount.
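/// For example (illustrative): for an i32 value AND'ed with 0xFFFF00FF, the
/// inverted mask is 0xFF00, so a single byte at byte offset 1 is being
/// cleared and the returned pair is (1, 1).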
20983static std::pair<unsigned, unsigned>
20984 CheckForMaskedLoad(SDValue V, SDValue Chain, SDValue Ptr) {
20985 std::pair<unsigned, unsigned> Result(0, 0);
20986
20987 // Check for the structure we're looking for.
20988 if (V->getOpcode() != ISD::AND ||
20989 !isa<ConstantSDNode>(V->getOperand(1)) ||
20990 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20991 return Result;
20992
20993 // Check the chain and pointer.
20994 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20995 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20996
20997 // This only handles simple types.
20998 if (V.getValueType() != MVT::i16 &&
20999 V.getValueType() != MVT::i32 &&
21000 V.getValueType() != MVT::i64)
21001 return Result;
21002
21003 // Check the constant mask. Invert it so that the bits being masked out are
21004 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
21005 // follow the sign bit for uniformity.
21006 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
21007 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
21008 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
21009 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
21010 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
21011 if (NotMaskLZ == 64) return Result; // All zero mask.
21012
21013 // See if we have a continuous run of bits. If so, we have 0*1+0*
21014 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
21015 return Result;
21016
21017 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
21018 if (V.getValueType() != MVT::i64 && NotMaskLZ)
21019 NotMaskLZ -= 64-V.getValueSizeInBits();
21020
21021 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
21022 switch (MaskedBytes) {
21023 case 1:
21024 case 2:
21025 case 4: break;
21026 default: return Result; // All one mask, or 5-byte mask.
21027 }
21028
 21029 // Verify that the first bit starts at a multiple of the mask size so that
 21030 // the access is aligned the same as the access width.
21031 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
21032
 21033 // For narrowing to be valid, it must be the case that the load is the
 21034 // immediately preceding memory operation before the store.
21035 if (LD == Chain.getNode())
21036 ; // ok.
21037 else if (Chain->getOpcode() == ISD::TokenFactor &&
21038 SDValue(LD, 1).hasOneUse()) {
 21039 // LD has only 1 chain use so there are no indirect dependencies.
21040 if (!LD->isOperandOf(Chain.getNode()))
21041 return Result;
21042 } else
21043 return Result; // Fail.
21044
21045 Result.first = MaskedBytes;
21046 Result.second = NotMaskTZ/8;
21047 return Result;
21048}
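// Worked example (hypothetical values, not from the source): for
//   V = (and (load i32 p), 0xFFFF00FF)  with  Ptr == p,
// the sign-extended inverted mask is NotMask = 0x000000000000FF00, giving
// NotMaskTZ = 8 and, after the i32 adjustment, NotMaskLZ = 16. That yields
// MaskedBytes = 1 with the masked byte starting at byte 1, so the function
// returns {1, 1}: one byte cleared out at byte offset 1 of the loaded value.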
21049
21050/// Check to see if IVal is something that provides a value as specified by
21051/// MaskInfo. If so, replace the specified store with a narrower store of
21052/// truncated IVal.
21053static SDValue
21054ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
21055 SDValue IVal, StoreSDNode *St,
21056 DAGCombiner *DC) {
21057 unsigned NumBytes = MaskInfo.first;
21058 unsigned ByteShift = MaskInfo.second;
21059 SelectionDAG &DAG = DC->getDAG();
21060
21061 // Check to see if IVal is all zeros in the part being masked in by the 'or'
21062 // that uses this. If not, this is not a replacement.
21063 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
21064 ByteShift*8, (ByteShift+NumBytes)*8);
21065 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
21066
21067 // Check that it is legal on the target to do this. It is legal if the new
21068 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
21069 // legalization. If the source type is legal, but the store type isn't, see
21070 // if we can use a truncating store.
21071 MVT VT = MVT::getIntegerVT(NumBytes * 8);
21072 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21073 bool UseTruncStore;
21074 if (DC->isTypeLegal(VT))
21075 UseTruncStore = false;
21076 else if (TLI.isTypeLegal(IVal.getValueType()) &&
21077 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
21078 UseTruncStore = true;
21079 else
21080 return SDValue();
21081
21082 // Can't do this for indexed stores.
21083 if (St->isIndexed())
21084 return SDValue();
21085
21086 // Check that the target doesn't think this is a bad idea.
21087 if (St->getMemOperand() &&
21088 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
21089 *St->getMemOperand()))
21090 return SDValue();
21091
21092 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
21093 // shifted by ByteShift and truncated down to NumBytes.
21094 if (ByteShift) {
21095 SDLoc DL(IVal);
21096 IVal = DAG.getNode(
21097 ISD::SRL, DL, IVal.getValueType(), IVal,
21098 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
21099 }
21100
21101 // Figure out the offset for the store and the alignment of the access.
21102 unsigned StOffset;
21103 if (DAG.getDataLayout().isLittleEndian())
21104 StOffset = ByteShift;
21105 else
21106 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
21107
21108 SDValue Ptr = St->getBasePtr();
21109 if (StOffset) {
21110 SDLoc DL(IVal);
 21111 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
 21112 }
21113
21114 ++OpsNarrowed;
21115 if (UseTruncStore)
21116 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
21117 St->getPointerInfo().getWithOffset(StOffset), VT,
21118 St->getBaseAlign());
21119
21120 // Truncate down to the new size.
21121 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
21122
21123 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
21124 St->getPointerInfo().getWithOffset(StOffset),
21125 St->getBaseAlign());
21126}
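// Illustrative sketch (hypothetical, little-endian): continuing the
// CheckForMaskedLoad example, a pattern like
//   store (or (and (load i32 p), 0xFFFF00FF), IVal), p
// where IVal is known zero outside bits [8,16) becomes an i8 store of
// (trunc (srl IVal, 8)) at address p+1, which makes the wide load and the
// 'or' dead when they have no other uses. The shift amount is ByteShift*8
// and, on little-endian targets, the pointer offset is simply ByteShift.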
21127
21128/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
21129/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
21130/// narrowing the load and store if it would end up being a win for performance
21131/// or code size.
21132SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
21133 StoreSDNode *ST = cast<StoreSDNode>(N);
21134 if (!ST->isSimple())
21135 return SDValue();
21136
21137 SDValue Chain = ST->getChain();
21138 SDValue Value = ST->getValue();
21139 SDValue Ptr = ST->getBasePtr();
21140 EVT VT = Value.getValueType();
21141
21142 if (ST->isTruncatingStore() || VT.isVector())
21143 return SDValue();
21144
21145 unsigned Opc = Value.getOpcode();
21146
21147 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21148 !Value.hasOneUse())
21149 return SDValue();
21150
21151 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21152 // is a byte mask indicating a consecutive number of bytes, check to see if
21153 // Y is known to provide just those bytes. If so, we try to replace the
21154 // load + replace + store sequence with a single (narrower) store, which makes
21155 // the load dead.
 21156 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
 21157 std::pair<unsigned, unsigned> MaskedLoad;
21158 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21159 if (MaskedLoad.first)
21160 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21161 Value.getOperand(1), ST,this))
21162 return NewST;
21163
21164 // Or is commutative, so try swapping X and Y.
21165 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21166 if (MaskedLoad.first)
21167 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21168 Value.getOperand(0), ST,this))
21169 return NewST;
21170 }
21171
 21172 if (!EnableReduceLoadOpStoreWidth)
 21173 return SDValue();
21174
21175 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21176 return SDValue();
21177
21178 SDValue N0 = Value.getOperand(0);
21179 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21180 Chain == SDValue(N0.getNode(), 1)) {
21181 LoadSDNode *LD = cast<LoadSDNode>(N0);
21182 if (LD->getBasePtr() != Ptr ||
21183 LD->getPointerInfo().getAddrSpace() !=
21184 ST->getPointerInfo().getAddrSpace())
21185 return SDValue();
21186
21187 // Find the type NewVT to narrow the load / op / store to.
21188 SDValue N1 = Value.getOperand(1);
21189 unsigned BitWidth = N1.getValueSizeInBits();
21190 APInt Imm = N1->getAsAPIntVal();
21191 if (Opc == ISD::AND)
21192 Imm.flipAllBits();
21193 if (Imm == 0 || Imm.isAllOnes())
21194 return SDValue();
21195 // Find least/most significant bit that need to be part of the narrowed
21196 // operation. We assume target will need to address/access full bytes, so
21197 // we make sure to align LSB and MSB at byte boundaries.
21198 unsigned BitsPerByteMask = 7u;
21199 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21200 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21201 unsigned NewBW = NextPowerOf2(MSB - LSB);
21202 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21203 // The narrowing should be profitable, the load/store operation should be
21204 // legal (or custom) and the store size should be equal to the NewVT width.
21205 while (NewBW < BitWidth &&
21206 (NewVT.getStoreSizeInBits() != NewBW ||
21207 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
 21208 (!ReduceLoadOpStoreWidthForceNarrowingProfitable &&
 21209 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21210 NewBW = NextPowerOf2(NewBW);
21211 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21212 }
21213 if (NewBW >= BitWidth)
21214 return SDValue();
21215
 21216 // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
 21217 // large enough to cover all bits that should be modified. This type might
 21218 // however be larger than really needed (such as i32 while we actually only
 21219 // need to modify one byte). Now we need to find out how to align the memory
 21220 // accesses to satisfy preferred alignments as well as avoiding accessing
 21221 // memory outside the store size of the original access.
21222
21223 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21224
 21225 // Let ShAmt denote the number of bits to skip, counted from the least
 21226 // significant bits of Imm. And let PtrOff denote how much the pointer needs
 21227 // to be offset (in bytes) for the new access.
21228 unsigned ShAmt = 0;
21229 uint64_t PtrOff = 0;
21230 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
 21231 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21232 if (ShAmt > LSB)
21233 return SDValue();
21234 if (ShAmt + NewBW < MSB)
21235 continue;
21236
21237 // Calculate PtrOff.
21238 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21239 ? VTStoreSize - NewBW - ShAmt
21240 : ShAmt;
21241 PtrOff = PtrAdjustmentInBits / 8;
21242
21243 // Now check if narrow access is allowed and fast, considering alignments.
21244 unsigned IsFast = 0;
21245 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21246 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21247 LD->getAddressSpace(), NewAlign,
21248 LD->getMemOperand()->getFlags(), &IsFast) &&
21249 IsFast)
21250 break;
21251 }
 21252 // If the loop above did not find an accepted ShAmt, we need to exit here.
21253 if (ShAmt + NewBW > VTStoreSize)
21254 return SDValue();
21255
21256 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21257 if (Opc == ISD::AND)
21258 NewImm.flipAllBits();
21259 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21260 SDValue NewPtr =
21261 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21262 SDValue NewLD =
21263 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21264 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21265 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21266 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21267 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21268 SDValue NewST =
21269 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21270 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21271
21272 AddToWorklist(NewPtr.getNode());
21273 AddToWorklist(NewLD.getNode());
21274 AddToWorklist(NewVal.getNode());
21275 WorklistRemover DeadNodes(*this);
21276 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21277 ++OpsNarrowed;
21278 return NewST;
21279 }
21280
21281 return SDValue();
21282}
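// Worked example (hypothetical, little-endian, assuming i8 OR is legal and
// narrowing is deemed profitable): for
//   store (or (load i32 p), 0x00FF0000), p
// with the store chained directly on the load, the constant has LSB = 16 and
// MSB = 23, so NewBW = 8 (i8). The loop accepts ShAmt = 16, giving PtrOff = 2,
// and the sequence is narrowed to
//   x = load i8 (p+2);  store i8 (or x, 0xFF), (p+2)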
21283
21284/// For a given floating point load / store pair, if the load value isn't used
21285/// by any other operations, then consider transforming the pair to integer
21286/// load / store operations if the target deems the transformation profitable.
21287SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21288 StoreSDNode *ST = cast<StoreSDNode>(N);
21289 SDValue Value = ST->getValue();
21290 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21291 Value.hasOneUse()) {
21292 LoadSDNode *LD = cast<LoadSDNode>(Value);
21293 EVT VT = LD->getMemoryVT();
21294 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21295 LD->isNonTemporal() || ST->isNonTemporal() ||
21296 LD->getPointerInfo().getAddrSpace() != 0 ||
21297 ST->getPointerInfo().getAddrSpace() != 0)
21298 return SDValue();
21299
21300 TypeSize VTSize = VT.getSizeInBits();
21301
21302 // We don't know the size of scalable types at compile time so we cannot
21303 // create an integer of the equivalent size.
21304 if (VTSize.isScalable())
21305 return SDValue();
21306
21307 unsigned FastLD = 0, FastST = 0;
21308 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21309 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21310 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21311 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21312 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21313 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21314 *LD->getMemOperand(), &FastLD) ||
21315 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21316 *ST->getMemOperand(), &FastST) ||
21317 !FastLD || !FastST)
21318 return SDValue();
21319
21320 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21321 LD->getBasePtr(), LD->getMemOperand());
21322
21323 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21324 ST->getBasePtr(), ST->getMemOperand());
21325
21326 AddToWorklist(NewLD.getNode());
21327 AddToWorklist(NewST.getNode());
21328 WorklistRemover DeadNodes(*this);
21329 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21330 ++LdStFP2Int;
21331 return NewST;
21332 }
21333
21334 return SDValue();
21335}
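// Illustrative sketch (hypothetical, subject to the target hooks checked
// above): a copy that merely round-trips through an FP register,
//   f = load float p;  store float f, q
// is turned into
//   i = load i32 p;    store i32 i, q
// when i32 loads/stores are legal, the target deems the integer form
// desirable, and both accesses remain fast at their existing alignments.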
21336
21337// This is a helper function for visitMUL to check the profitability
21338// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21339// MulNode is the original multiply, AddNode is (add x, c1),
21340// and ConstNode is c2.
21341//
21342// If the (add x, c1) has multiple uses, we could increase
21343// the number of adds if we make this transformation.
21344// It would only be worth doing this if we can remove a
21345// multiply in the process. Check for that here.
21346// To illustrate:
21347// (A + c1) * c3
21348// (A + c2) * c3
21349// We're checking for cases where we have common "c3 * A" expressions.
21350bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21351 SDValue ConstNode) {
21352 // If the add only has one use, and the target thinks the folding is
21353 // profitable or does not lead to worse code, this would be OK to do.
21354 if (AddNode->hasOneUse() &&
21355 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21356 return true;
21357
21358 // Walk all the users of the constant with which we're multiplying.
21359 for (SDNode *User : ConstNode->users()) {
21360 if (User == MulNode) // This use is the one we're on right now. Skip it.
21361 continue;
21362
21363 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21364 SDNode *OtherOp;
21365 SDNode *MulVar = AddNode.getOperand(0).getNode();
21366
21367 // OtherOp is what we're multiplying against the constant.
21368 if (User->getOperand(0) == ConstNode)
21369 OtherOp = User->getOperand(1).getNode();
21370 else
21371 OtherOp = User->getOperand(0).getNode();
21372
21373 // Check to see if multiply is with the same operand of our "add".
21374 //
21375 // ConstNode = CONST
21376 // User = ConstNode * A <-- visiting User. OtherOp is A.
21377 // ...
21378 // AddNode = (A + c1) <-- MulVar is A.
21379 // = AddNode * ConstNode <-- current visiting instruction.
21380 //
21381 // If we make this transformation, we will have a common
21382 // multiply (ConstNode * A) that we can save.
21383 if (OtherOp == MulVar)
21384 return true;
21385
21386 // Now check to see if a future expansion will give us a common
21387 // multiply.
21388 //
21389 // ConstNode = CONST
21390 // AddNode = (A + c1)
21391 // ... = AddNode * ConstNode <-- current visiting instruction.
21392 // ...
21393 // OtherOp = (A + c2)
21394 // User = OtherOp * ConstNode <-- visiting User.
21395 //
21396 // If we make this transformation, we will have a common
21397 // multiply (CONST * A) after we also do the same transformation
 21398 // to the other multiply (the User instruction).
21399 if (OtherOp->getOpcode() == ISD::ADD &&
21401 OtherOp->getOperand(0).getNode() == MulVar)
21402 return true;
21403 }
21404 }
21405
21406 // Didn't find a case where this would be profitable.
21407 return false;
21408}
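// Illustrative sketch (hypothetical values): with ConstNode = c3 and
//   X = (A + c1) * c3
//   Y = (A + c2) * c3
// rewriting both as (A * c3) + c1*c3 and (A * c3) + c2*c3 exposes the common
// subexpression (A * c3), so the extra adds pay for themselves and this
// function returns true for either multiply.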
21409
21410SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21411 unsigned NumStores) {
 21412 SmallVector<SDValue, 8> Chains;
 21413 SmallPtrSet<const SDNode *, 8> Visited;
21414 SDLoc StoreDL(StoreNodes[0].MemNode);
21415
21416 for (unsigned i = 0; i < NumStores; ++i) {
21417 Visited.insert(StoreNodes[i].MemNode);
21418 }
21419
21420 // don't include nodes that are children or repeated nodes.
21421 for (unsigned i = 0; i < NumStores; ++i) {
21422 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21423 Chains.push_back(StoreNodes[i].MemNode->getChain());
21424 }
21425
21426 assert(!Chains.empty() && "Chain should have generated a chain");
21427 return DAG.getTokenFactor(StoreDL, Chains);
21428}
21429
21430bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21431 const Value *UnderlyingObj = nullptr;
21432 for (const auto &MemOp : StoreNodes) {
21433 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
 21434 // A pseudo value like a stack frame has its own frame index and size; we
 21435 // should not use the first store's frame index for other frames.
21436 if (MMO->getPseudoValue())
21437 return false;
21438
21439 if (!MMO->getValue())
21440 return false;
21441
21442 const Value *Obj = getUnderlyingObject(MMO->getValue());
21443
21444 if (UnderlyingObj && UnderlyingObj != Obj)
21445 return false;
21446
21447 if (!UnderlyingObj)
21448 UnderlyingObj = Obj;
21449 }
21450
21451 return true;
21452}
21453
21454bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21455 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21456 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21457 // Make sure we have something to merge.
21458 if (NumStores < 2)
21459 return false;
21460
21461 assert((!UseTrunc || !UseVector) &&
21462 "This optimization cannot emit a vector truncating store");
21463
21464 // The latest Node in the DAG.
21465 SDLoc DL(StoreNodes[0].MemNode);
21466
21467 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21468 unsigned SizeInBits = NumStores * ElementSizeBits;
21469 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21470
21471 std::optional<MachineMemOperand::Flags> Flags;
21472 AAMDNodes AAInfo;
21473 for (unsigned I = 0; I != NumStores; ++I) {
21474 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21475 if (!Flags) {
21476 Flags = St->getMemOperand()->getFlags();
21477 AAInfo = St->getAAInfo();
21478 continue;
21479 }
21480 // Skip merging if there's an inconsistent flag.
21481 if (Flags != St->getMemOperand()->getFlags())
21482 return false;
21483 // Concatenate AA metadata.
21484 AAInfo = AAInfo.concat(St->getAAInfo());
21485 }
21486
21487 EVT StoreTy;
21488 if (UseVector) {
21489 unsigned Elts = NumStores * NumMemElts;
21490 // Get the type for the merged vector store.
21491 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21492 } else
21493 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21494
21495 SDValue StoredVal;
21496 if (UseVector) {
21497 if (IsConstantSrc) {
21498 SmallVector<SDValue, 8> BuildVector;
21499 for (unsigned I = 0; I != NumStores; ++I) {
21500 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21501 SDValue Val = St->getValue();
21502 // If constant is of the wrong type, convert it now. This comes up
21503 // when one of our stores was truncating.
21504 if (MemVT != Val.getValueType()) {
21505 Val = peekThroughBitcasts(Val);
21506 // Deal with constants of wrong size.
21507 if (ElementSizeBits != Val.getValueSizeInBits()) {
21508 auto *C = dyn_cast<ConstantSDNode>(Val);
21509 if (!C)
21510 // Not clear how to truncate FP values.
21511 // TODO: Handle truncation of build_vector constants
21512 return false;
21513
21514 EVT IntMemVT =
 21515 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
 21516 Val = DAG.getConstant(C->getAPIntValue()
21517 .zextOrTrunc(Val.getValueSizeInBits())
21518 .zextOrTrunc(ElementSizeBits),
21519 SDLoc(C), IntMemVT);
21520 }
 21521 // Now that the value has the correct size, bitcast it to the correct type.
21522 Val = DAG.getBitcast(MemVT, Val);
21523 }
21524 BuildVector.push_back(Val);
21525 }
21526 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
 21527 : ISD::BUILD_VECTOR,
 21528 DL, StoreTy, BuildVector);
21529 } else {
 21530 SmallVector<SDValue, 8> Ops;
 21531 for (unsigned i = 0; i < NumStores; ++i) {
21532 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
 21533 SDValue Val = peekThroughBitcasts(St->getValue());
 21534 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21535 // type MemVT. If the underlying value is not the correct
21536 // type, but it is an extraction of an appropriate vector we
21537 // can recast Val to be of the correct type. This may require
21538 // converting between EXTRACT_VECTOR_ELT and
21539 // EXTRACT_SUBVECTOR.
21540 if ((MemVT != Val.getValueType()) &&
 21541 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
 21542 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
 21543 EVT MemVTScalarTy = MemVT.getScalarType();
21544 // We may need to add a bitcast here to get types to line up.
21545 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21546 Val = DAG.getBitcast(MemVT, Val);
21547 } else if (MemVT.isVector() &&
 21548 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
 21549 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21550 } else {
21551 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
 21552 : ISD::EXTRACT_VECTOR_ELT;
 21553 SDValue Vec = Val.getOperand(0);
21554 SDValue Idx = Val.getOperand(1);
21555 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21556 }
21557 }
21558 Ops.push_back(Val);
21559 }
21560
21561 // Build the extracted vector elements back into a vector.
21562 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
 21563 : ISD::BUILD_VECTOR,
 21564 DL, StoreTy, Ops);
21565 }
21566 } else {
21567 // We should always use a vector store when merging extracted vector
21568 // elements, so this path implies a store of constants.
21569 assert(IsConstantSrc && "Merged vector elements should use vector store");
21570
21571 APInt StoreInt(SizeInBits, 0);
21572
21573 // Construct a single integer constant which is made of the smaller
21574 // constant inputs.
21575 bool IsLE = DAG.getDataLayout().isLittleEndian();
21576 for (unsigned i = 0; i < NumStores; ++i) {
21577 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21578 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21579
21580 SDValue Val = St->getValue();
21581 Val = peekThroughBitcasts(Val);
21582 StoreInt <<= ElementSizeBits;
21583 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21584 StoreInt |= C->getAPIntValue()
21585 .zextOrTrunc(ElementSizeBits)
21586 .zextOrTrunc(SizeInBits);
21587 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21588 StoreInt |= C->getValueAPF()
21589 .bitcastToAPInt()
21590 .zextOrTrunc(ElementSizeBits)
21591 .zextOrTrunc(SizeInBits);
21592 // If fp truncation is necessary give up for now.
21593 if (MemVT.getSizeInBits() != ElementSizeBits)
21594 return false;
21595 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
 21596 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
 21597 // Not yet handled
21598 return false;
21599 } else {
21600 llvm_unreachable("Invalid constant element type");
21601 }
21602 }
21603
21604 // Create the new Load and Store operations.
21605 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21606 }
21607
21608 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21609 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21610 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21611
 21612 // Make sure we use a trunc store if it's necessary to be legal.
 21613 // When generating the new widened store, if the first store's pointer info
 21614 // cannot be reused, discard the pointer info except for the address space,
 21615 // because the widened store can no longer be represented by the original
 21616 // pointer info, which describes the narrower memory object.
21617 SDValue NewStore;
21618 if (!UseTrunc) {
21619 NewStore = DAG.getStore(
21620 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21621 CanReusePtrInfo
21622 ? FirstInChain->getPointerInfo()
21623 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21624 FirstInChain->getAlign(), *Flags, AAInfo);
21625 } else { // Must be realized as a trunc store
21626 EVT LegalizedStoredValTy =
21627 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21628 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21629 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21630 SDValue ExtendedStoreVal =
21631 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21632 LegalizedStoredValTy);
21633 NewStore = DAG.getTruncStore(
21634 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21635 CanReusePtrInfo
21636 ? FirstInChain->getPointerInfo()
21637 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21638 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21639 AAInfo);
21640 }
21641
21642 // Replace all merged stores with the new store.
21643 for (unsigned i = 0; i < NumStores; ++i)
21644 CombineTo(StoreNodes[i].MemNode, NewStore);
21645
21646 AddToWorklist(NewChain.getNode());
21647 return true;
21648}
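// Worked example (hypothetical, little-endian): merging four consecutive i8
// constant stores of 0x11, 0x22, 0x33, 0x44 at p..p+3 builds StoreInt by
// shifting in the constants from the highest-addressed element first,
// producing
//   store i32 0x44332211, p
// which lays down the same four bytes in memory, assuming the i32 store is
// legal and allowed by the target for this address space and alignment.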
21649
21650SDNode *
21651DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21652 SmallVectorImpl<MemOpLink> &StoreNodes) {
21653 // This holds the base pointer, index, and the offset in bytes from the base
21654 // pointer. We must have a base and an offset. Do not handle stores to undef
21655 // base pointers.
21656 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21657 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21658 return nullptr;
21659
 21660 SDValue Val = peekThroughBitcasts(St->getValue());
 21661 StoreSource StoreSrc = getStoreSource(Val);
21662 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21663
21664 // Match on loadbaseptr if relevant.
21665 EVT MemVT = St->getMemoryVT();
21666 BaseIndexOffset LBasePtr;
21667 EVT LoadVT;
21668 if (StoreSrc == StoreSource::Load) {
21669 auto *Ld = cast<LoadSDNode>(Val);
21670 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21671 LoadVT = Ld->getMemoryVT();
21672 // Load and store should be the same type.
21673 if (MemVT != LoadVT)
21674 return nullptr;
21675 // Loads must only have one use.
21676 if (!Ld->hasNUsesOfValue(1, 0))
21677 return nullptr;
21678 // The memory operands must not be volatile/indexed/atomic.
21679 // TODO: May be able to relax for unordered atomics (see D66309)
21680 if (!Ld->isSimple() || Ld->isIndexed())
21681 return nullptr;
21682 }
21683 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21684 int64_t &Offset) -> bool {
21685 // The memory operands must not be volatile/indexed/atomic.
21686 // TODO: May be able to relax for unordered atomics (see D66309)
21687 if (!Other->isSimple() || Other->isIndexed())
21688 return false;
21689 // Don't mix temporal stores with non-temporal stores.
21690 if (St->isNonTemporal() != Other->isNonTemporal())
21691 return false;
 21692 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
 21693 return false;
21694 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21695 // Allow merging constants of different types as integers.
21696 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21697 : Other->getMemoryVT() != MemVT;
21698 switch (StoreSrc) {
21699 case StoreSource::Load: {
21700 if (NoTypeMatch)
21701 return false;
21702 // The Load's Base Ptr must also match.
21703 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21704 if (!OtherLd)
21705 return false;
21706 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21707 if (LoadVT != OtherLd->getMemoryVT())
21708 return false;
21709 // Loads must only have one use.
21710 if (!OtherLd->hasNUsesOfValue(1, 0))
21711 return false;
21712 // The memory operands must not be volatile/indexed/atomic.
21713 // TODO: May be able to relax for unordered atomics (see D66309)
21714 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21715 return false;
21716 // Don't mix temporal loads with non-temporal loads.
21717 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21718 return false;
 21719 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
 21720 *OtherLd))
21721 return false;
21722 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21723 return false;
21724 break;
21725 }
21726 case StoreSource::Constant:
21727 if (NoTypeMatch)
21728 return false;
21729 if (getStoreSource(OtherBC) != StoreSource::Constant)
21730 return false;
21731 break;
21732 case StoreSource::Extract:
21733 // Do not merge truncated stores here.
21734 if (Other->isTruncatingStore())
21735 return false;
21736 if (!MemVT.bitsEq(OtherBC.getValueType()))
21737 return false;
21738 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
21739 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21740 return false;
21741 break;
21742 default:
21743 llvm_unreachable("Unhandled store source for merging");
21744 }
 21745 Ptr = BaseIndexOffset::match(Other, DAG);
 21746 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
21747 };
21748
 21749 // We are looking for a root node which is an ancestor to all mergeable
 21750 // stores. We search up through a load, to our root and then down
 21751 // through all children. For instance we will find Store{1,2,3} if
 21752 // St is Store1, Store2, or Store3 where the root is not a load,
 21753 // which is always true for non-volatile ops. TODO: Expand
21754 // the search to find all valid candidates through multiple layers of loads.
21755 //
21756 // Root
21757 // |-------|-------|
21758 // Load Load Store3
21759 // | |
21760 // Store1 Store2
21761 //
21762 // FIXME: We should be able to climb and
21763 // descend TokenFactors to find candidates as well.
21764
21765 SDNode *RootNode = St->getChain().getNode();
21766 // Bail out if we already analyzed this root node and found nothing.
21767 if (ChainsWithoutMergeableStores.contains(RootNode))
21768 return nullptr;
21769
 21770 // Check if the pair of StoreNode and RootNode has already bailed out of
 21771 // the dependence check more times than the limit allows.
21772 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
21773 SDNode *RootNode) -> bool {
21774 auto RootCount = StoreRootCountMap.find(StoreNode);
21775 return RootCount != StoreRootCountMap.end() &&
21776 RootCount->second.first == RootNode &&
21777 RootCount->second.second > StoreMergeDependenceLimit;
21778 };
21779
21780 auto TryToAddCandidate = [&](SDUse &Use) {
21781 // This must be a chain use.
21782 if (Use.getOperandNo() != 0)
21783 return;
21784 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
21785 BaseIndexOffset Ptr;
21786 int64_t PtrDiff;
21787 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
21788 !OverLimitInDependenceCheck(OtherStore, RootNode))
21789 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
21790 }
21791 };
21792
21793 unsigned NumNodesExplored = 0;
21794 const unsigned MaxSearchNodes = 1024;
21795 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
21796 RootNode = Ldn->getChain().getNode();
21797 // Bail out if we already analyzed this root node and found nothing.
21798 if (ChainsWithoutMergeableStores.contains(RootNode))
21799 return nullptr;
21800 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21801 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
21802 SDNode *User = I->getUser();
21803 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
21804 for (SDUse &U2 : User->uses())
21805 TryToAddCandidate(U2);
21806 }
21807 // Check stores that depend on the root (e.g. Store 3 in the chart above).
21808 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
21809 TryToAddCandidate(*I);
21810 }
21811 }
21812 } else {
21813 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21814 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
21815 TryToAddCandidate(*I);
21816 }
21817
21818 return RootNode;
21819}
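// Illustrative sketch (hypothetical): for St = (store i32 C0, p), the
// CandidateMatch lambda above accepts another simple, non-indexed
// (store i32 C1, p+4) because both decompose to the same BaseIndexOffset
// base, recording offsets 0 and 4 that the caller later sorts and checks for
// adjacency before attempting a merge.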
21820
21821// We need to check that merging these stores does not cause a loop in the
21822// DAG. Any store candidate may depend on another candidate indirectly through
21823// its operands. Check in parallel by searching up from operands of candidates.
21824bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21825 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21826 SDNode *RootNode) {
 21827 // FIXME: We should be able to truncate a full search of
 21828 // predecessors by doing a BFS and keeping tabs on the originating
 21829 // stores from which worklist nodes come, in a similar way to
 21830 // TokenFactor simplification.
21831
21832 SmallPtrSet<const SDNode *, 32> Visited;
 21833 SmallVector<const SDNode *, 8> Worklist;
 21834
21835 // RootNode is a predecessor to all candidates so we need not search
21836 // past it. Add RootNode (peeking through TokenFactors). Do not count
21837 // these towards size check.
21838
21839 Worklist.push_back(RootNode);
21840 while (!Worklist.empty()) {
21841 auto N = Worklist.pop_back_val();
21842 if (!Visited.insert(N).second)
21843 continue; // Already present in Visited.
21844 if (N->getOpcode() == ISD::TokenFactor) {
21845 for (SDValue Op : N->ops())
21846 Worklist.push_back(Op.getNode());
21847 }
21848 }
21849
21850 // Don't count pruning nodes towards max.
21851 unsigned int Max = 1024 + Visited.size();
21852 // Search Ops of store candidates.
21853 for (unsigned i = 0; i < NumStores; ++i) {
21854 SDNode *N = StoreNodes[i].MemNode;
21855 // Of the 4 Store Operands:
21856 // * Chain (Op 0) -> We have already considered these
21857 // in candidate selection, but only by following the
21858 // chain dependencies. We could still have a chain
21859 // dependency to a load, that has a non-chain dep to
21860 // another load, that depends on a store, etc. So it is
21861 // possible to have dependencies that consist of a mix
21862 // of chain and non-chain deps, and we need to include
 21863 // chain operands in the analysis here.
21864 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21865 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
 21866 // but aren't necessarily from the same base node, so
21867 // cycles possible (e.g. via indexed store).
21868 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21869 // non-indexed stores). Not constant on all targets (e.g. ARM)
21870 // and so can participate in a cycle.
21871 for (const SDValue &Op : N->op_values())
21872 Worklist.push_back(Op.getNode());
21873 }
21874 // Search through DAG. We can stop early if we find a store node.
21875 for (unsigned i = 0; i < NumStores; ++i)
21876 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21877 Max)) {
 21878 // If the search bails out, record the StoreNode and RootNode in the
 21879 // StoreRootCountMap. If we have seen the pair more times than the limit,
 21880 // we won't add the StoreNode into the StoreNodes set again.
21881 if (Visited.size() >= Max) {
21882 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21883 if (RootCount.first == RootNode)
21884 RootCount.second++;
21885 else
21886 RootCount = {RootNode, 1};
21887 }
21888 return false;
21889 }
21890 return true;
21891}
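// Illustrative sketch (hypothetical): suppose candidate store S2 stores a
// value computed from a load that is chained after candidate S1. A merged
// store would then have to execute both before that load (as S1 did) and
// after it (as S2 did), i.e. the merged node would be its own predecessor.
// The predecessor walk above rejects exactly this situation.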
21892
21893bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
21894 SmallPtrSet<const SDNode *, 32> Visited;
 21895 SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
 21896 Worklist.emplace_back(St->getChain().getNode(), false);
21897
21898 while (!Worklist.empty()) {
21899 auto [Node, FoundCall] = Worklist.pop_back_val();
21900 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
21901 continue;
21902
21903 switch (Node->getOpcode()) {
21904 case ISD::CALLSEQ_END:
21905 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
21906 break;
21907 case ISD::TokenFactor:
21908 for (SDValue Op : Node->ops())
21909 Worklist.emplace_back(Op.getNode(), FoundCall);
21910 break;
21911 case ISD::LOAD:
21912 if (Node == Ld)
21913 return FoundCall;
21914 [[fallthrough]];
21915 default:
21916 assert(Node->getOperand(0).getValueType() == MVT::Other &&
21917 "Invalid chain type");
21918 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
21919 break;
21920 }
21921 }
21922 return false;
21923}
21924
21925unsigned
21926DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21927 int64_t ElementSizeBytes) const {
21928 while (true) {
21929 // Find a store past the width of the first store.
21930 size_t StartIdx = 0;
21931 while ((StartIdx + 1 < StoreNodes.size()) &&
21932 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21933 StoreNodes[StartIdx + 1].OffsetFromBase)
21934 ++StartIdx;
21935
21936 // Bail if we don't have enough candidates to merge.
21937 if (StartIdx + 1 >= StoreNodes.size())
21938 return 0;
21939
21940 // Trim stores that overlapped with the first store.
21941 if (StartIdx)
21942 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21943
21944 // Scan the memory operations on the chain and find the first
21945 // non-consecutive store memory address.
21946 unsigned NumConsecutiveStores = 1;
21947 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21948 // Check that the addresses are consecutive starting from the second
21949 // element in the list of stores.
21950 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21951 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21952 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21953 break;
21954 NumConsecutiveStores = i + 1;
21955 }
21956 if (NumConsecutiveStores > 1)
21957 return NumConsecutiveStores;
21958
21959 // There are no consecutive stores at the start of the list.
21960 // Remove the first store and try again.
21961 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21962 }
21963}
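// Worked example (hypothetical): with ElementSizeBytes = 4 and sorted
// candidate offsets {0, 4, 8, 20}, the scan finds the consecutive run
// starting at offset 0 and returns 3. The store at offset 20 stays in
// StoreNodes and can be considered on a later call once the first three have
// been merged (or dropped) by the caller.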
21964
21965bool DAGCombiner::tryStoreMergeOfConstants(
21966 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21967 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21968 LLVMContext &Context = *DAG.getContext();
21969 const DataLayout &DL = DAG.getDataLayout();
21970 int64_t ElementSizeBytes = MemVT.getStoreSize();
21971 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21972 bool MadeChange = false;
21973
21974 // Store the constants into memory as one consecutive store.
21975 while (NumConsecutiveStores >= 2) {
21976 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21977 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21978 Align FirstStoreAlign = FirstInChain->getAlign();
21979 unsigned LastLegalType = 1;
21980 unsigned LastLegalVectorType = 1;
21981 bool LastIntegerTrunc = false;
21982 bool NonZero = false;
21983 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21984 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21985 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21986 SDValue StoredVal = ST->getValue();
21987 bool IsElementZero = false;
21988 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21989 IsElementZero = C->isZero();
21990 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21991 IsElementZero = C->getConstantFPValue()->isNullValue();
21992 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21993 IsElementZero = true;
21994 if (IsElementZero) {
21995 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21996 FirstZeroAfterNonZero = i;
21997 }
21998 NonZero |= !IsElementZero;
21999
22000 // Find a legal type for the constant store.
22001 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22002 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22003 unsigned IsFast = 0;
22004
22005 // Break early when size is too large to be legal.
22006 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22007 break;
22008
22009 if (TLI.isTypeLegal(StoreTy) &&
22010 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22011 DAG.getMachineFunction()) &&
22012 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22013 *FirstInChain->getMemOperand(), &IsFast) &&
22014 IsFast) {
22015 LastIntegerTrunc = false;
22016 LastLegalType = i + 1;
22017 // Or check whether a truncstore is legal.
22018 } else if (TLI.getTypeAction(Context, StoreTy) ==
 22019 TargetLowering::TypePromoteInteger) {
 22020 EVT LegalizedStoredValTy =
22021 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
22022 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22023 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22024 DAG.getMachineFunction()) &&
22025 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22026 *FirstInChain->getMemOperand(), &IsFast) &&
22027 IsFast) {
22028 LastIntegerTrunc = true;
22029 LastLegalType = i + 1;
22030 }
22031 }
22032
22033 // We only use vectors if the target allows it and the function is not
22034 // marked with the noimplicitfloat attribute.
22035 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
22036 AllowVectors) {
22037 // Find a legal type for the vector store.
22038 unsigned Elts = (i + 1) * NumMemElts;
22039 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22040 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
22041 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22042 TLI.allowsMemoryAccess(Context, DL, Ty,
22043 *FirstInChain->getMemOperand(), &IsFast) &&
22044 IsFast)
22045 LastLegalVectorType = i + 1;
22046 }
22047 }
22048
22049 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
22050 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
22051 bool UseTrunc = LastIntegerTrunc && !UseVector;
22052
22053 // Check if we found a legal integer type that creates a meaningful
22054 // merge.
22055 if (NumElem < 2) {
22056 // We know that candidate stores are in order and of correct
22057 // shape. While there is no mergeable sequence from the
22058 // beginning one may start later in the sequence. The only
22059 // reason a merge of size N could have failed where another of
22060 // the same size would not have, is if the alignment has
22061 // improved or we've dropped a non-zero value. Drop as many
22062 // candidates as we can here.
22063 unsigned NumSkip = 1;
22064 while ((NumSkip < NumConsecutiveStores) &&
22065 (NumSkip < FirstZeroAfterNonZero) &&
22066 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22067 NumSkip++;
22068
22069 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22070 NumConsecutiveStores -= NumSkip;
22071 continue;
22072 }
22073
22074 // Check that we can merge these candidates without causing a cycle.
22075 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22076 RootNode)) {
22077 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22078 NumConsecutiveStores -= NumElem;
22079 continue;
22080 }
22081
22082 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
22083 /*IsConstantSrc*/ true,
22084 UseVector, UseTrunc);
22085
22086 // Remove merged stores for next iteration.
22087 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22088 NumConsecutiveStores -= NumElem;
22089 }
22090 return MadeChange;
22091}
22092
22093bool DAGCombiner::tryStoreMergeOfExtracts(
22094 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22095 EVT MemVT, SDNode *RootNode) {
22096 LLVMContext &Context = *DAG.getContext();
22097 const DataLayout &DL = DAG.getDataLayout();
22098 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22099 bool MadeChange = false;
22100
22101 // Loop on Consecutive Stores on success.
22102 while (NumConsecutiveStores >= 2) {
22103 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22104 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22105 Align FirstStoreAlign = FirstInChain->getAlign();
22106 unsigned NumStoresToMerge = 1;
22107 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22108 // Find a legal type for the vector store.
22109 unsigned Elts = (i + 1) * NumMemElts;
22110 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
22111 unsigned IsFast = 0;
22112
22113 // Break early when size is too large to be legal.
22114 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
22115 break;
22116
22117 if (TLI.isTypeLegal(Ty) &&
22118 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22119 TLI.allowsMemoryAccess(Context, DL, Ty,
22120 *FirstInChain->getMemOperand(), &IsFast) &&
22121 IsFast)
22122 NumStoresToMerge = i + 1;
22123 }
22124
 22125 // Check if we found a legal vector type creating a meaningful
22126 // merge.
22127 if (NumStoresToMerge < 2) {
22128 // We know that candidate stores are in order and of correct
22129 // shape. While there is no mergeable sequence from the
22130 // beginning one may start later in the sequence. The only
22131 // reason a merge of size N could have failed where another of
22132 // the same size would not have, is if the alignment has
22133 // improved. Drop as many candidates as we can here.
22134 unsigned NumSkip = 1;
22135 while ((NumSkip < NumConsecutiveStores) &&
22136 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22137 NumSkip++;
22138
22139 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22140 NumConsecutiveStores -= NumSkip;
22141 continue;
22142 }
22143
22144 // Check that we can merge these candidates without causing a cycle.
22145 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
22146 RootNode)) {
22147 StoreNodes.erase(StoreNodes.begin(),
22148 StoreNodes.begin() + NumStoresToMerge);
22149 NumConsecutiveStores -= NumStoresToMerge;
22150 continue;
22151 }
22152
22153 MadeChange |= mergeStoresOfConstantsOrVecElts(
22154 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22155 /*UseVector*/ true, /*UseTrunc*/ false);
22156
22157 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22158 NumConsecutiveStores -= NumStoresToMerge;
22159 }
22160 return MadeChange;
22161}
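// Illustrative sketch (hypothetical): two adjacent f32 stores whose values
// are extract_vector_elt of the same v4f32 source can be merged into a
// single v2f32 store when v2f32 is legal and the wider access is allowed;
// mergeStoresOfConstantsOrVecElts then rebuilds the stored value with
// BUILD_VECTOR (or CONCAT_VECTORS for vector-typed elements).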
22162
22163bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22164 unsigned NumConsecutiveStores, EVT MemVT,
22165 SDNode *RootNode, bool AllowVectors,
22166 bool IsNonTemporalStore,
22167 bool IsNonTemporalLoad) {
22168 LLVMContext &Context = *DAG.getContext();
22169 const DataLayout &DL = DAG.getDataLayout();
22170 int64_t ElementSizeBytes = MemVT.getStoreSize();
22171 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22172 bool MadeChange = false;
22173
22174 // Look for load nodes which are used by the stored values.
22175 SmallVector<MemOpLink, 8> LoadNodes;
22176
22177 // Find acceptable loads. Loads need to have the same chain (token factor),
22178 // must not be zext, volatile, indexed, and they must be consecutive.
22179 BaseIndexOffset LdBasePtr;
22180
22181 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22182 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
 22183 SDValue Val = peekThroughBitcasts(St->getValue());
 22184 LoadSDNode *Ld = cast<LoadSDNode>(Val);
22185
22186 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22187 // If this is not the first ptr that we check.
22188 int64_t LdOffset = 0;
22189 if (LdBasePtr.getBase().getNode()) {
22190 // The base ptr must be the same.
22191 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22192 break;
22193 } else {
22194 // Check that all other base pointers are the same as this one.
22195 LdBasePtr = LdPtr;
22196 }
22197
22198 // We found a potential memory operand to merge.
22199 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22200 }
22201
22202 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22203 Align RequiredAlignment;
22204 bool NeedRotate = false;
22205 if (LoadNodes.size() == 2) {
22206 // If we have load/store pair instructions and we only have two values,
22207 // don't bother merging.
22208 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22209 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22210 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22211 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22212 break;
22213 }
22214 // If the loads are reversed, see if we can rotate the halves into place.
22215 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22216 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22217 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22218 if (Offset0 - Offset1 == ElementSizeBytes &&
22219 (hasOperation(ISD::ROTL, PairVT) ||
22220 hasOperation(ISD::ROTR, PairVT))) {
22221 std::swap(LoadNodes[0], LoadNodes[1]);
22222 NeedRotate = true;
22223 }
22224 }
22225 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22226 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22227 Align FirstStoreAlign = FirstInChain->getAlign();
22228 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22229
22230 // Scan the memory operations on the chain and find the first
22231 // non-consecutive load memory address. These variables hold the index in
22232 // the store node array.
22233
22234 unsigned LastConsecutiveLoad = 1;
22235
22236 // This variable refers to the size and not index in the array.
22237 unsigned LastLegalVectorType = 1;
22238 unsigned LastLegalIntegerType = 1;
22239 bool isDereferenceable = true;
22240 bool DoIntegerTruncate = false;
22241 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22242 SDValue LoadChain = FirstLoad->getChain();
22243 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22244 // All loads must share the same chain.
22245 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22246 break;
22247
22248 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22249 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22250 break;
22251 LastConsecutiveLoad = i;
22252
22253 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22254 isDereferenceable = false;
22255
22256 // Find a legal type for the vector store.
22257 unsigned Elts = (i + 1) * NumMemElts;
22258 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22259
22260 // Break early when size is too large to be legal.
22261 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22262 break;
22263
22264 unsigned IsFastSt = 0;
22265 unsigned IsFastLd = 0;
22266 // Don't try vector types if we need a rotate. We may still fail the
22267 // legality checks for the integer type, but we can't handle the rotate
22268 // case with vectors.
22269 // FIXME: We could use a shuffle in place of the rotate.
22270 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22271 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22272 DAG.getMachineFunction()) &&
22273 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22274 *FirstInChain->getMemOperand(), &IsFastSt) &&
22275 IsFastSt &&
22276 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22277 *FirstLoad->getMemOperand(), &IsFastLd) &&
22278 IsFastLd) {
22279 LastLegalVectorType = i + 1;
22280 }
22281
22282 // Find a legal type for the integer store.
22283 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22284 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22285 if (TLI.isTypeLegal(StoreTy) &&
22286 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22287 DAG.getMachineFunction()) &&
22288 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22289 *FirstInChain->getMemOperand(), &IsFastSt) &&
22290 IsFastSt &&
22291 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22292 *FirstLoad->getMemOperand(), &IsFastLd) &&
22293 IsFastLd) {
22294 LastLegalIntegerType = i + 1;
22295 DoIntegerTruncate = false;
22296 // Or check whether a truncstore and extload is legal.
22297 } else if (TLI.getTypeAction(Context, StoreTy) ==
 22298 TargetLowering::TypePromoteInteger) {
 22299 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22300 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22301 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22302 DAG.getMachineFunction()) &&
22303 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22304 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22305 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22306 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22307 *FirstInChain->getMemOperand(), &IsFastSt) &&
22308 IsFastSt &&
22309 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22310 *FirstLoad->getMemOperand(), &IsFastLd) &&
22311 IsFastLd) {
22312 LastLegalIntegerType = i + 1;
22313 DoIntegerTruncate = true;
22314 }
22315 }
22316 }
22317
22318 // Only use vector types if the vector type is larger than the integer
22319 // type. If they are the same, use integers.
22320 bool UseVectorTy =
22321 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22322 unsigned LastLegalType =
22323 std::max(LastLegalVectorType, LastLegalIntegerType);
22324
 22325 // We add +1 here because the LastXXX variables refer to an index (location)
 22326 // while NumElem refers to the number of elements (array size).
22327 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22328 NumElem = std::min(LastLegalType, NumElem);
22329 Align FirstLoadAlign = FirstLoad->getAlign();
22330
22331 if (NumElem < 2) {
22332 // We know that candidate stores are in order and of correct
22333 // shape. While there is no mergeable sequence from the
22334 // beginning one may start later in the sequence. The only
22335 // reason a merge of size N could have failed where another of
22336 // the same size would not have is if the alignment or either
22337 // the load or store has improved. Drop as many candidates as we
22338 // can here.
22339 unsigned NumSkip = 1;
22340 while ((NumSkip < LoadNodes.size()) &&
22341 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22342 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22343 NumSkip++;
22344 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22345 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22346 NumConsecutiveStores -= NumSkip;
22347 continue;
22348 }
22349
22350 // Check that we can merge these candidates without causing a cycle.
22351 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22352 RootNode)) {
22353 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22354 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22355 NumConsecutiveStores -= NumElem;
22356 continue;
22357 }
22358
22359 // Find if it is better to use vectors or integers to load and store
22360 // to memory.
22361 EVT JointMemOpVT;
22362 if (UseVectorTy) {
22363 // Find a legal type for the vector store.
22364 unsigned Elts = NumElem * NumMemElts;
22365 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22366 } else {
22367 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22368 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22369 }
22370
22371 // Check if there is a call in the load/store chain.
22372 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22373 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22374 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22375 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22376 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22377 NumConsecutiveStores -= NumElem;
22378 continue;
22379 }
22380
22381 SDLoc LoadDL(LoadNodes[0].MemNode);
22382 SDLoc StoreDL(StoreNodes[0].MemNode);
22383
22384 // The merged loads are required to have the same incoming chain, so
22385 // using the first's chain is acceptable.
22386
22387 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22388 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22389 AddToWorklist(NewStoreChain.getNode());
22390
22391 MachineMemOperand::Flags LdMMOFlags =
 22392 isDereferenceable ? MachineMemOperand::MODereferenceable
 22393 : MachineMemOperand::MONone;
 22394 if (IsNonTemporalLoad)
 22395 LdMMOFlags |= MachineMemOperand::MONonTemporal;
 22396
 22397 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
 22398
 22399 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
 22400 ? MachineMemOperand::MONonTemporal
 22401 : MachineMemOperand::MONone;
 22402
 22403 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22404
22405 SDValue NewLoad, NewStore;
22406 if (UseVectorTy || !DoIntegerTruncate) {
22407 NewLoad = DAG.getLoad(
22408 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22409 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22410 SDValue StoreOp = NewLoad;
22411 if (NeedRotate) {
22412 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22413 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22414 "Unexpected type for rotate-able load pair");
22415 SDValue RotAmt =
22416 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22417 // Target can convert to the identical ROTR if it does not have ROTL.
22418 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22419 }
22420 NewStore = DAG.getStore(
22421 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22422 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22423 : MachinePointerInfo(FirstStoreAS),
22424 FirstStoreAlign, StMMOFlags);
22425 } else { // This must be the truncstore/extload case
22426 EVT ExtendedTy =
22427 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22428 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22429 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22430 FirstLoad->getPointerInfo(), JointMemOpVT,
22431 FirstLoadAlign, LdMMOFlags);
22432 NewStore = DAG.getTruncStore(
22433 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22434 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22435 : MachinePointerInfo(FirstStoreAS),
22436 JointMemOpVT, FirstInChain->getAlign(),
22437 FirstInChain->getMemOperand()->getFlags());
22438 }
22439
22440 // Transfer chain users from old loads to the new load.
22441 for (unsigned i = 0; i < NumElem; ++i) {
22442 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22443 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22444 SDValue(NewLoad.getNode(), 1));
22445 }
22446
22447 // Replace all stores with the new store. Recursively remove corresponding
22448 // values if they are no longer used.
22449 for (unsigned i = 0; i < NumElem; ++i) {
22450 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22451 CombineTo(StoreNodes[i].MemNode, NewStore);
22452 if (Val->use_empty())
22453 recursivelyDeleteUnusedNodes(Val.getNode());
22454 }
22455
22456 MadeChange = true;
22457 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22458 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22459 NumConsecutiveStores -= NumElem;
22460 }
22461 return MadeChange;
22462}
22463
22464bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22465 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22466 return false;
22467
22468 // TODO: Extend this function to merge stores of scalable vectors.
22469 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22470 // store since we know <vscale x 16 x i8> is exactly twice as large as
22471 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22472 EVT MemVT = St->getMemoryVT();
22473 if (MemVT.isScalableVT())
22474 return false;
22475 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22476 return false;
22477
22478 // This function cannot currently deal with non-byte-sized memory sizes.
22479 int64_t ElementSizeBytes = MemVT.getStoreSize();
22480 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22481 return false;
22482
22483 // Do not bother looking at stored values that are not constants, loads, or
22484 // extracted vector elements.
22485 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22486 const StoreSource StoreSrc = getStoreSource(StoredVal);
22487 if (StoreSrc == StoreSource::Unknown)
22488 return false;
22489
22490 SmallVector<MemOpLink, 8> StoreNodes;
22491 // Find potential store merge candidates by searching through chain sub-DAG
22492 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22493
22494 // Check if there is anything to merge.
22495 if (StoreNodes.size() < 2)
22496 return false;
22497
22498 // Sort the memory operands according to their distance from the
22499 // base pointer.
22500 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22501 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22502 });
22503
22504 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22505 Attribute::NoImplicitFloat);
22506 bool IsNonTemporalStore = St->isNonTemporal();
22507 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22508 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22509
22510 // Store Merge attempts to merge the lowest stores first. This generally
22511 // works out well when successful, as the remaining stores are checked
22512 // after the first collection of stores is merged. However, in the
22513 // case that a non-mergeable store is found first, e.g., {p[-2],
22514 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22515 // mergeable cases. To prevent this, we prune such stores from the
22516 // front of StoreNodes here.
22517 bool MadeChange = false;
22518 while (StoreNodes.size() > 1) {
22519 unsigned NumConsecutiveStores =
22520 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22521 // There are no more stores in the list to examine.
22522 if (NumConsecutiveStores == 0)
22523 return MadeChange;
22524
22525 // We have at least 2 consecutive stores. Try to merge them.
22526 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22527 switch (StoreSrc) {
22528 case StoreSource::Constant:
22529 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22530 MemVT, RootNode, AllowVectors);
22531 break;
22532
22533 case StoreSource::Extract:
22534 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22535 MemVT, RootNode);
22536 break;
22537
22538 case StoreSource::Load:
22539 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22540 MemVT, RootNode, AllowVectors,
22541 IsNonTemporalStore, IsNonTemporalLoad);
22542 break;
22543
22544 default:
22545 llvm_unreachable("Unhandled store source type");
22546 }
22547 }
22548
22549 // Remember if we failed to optimize, to save compile time.
22550 if (!MadeChange)
22551 ChainsWithoutMergeableStores.insert(RootNode);
22552
22553 return MadeChange;
22554}
22555
22556SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22557 SDLoc SL(ST);
22558 SDValue ReplStore;
22559
22560 // Replace the chain to avoid dependency.
22561 if (ST->isTruncatingStore()) {
22562 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22563 ST->getBasePtr(), ST->getMemoryVT(),
22564 ST->getMemOperand());
22565 } else {
22566 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22567 ST->getMemOperand());
22568 }
22569
22570 // Create token to keep both nodes around.
22571 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22572 MVT::Other, ST->getChain(), ReplStore);
22573
22574 // Make sure the new and old chains are cleaned up.
22575 AddToWorklist(Token.getNode());
22576
22577 // Don't add users to work list.
22578 return CombineTo(ST, Token, false);
22579}
22580
22581SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22582 SDValue Value = ST->getValue();
22583 if (Value.getOpcode() == ISD::TargetConstantFP)
22584 return SDValue();
22585
22586 if (!ISD::isNormalStore(ST))
22587 return SDValue();
22588
22589 SDLoc DL(ST);
22590
22591 SDValue Chain = ST->getChain();
22592 SDValue Ptr = ST->getBasePtr();
22593
22594 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22595
22596 // NOTE: If the original store is volatile, this transform must not increase
22597 // the number of stores. For example, on x86-32 an f64 can be stored in one
22598 // processor operation but an i64 (which is not legal) requires two. So the
22599 // transform should not be done in this case.
22600
22601 SDValue Tmp;
22602 switch (CFP->getSimpleValueType(0).SimpleTy) {
22603 default:
22604 llvm_unreachable("Unknown FP type");
22605 case MVT::f16: // We don't do this for these yet.
22606 case MVT::bf16:
22607 case MVT::f80:
22608 case MVT::f128:
22609 case MVT::ppcf128:
22610 return SDValue();
22611 case MVT::f32:
22612 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22613 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22614 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22615 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22616 MVT::i32);
22617 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22618 }
22619
22620 return SDValue();
22621 case MVT::f64:
22622 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22623 ST->isSimple()) ||
22624 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22625 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22626 getZExtValue(), SDLoc(CFP), MVT::i64);
22627 return DAG.getStore(Chain, DL, Tmp,
22628 Ptr, ST->getMemOperand());
22629 }
22630
22631 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22632 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22633 // Many FP stores are not made apparent until after legalize, e.g. for
22634 // argument passing. Since this is so common, custom legalize the
22635 // 64-bit integer store into two 32-bit stores.
22636 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22637 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22638 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22639 if (DAG.getDataLayout().isBigEndian())
22640 std::swap(Lo, Hi);
22641
22642 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22643 AAMDNodes AAInfo = ST->getAAInfo();
22644
22645 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22646 ST->getBaseAlign(), MMOFlags, AAInfo);
22647 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22648 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22649 ST->getPointerInfo().getWithOffset(4),
22650 ST->getBaseAlign(), MMOFlags, AAInfo);
22651 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22652 St0, St1);
22653 }
22654
22655 return SDValue();
22656 }
22657}
22658
22659// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22660//
22661// If a store of a load with an element inserted into it has no other
22662// uses in between the chain, then we can consider the vector store
22663// dead and replace it with just the single scalar element store.
22664SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22665 SDLoc DL(ST);
22666 SDValue Value = ST->getValue();
22667 SDValue Ptr = ST->getBasePtr();
22668 SDValue Chain = ST->getChain();
22669 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22670 return SDValue();
22671
22672 SDValue Elt = Value.getOperand(1);
22673 SDValue Idx = Value.getOperand(2);
22674
22675 // If the element isn't byte sized or is implicitly truncated then we can't
22676 // compute an offset.
22677 EVT EltVT = Elt.getValueType();
22678 if (!EltVT.isByteSized() ||
22679 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22680 return SDValue();
22681
22682 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22683 if (!Ld || Ld->getBasePtr() != Ptr ||
22684 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22685 !ISD::isNormalStore(ST) ||
22686 Ld->getAddressSpace() != ST->getAddressSpace() ||
22687 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22688 return SDValue();
22689
22690 unsigned IsFast;
22691 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22692 Elt.getValueType(), ST->getAddressSpace(),
22693 ST->getAlign(), ST->getMemOperand()->getFlags(),
22694 &IsFast) ||
22695 !IsFast)
22696 return SDValue();
22697
22698 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22699
22700 // If the offset is a known constant then try to recover the pointer
22701 // info
22702 SDValue NewPtr;
22703 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22704 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22705 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22706 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22707 } else {
22708 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
22709 }
22710
22711 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22712 ST->getMemOperand()->getFlags());
22713}
22714
22715SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22716 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22717 SDValue Val = ST->getVal();
22718 EVT VT = Val.getValueType();
22719 EVT MemVT = ST->getMemoryVT();
22720
22721 if (MemVT.bitsLT(VT)) { // Is truncating store
22722 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22723 MemVT.getScalarSizeInBits());
22724 // See if we can simplify the operation with SimplifyDemandedBits, which
22725 // only works if the value has a single use.
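// For example, if an i32 value is stored through an i8 atomic store, only
// the low 8 bits are demanded, so a single-use (and X, 0xff) feeding the
// store can be simplified to X by SimplifyDemandedBits.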
22726 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22727 return SDValue(N, 0);
22728 }
22729
22730 return SDValue();
22731}
22732
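// Fold a store of a vselect whose other operand is a load from the same
// address into a masked store. Sketch of the patterns matched below:
//   (store (vselect Mask, X, (load Ptr)), Ptr) -> (masked_store X, Ptr, Mask)
// and the commuted form, in which case the mask is inverted first.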
22733static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
22734 const SDLoc &Dl) {
22735 if (!Store->isSimple() || !ISD::isNormalStore(Store))
22736 return SDValue();
22737
22738 SDValue StoredVal = Store->getValue();
22739 SDValue StorePtr = Store->getBasePtr();
22740 SDValue StoreOffset = Store->getOffset();
22741 EVT VT = Store->getMemoryVT();
22742
22743 // Skip this combine for non-vector types and for <1 x ty> vectors, as they
22744 // will be scalarized later.
22745 if (!VT.isVector() || VT.isScalableVector() || VT.getVectorNumElements() == 1)
22746 return SDValue();
22747
22748 unsigned AddrSpace = Store->getAddressSpace();
22749 Align Alignment = Store->getAlign();
22750 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22751
22752 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
22753 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
22754 return SDValue();
22755
22756 SDValue Mask, OtherVec, LoadCh;
22757 unsigned LoadPos;
22758 if (sd_match(StoredVal,
22759 m_VSelect(m_Value(Mask), m_Value(OtherVec),
22760 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22761 m_Specific(StoreOffset))))) {
22762 LoadPos = 2;
22763 } else if (sd_match(StoredVal,
22764 m_VSelect(m_Value(Mask),
22765 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22766 m_Specific(StoreOffset)),
22767 m_Value(OtherVec)))) {
22768 LoadPos = 1;
22769 } else {
22770 return SDValue();
22771 }
22772
22773 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
22774 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
22775 Load->getAddressSpace() != AddrSpace)
22776 return SDValue();
22777
22778 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
22779 return SDValue();
22780
22781 if (LoadPos == 1)
22782 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
22783
22784 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
22785 StoreOffset, Mask, VT, Store->getMemOperand(),
22786 Store->getAddressingMode());
22787}
22788
22789SDValue DAGCombiner::visitSTORE(SDNode *N) {
22790 StoreSDNode *ST = cast<StoreSDNode>(N);
22791 SDValue Chain = ST->getChain();
22792 SDValue Value = ST->getValue();
22793 SDValue Ptr = ST->getBasePtr();
22794
22795 // If this is a store of a bit convert, store the input value if the
22796 // resultant store does not need a higher alignment than the original.
22797 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
22798 ST->isUnindexed()) {
22799 EVT SVT = Value.getOperand(0).getValueType();
22800 // If the store is volatile, we only want to change the store type if the
22801 // resulting store is legal. Otherwise we might increase the number of
22802 // memory accesses. We don't care if the original type was legal or not
22803 // as we assume software couldn't rely on the number of accesses of an
22804 // illegal type.
22805 // TODO: May be able to relax for unordered atomics (see D66309)
22806 if (((!LegalOperations && ST->isSimple()) ||
22807 TLI.isOperationLegal(ISD::STORE, SVT)) &&
22808 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
22809 DAG, *ST->getMemOperand())) {
22810 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22811 ST->getMemOperand());
22812 }
22813 }
22814
22815 // Turn 'store undef, Ptr' -> nothing.
22816 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
22817 return Chain;
22818
22819 // Try to infer better alignment information than the store already has.
22820 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
22821 !ST->isAtomic()) {
22822 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
22823 if (*Alignment > ST->getAlign() &&
22824 isAligned(*Alignment, ST->getSrcValueOffset())) {
22825 SDValue NewStore =
22826 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
22827 ST->getMemoryVT(), *Alignment,
22828 ST->getMemOperand()->getFlags(), ST->getAAInfo());
22829 // NewStore will always be N as we are only refining the alignment
22830 assert(NewStore.getNode() == N);
22831 (void)NewStore;
22832 }
22833 }
22834 }
22835
22836 // Try transforming a pair of floating point load / store ops to integer
22837 // load / store ops.
22838 if (SDValue NewST = TransformFPLoadStorePair(N))
22839 return NewST;
22840
22841 // Try transforming several stores into STORE (BSWAP).
22842 if (SDValue Store = mergeTruncStores(ST))
22843 return Store;
22844
22845 if (ST->isUnindexed()) {
22846 // Walk up chain skipping non-aliasing memory nodes, on this store and any
22847 // adjacent stores.
22848 if (findBetterNeighborChains(ST)) {
22849 // replaceStoreChain uses CombineTo, which handles all of the worklist
22850 // manipulation. Return the original node to not do anything else.
22851 return SDValue(ST, 0);
22852 }
22853 Chain = ST->getChain();
22854 }
22855
22856 // FIXME: is there such a thing as a truncating indexed store?
22857 if (ST->isTruncatingStore() && ST->isUnindexed() &&
22858 Value.getValueType().isInteger() &&
22859 (!isa<ConstantSDNode>(Value) ||
22860 !cast<ConstantSDNode>(Value)->isOpaque())) {
22861 // Convert a truncating store of an extension into a standard store.
22862 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
22863 Value.getOpcode() == ISD::SIGN_EXTEND ||
22864 Value.getOpcode() == ISD::ANY_EXTEND) &&
22865 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
22866 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
22867 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22868 ST->getMemOperand());
22869
22870 APInt TruncDemandedBits =
22871 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
22872 ST->getMemoryVT().getScalarSizeInBits());
22873
22874 // See if we can simplify the operation with SimplifyDemandedBits, which
22875 // only works if the value has a single use.
22876 AddToWorklist(Value.getNode());
22877 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
22878 // Re-visit the store if anything changed and the store hasn't been merged
22879 // with another node (N is deleted). SimplifyDemandedBits will add Value's
22880 // node back to the worklist if necessary, but we also need to re-visit
22881 // the Store node itself.
22882 if (N->getOpcode() != ISD::DELETED_NODE)
22883 AddToWorklist(N);
22884 return SDValue(N, 0);
22885 }
22886
22887 // Otherwise, see if we can simplify the input to this truncstore with
22888 // knowledge that only the low bits are being used. For example:
22889 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
22890 if (SDValue Shorter =
22891 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
22892 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
22893 ST->getMemOperand());
22894
22895 // If we're storing a truncated constant, see if we can simplify it.
22896 // TODO: Move this to targetShrinkDemandedConstant?
22897 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
22898 if (!Cst->isOpaque()) {
22899 const APInt &CValue = Cst->getAPIntValue();
22900 APInt NewVal = CValue & TruncDemandedBits;
22901 if (NewVal != CValue) {
22902 SDValue Shorter =
22903 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
22904 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
22905 ST->getMemoryVT(), ST->getMemOperand());
22906 }
22907 }
22908 }
22909
22910 // If this is a load followed by a store to the same location, then the store
22911 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
22912 // TODO: Add big-endian truncate support with test coverage.
22913 // TODO: Can relax for unordered atomics (see D66309)
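// For example, "x = load i32, p; ...; store i32 x, p" can drop the store
// entirely, provided nothing on the chain between the load and the store
// may write to p.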
22914 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
22915 ? peekThroughTruncates(Value)
22916 : Value;
22917 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
22918 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
22919 ST->isUnindexed() && ST->isSimple() &&
22920 Ld->getAddressSpace() == ST->getAddressSpace() &&
22921 // There can't be any side effects between the load and store, such as
22922 // a call or store.
22923 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22924 // The store is dead, remove it.
22925 return Chain;
22926 }
22927 }
22928
22929 // Try scalarizing vector stores of loads where we only change one element
22930 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22931 return NewST;
22932
22933 // TODO: Can relax for unordered atomics (see D66309)
22934 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22935 if (ST->isUnindexed() && ST->isSimple() &&
22936 ST1->isUnindexed() && ST1->isSimple()) {
22937 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22938 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22939 ST->getAddressSpace() == ST1->getAddressSpace()) {
22940 // If this is a store followed by a store with the same value to the
22941 // same location, then the store is dead/noop.
22942 return Chain;
22943 }
22944
22945 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22946 !ST1->getBasePtr().isUndef() &&
22947 ST->getAddressSpace() == ST1->getAddressSpace()) {
22948 // If one of the two stores is of a scalable vector type and the
22949 // other is a larger store of a fixed-size type, we cannot remove
22950 // the scalable store, because we do not know its final size until
22951 // runtime.
22952 if (ST->getMemoryVT().isScalableVector() ||
22953 ST1->getMemoryVT().isScalableVector()) {
22954 if (ST1->getBasePtr() == Ptr &&
22955 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22956 ST->getMemoryVT().getStoreSize())) {
22957 CombineTo(ST1, ST1->getChain());
22958 return SDValue(N, 0);
22959 }
22960 } else {
22961 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22962 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22963 // If the preceding store writes to a subset of the current store's
22964 // location and no other node is chained to that store, we can
22965 // effectively drop the preceding store. Do not remove stores to undef
22966 // as they may be used as data sinks.
22967 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22968 ChainBase,
22969 ST1->getMemoryVT().getFixedSizeInBits())) {
22970 CombineTo(ST1, ST1->getChain());
22971 return SDValue(N, 0);
22972 }
22973 }
22974 }
22975 }
22976 }
22977
22978 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22979 // truncating store. We can do this even if this is already a truncstore.
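// E.g. (store (fp_round X:f64 to f32), Ptr) becomes a truncating f32 store
// of X, and (store (trunc Y:i32 to i16), Ptr) becomes a truncating i16
// store of Y.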
22980 if ((Value.getOpcode() == ISD::FP_ROUND ||
22981 Value.getOpcode() == ISD::TRUNCATE) &&
22982 Value->hasOneUse() && ST->isUnindexed() &&
22983 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22984 ST->getMemoryVT(), LegalOperations)) {
22985 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22986 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22987 }
22988
22989 // Always perform this optimization before types are legal. If the target
22990 // prefers, also try this after legalization to catch stores that were created
22991 // by intrinsics or other nodes.
22992 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22993 while (true) {
22994 // There can be multiple store sequences on the same chain.
22995 // Keep trying to merge store sequences until we are unable to do so
22996 // or until we merge the last store on the chain.
22997 bool Changed = mergeConsecutiveStores(ST);
22998 if (!Changed) break;
22999 // Return N as merge only uses CombineTo and no worklist clean
23000 // up is necessary.
23001 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
23002 return SDValue(N, 0);
23003 }
23004 }
23005
23006 // Try transforming N to an indexed store.
23007 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
23008 return SDValue(N, 0);
23009
23010 // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
23011 //
23012 // Make sure to do this only after attempting to merge stores in order to
23013 // avoid changing the types of some subset of stores due to visit order,
23014 // preventing their merging.
23015 if (isa<ConstantFPSDNode>(ST->getValue())) {
23016 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
23017 return NewSt;
23018 }
23019
23020 if (SDValue NewSt = splitMergedValStore(ST))
23021 return NewSt;
23022
23023 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
23024 return MaskedStore;
23025
23026 return ReduceLoadOpStoreWidth(N);
23027}
23028
23029SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
23030 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
23031 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
23032
23033 // We walk up the chains to find stores.
23034 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
23035 while (!Chains.empty()) {
23036 SDValue Chain = Chains.pop_back_val();
23037 if (!Chain.hasOneUse())
23038 continue;
23039 switch (Chain.getOpcode()) {
23040 case ISD::TokenFactor:
23041 for (unsigned Nops = Chain.getNumOperands(); Nops;)
23042 Chains.push_back(Chain.getOperand(--Nops));
23043 break;
23044 case ISD::LIFETIME_START:
23045 case ISD::LIFETIME_END:
23046 // We can forward past any lifetime start/end that can be proven not to
23047 // alias the node.
23048 if (!mayAlias(Chain.getNode(), N))
23049 Chains.push_back(Chain.getOperand(0));
23050 break;
23051 case ISD::STORE: {
23052 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
23053 // TODO: Can relax for unordered atomics (see D66309)
23054 if (!ST->isSimple() || ST->isIndexed())
23055 continue;
23056 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
23057 // The bounds of a scalable store are not known until runtime, so this
23058 // store cannot be elided.
23059 if (StoreSize.isScalable())
23060 continue;
23061 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
23062 // If we store purely within object bounds just before its lifetime ends,
23063 // we can remove the store.
23064 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
23065 if (LifetimeEndBase.contains(
23066 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
23067 StoreBase, StoreSize.getFixedValue() * 8)) {
23068 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
23069 dbgs() << "\nwithin LIFETIME_END of : ";
23070 LifetimeEndBase.dump(); dbgs() << "\n");
23071 CombineTo(ST, ST->getChain());
23072 return SDValue(N, 0);
23073 }
23074 }
23075 }
23076 }
23077 return SDValue();
23078}
23079
23080/// For the instruction sequence of store below, F and I values
23081/// are bundled together as an i64 value before being stored into memory.
23082/// Sometimes it is more efficient to generate separate stores for F and I,
23083/// which can remove the bitwise instructions or sink them to colder places.
23084///
23085/// (store (or (zext (bitcast F to i32) to i64),
23086/// (shl (zext I to i64), 32)), addr) -->
23087/// (store F, addr) and (store I, addr+4)
23088///
23089/// Similarly, splitting for other merged stores can also be beneficial, like:
23090/// For pair of {i32, i32}, i64 store --> two i32 stores.
23091/// For pair of {i32, i16}, i64 store --> two i32 stores.
23092/// For pair of {i16, i16}, i32 store --> two i16 stores.
23093/// For pair of {i16, i8}, i32 store --> two i16 stores.
23094/// For pair of {i8, i8}, i16 store --> two i8 stores.
23095///
23096/// We allow each target to determine specifically which kind of splitting is
23097/// supported.
23098///
23099/// The store patterns are commonly seen from the simple code snippet below
23100/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
23101/// void goo(const std::pair<int, float> &);
23102/// hoo() {
23103/// ...
23104/// goo(std::make_pair(tmp, ftmp));
23105/// ...
23106/// }
23107///
23108SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
23109 if (OptLevel == CodeGenOptLevel::None)
23110 return SDValue();
23111
23112 // Can't change the number of memory accesses for a volatile store or break
23113 // atomicity for an atomic one.
23114 if (!ST->isSimple())
23115 return SDValue();
23116
23117 SDValue Val = ST->getValue();
23118 SDLoc DL(ST);
23119
23120 // Match OR operand.
23121 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
23122 return SDValue();
23123
23124 // Match SHL operand and get Lower and Higher parts of Val.
23125 SDValue Op1 = Val.getOperand(0);
23126 SDValue Op2 = Val.getOperand(1);
23127 SDValue Lo, Hi;
23128 if (Op1.getOpcode() != ISD::SHL) {
23129 std::swap(Op1, Op2);
23130 if (Op1.getOpcode() != ISD::SHL)
23131 return SDValue();
23132 }
23133 Lo = Op2;
23134 Hi = Op1.getOperand(0);
23135 if (!Op1.hasOneUse())
23136 return SDValue();
23137
23138 // Match shift amount to HalfValBitSize.
23139 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
23140 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
23141 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23142 return SDValue();
23143
23144 // Lo and Hi are zero-extended from an integer type no wider than
23145 // HalfValBitSize.
23146 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23147 !Lo.getOperand(0).getValueType().isScalarInteger() ||
23148 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
23149 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23150 !Hi.getOperand(0).getValueType().isScalarInteger() ||
23151 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
23152 return SDValue();
23153
23154 // Use the EVT of low and high parts before bitcast as the input
23155 // of target query.
23156 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23157 ? Lo.getOperand(0).getValueType()
23158 : Lo.getValueType();
23159 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23160 ? Hi.getOperand(0).getValueType()
23161 : Hi.getValueType();
23162 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23163 return SDValue();
23164
23165 // Start to split store.
23166 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23167 AAMDNodes AAInfo = ST->getAAInfo();
23168
23169 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23170 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23171 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23172 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23173
23174 SDValue Chain = ST->getChain();
23175 SDValue Ptr = ST->getBasePtr();
23176 // Lower value store.
23177 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23178 ST->getBaseAlign(), MMOFlags, AAInfo);
23179 Ptr =
23180 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23181 // Higher value store.
23182 SDValue St1 = DAG.getStore(
23183 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23184 ST->getBaseAlign(), MMOFlags, AAInfo);
23185 return St1;
23186}
23187
23188// Merge an insertion into an existing shuffle:
23189// (insert_vector_elt (vector_shuffle X, Y, Mask),
23190// .(extract_vector_elt X, N), InsIndex)
23191// --> (vector_shuffle X, Y, NewMask)
23192// and variations where shuffle operands may be CONCAT_VECTORS.
23193static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23194 SmallVectorImpl<int> &NewMask, SDValue Elt,
23195 unsigned InsIndex) {
23196 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23197 !isa<ConstantSDNode>(Elt.getOperand(1)))
23198 return false;
23199
23200 // Vec's operand 0 is using indices from 0 to N-1 and
23201 // operand 1 from N to 2N - 1, where N is the number of
23202 // elements in the vectors.
23203 SDValue InsertVal0 = Elt.getOperand(0);
23204 int ElementOffset = -1;
23205
23206 // We explore the inputs of the shuffle in order to see if we find the
23207 // source of the extract_vector_elt. If so, we can use it to modify the
23208 // shuffle rather than perform an insert_vector_elt.
23209 SmallVector<std::pair<int, SDValue>> ArgWorkList;
23210 ArgWorkList.emplace_back(Mask.size(), Y);
23211 ArgWorkList.emplace_back(0, X);
23212
23213 while (!ArgWorkList.empty()) {
23214 int ArgOffset;
23215 SDValue ArgVal;
23216 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23217
23218 if (ArgVal == InsertVal0) {
23219 ElementOffset = ArgOffset;
23220 break;
23221 }
23222
23223 // Peek through concat_vector.
23224 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23225 int CurrentArgOffset =
23226 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23227 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23228 for (SDValue Op : reverse(ArgVal->ops())) {
23229 CurrentArgOffset -= Step;
23230 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23231 }
23232
23233 // Make sure we went through all the elements and did not screw up index
23234 // computation.
23235 assert(CurrentArgOffset == ArgOffset);
23236 }
23237 }
23238
23239 // If we failed to find a match, see if we can replace an UNDEF shuffle
23240 // operand.
23241 if (ElementOffset == -1) {
23242 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23243 return false;
23244 ElementOffset = Mask.size();
23245 Y = InsertVal0;
23246 }
23247
23248 NewMask.assign(Mask.begin(), Mask.end());
23249 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23250 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23251 "NewMask[InsIndex] is out of bound");
23252 return true;
23253}
23254
23255// Merge an insertion into an existing shuffle:
23256// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23257// InsIndex)
23258// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23259// CONCAT_VECTORS.
23260SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23261 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23262 "Expected extract_vector_elt");
23263 SDValue InsertVal = N->getOperand(1);
23264 SDValue Vec = N->getOperand(0);
23265
23266 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23267 if (!SVN || !Vec.hasOneUse())
23268 return SDValue();
23269
23270 ArrayRef<int> Mask = SVN->getMask();
23271 SDValue X = Vec.getOperand(0);
23272 SDValue Y = Vec.getOperand(1);
23273
23274 SmallVector<int, 16> NewMask(Mask);
23275 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23276 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23277 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23278 if (LegalShuffle)
23279 return LegalShuffle;
23280 }
23281
23282 return SDValue();
23283}
23284
23285// Convert a disguised subvector insertion into a shuffle:
23286// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23287// bitcast(shuffle (bitcast V), (extended X), Mask)
23288// Note: We do not use an insert_subvector node because that requires a
23289// legal subvector type.
23290SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23291 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23292 "Expected extract_vector_elt");
23293 SDValue InsertVal = N->getOperand(1);
23294
23295 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23296 !InsertVal.getOperand(0).getValueType().isVector())
23297 return SDValue();
23298
23299 SDValue SubVec = InsertVal.getOperand(0);
23300 SDValue DestVec = N->getOperand(0);
23301 EVT SubVecVT = SubVec.getValueType();
23302 EVT VT = DestVec.getValueType();
23303 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23304 // If the source only has a single vector element, the cost of creating a
23305 // shuffle for it is likely to exceed the cost of an insert_vector_elt.
23306 if (NumSrcElts == 1)
23307 return SDValue();
23308 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23309 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23310
23311 // Step 1: Create a shuffle mask that implements this insert operation. The
23312 // vector that we are inserting into will be operand 0 of the shuffle, so
23313 // those elements are just 'i'. The inserted subvector is in the first
23314 // positions of operand 1 of the shuffle. Example:
23315 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23316 SmallVector<int, 16> Mask(NumMaskVals);
23317 for (unsigned i = 0; i != NumMaskVals; ++i) {
23318 if (i / NumSrcElts == InsIndex)
23319 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23320 else
23321 Mask[i] = i;
23322 }
23323
23324 // Bail out if the target can not handle the shuffle we want to create.
23325 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23326 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23327 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23328 return SDValue();
23329
23330 // Step 2: Create a wide vector from the inserted source vector by appending
23331 // undefined elements. This is the same size as our destination vector.
23332 SDLoc DL(N);
23333 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23334 ConcatOps[0] = SubVec;
23335 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23336
23337 // Step 3: Shuffle in the padded subvector.
23338 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23339 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23340 AddToWorklist(PaddedSubV.getNode());
23341 AddToWorklist(DestVecBC.getNode());
23342 AddToWorklist(Shuf.getNode());
23343 return DAG.getBitcast(VT, Shuf);
23344}
23345
23346// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23347// possible and the new load will be quick. We use more loads but fewer shuffles
23348// and inserts.
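// For example (<4 x i32>, insert index 0): if the shuffled vector was loaded
// from Ptr and the inserted scalar was loaded from Ptr - 4, the combined
// result is exactly the 16 bytes starting at Ptr - 4, so it can be rebuilt
// with a single (possibly unaligned) vector load from Ptr - 4.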
23349SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23350 EVT VT = N->getValueType(0);
23351
23352 // InsIndex is expected to be the first or last lane.
23353 if (!VT.isFixedLengthVector() ||
23354 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23355 return SDValue();
23356
23357 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23358 // depending on the InsIndex.
23359 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23360 SDValue Scalar = N->getOperand(1);
23361 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23362 return InsIndex == P.index() || P.value() < 0 ||
23363 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23364 (InsIndex == VT.getVectorNumElements() - 1 &&
23365 P.value() == (int)P.index() + 1);
23366 }))
23367 return SDValue();
23368
23369 // We optionally skip over an extend so long as both loads are extended in the
23370 // same way from the same type.
23371 unsigned Extend = 0;
23372 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23373 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23374 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23375 Extend = Scalar.getOpcode();
23376 Scalar = Scalar.getOperand(0);
23377 }
23378
23379 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23380 if (!ScalarLoad)
23381 return SDValue();
23382
23383 SDValue Vec = Shuffle->getOperand(0);
23384 if (Extend) {
23385 if (Vec.getOpcode() != Extend)
23386 return SDValue();
23387 Vec = Vec.getOperand(0);
23388 }
23389 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23390 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23391 return SDValue();
23392
23393 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23394 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23395 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23396 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23397 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23398 return SDValue();
23399
23400 // Check that the offset between the pointers produces a single contiguous
23401 // load.
23402 if (InsIndex == 0) {
23403 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23404 -1))
23405 return SDValue();
23406 } else {
23407 if (!DAG.areNonVolatileConsecutiveLoads(
23408 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23409 return SDValue();
23410 }
23411
23412 // And that the new unaligned load will be fast.
23413 unsigned IsFast = 0;
23414 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23415 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23416 Vec.getValueType(), VecLoad->getAddressSpace(),
23417 NewAlign, VecLoad->getMemOperand()->getFlags(),
23418 &IsFast) ||
23419 !IsFast)
23420 return SDValue();
23421
23422 // Calculate the new Ptr and create the new load.
23423 SDLoc DL(N);
23424 SDValue Ptr = ScalarLoad->getBasePtr();
23425 if (InsIndex != 0)
23426 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23427 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23428 MachinePointerInfo PtrInfo =
23429 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23430 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23431
23432 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23433 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23434 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23435 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23436 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23437}
23438
23439SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23440 SDValue InVec = N->getOperand(0);
23441 SDValue InVal = N->getOperand(1);
23442 SDValue EltNo = N->getOperand(2);
23443 SDLoc DL(N);
23444
23445 EVT VT = InVec.getValueType();
23446 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23447
23448 // Inserting into an out-of-bounds element is undefined.
23449 if (IndexC && VT.isFixedLengthVector() &&
23450 IndexC->getZExtValue() >= VT.getVectorNumElements())
23451 return DAG.getUNDEF(VT);
23452
23453 // Remove redundant insertions:
23454 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23455 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23456 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23457 return InVec;
23458
23459 // Remove insert of UNDEF/POISON elements.
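// Inserting POISON can simply be dropped, since any existing lane value
// refines poison. Inserting UNDEF into a vector that may contain poison
// requires freezing the vector first, because the inserted lane is only
// allowed to be undef, not poison.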
23460 if (InVal.isUndef()) {
23461 if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23462 return InVec;
23463 return DAG.getFreeze(InVec);
23464 }
23465
23466 if (!IndexC) {
23467 // If this is variable insert to undef vector, it might be better to splat:
23468 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23469 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23470 return DAG.getSplat(VT, DL, InVal);
23471 return SDValue();
23472 }
23473
23474 if (VT.isScalableVector())
23475 return SDValue();
23476
23477 unsigned NumElts = VT.getVectorNumElements();
23478
23479 // We must know which element is being inserted for folds below here.
23480 unsigned Elt = IndexC->getZExtValue();
23481
23482 // Handle <1 x ???> vector insertion special cases.
23483 if (NumElts == 1) {
23484 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23485 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23486 InVal.getOperand(0).getValueType() == VT &&
23487 isNullConstant(InVal.getOperand(1)))
23488 return InVal.getOperand(0);
23489 }
23490
23491 // Canonicalize insert_vector_elt dag nodes.
23492 // Example:
23493 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23494 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23495 //
23496 // Do this only if the child insert_vector node has one use; also
23497 // do this only if indices are both constants and Idx1 < Idx0.
23498 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23499 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23500 unsigned OtherElt = InVec.getConstantOperandVal(2);
23501 if (Elt < OtherElt) {
23502 // Swap nodes.
23503 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23504 InVec.getOperand(0), InVal, EltNo);
23505 AddToWorklist(NewOp.getNode());
23506 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23507 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23508 }
23509 }
23510
23511 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23512 return Shuf;
23513
23514 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23515 return Shuf;
23516
23517 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23518 return Shuf;
23519
23520 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23521 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23522 // Single element vector - we don't need to recurse.
23523 if (NumElts == 1)
23524 return DAG.getBuildVector(VT, DL, {InVal});
23525
23526 // If we haven't already collected the element, insert into the op list.
23527 EVT MaxEltVT = InVal.getValueType();
23528 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23529 unsigned Idx) {
23530 if (!Ops[Idx]) {
23531 Ops[Idx] = Elt;
23532 if (VT.isInteger()) {
23533 EVT EltVT = Elt.getValueType();
23534 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23535 }
23536 }
23537 };
23538
23539 // Ensure all the operands are the same value type, fill any missing
23540 // operands with UNDEF and create the BUILD_VECTOR.
23541 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23542 bool FreezeUndef = false) {
23543 assert(Ops.size() == NumElts && "Unexpected vector size");
23544 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23545 : DAG.getUNDEF(MaxEltVT);
23546 for (SDValue &Op : Ops) {
23547 if (Op)
23548 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23549 else
23550 Op = UndefOp;
23551 }
23552 return DAG.getBuildVector(VT, DL, Ops);
23553 };
23554
23556 Ops[Elt] = InVal;
23557
23558 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
23559 for (SDValue CurVec = InVec; CurVec;) {
23560 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23561 if (CurVec.isUndef())
23562 return CanonicalizeBuildVector(Ops);
23563
23564 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23565 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23566 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23567
23568 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23569 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23570 for (unsigned I = 0; I != NumElts; ++I)
23571 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23572 return CanonicalizeBuildVector(Ops);
23573 }
23574
23575 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23576 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23577 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23578 return CanonicalizeBuildVector(Ops);
23579 }
23580
23581 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23582 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23583 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23584 if (CurIdx->getAPIntValue().ult(NumElts)) {
23585 unsigned Idx = CurIdx->getZExtValue();
23586 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23587
23588 // Found entire BUILD_VECTOR.
23589 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23590 return CanonicalizeBuildVector(Ops);
23591
23592 CurVec = CurVec->getOperand(0);
23593 continue;
23594 }
23595
23596 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23597 // update the shuffle mask (and second operand if we started with unary
23598 // shuffle) and create a new legal shuffle.
23599 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23600 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23601 SDValue LHS = SVN->getOperand(0);
23602 SDValue RHS = SVN->getOperand(1);
23603 SmallVector<int, 16> Mask(SVN->getMask());
23604 bool Merged = true;
23605 for (auto I : enumerate(Ops)) {
23606 SDValue &Op = I.value();
23607 if (Op) {
23608 SmallVector<int, 16> NewMask;
23609 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23610 Merged = false;
23611 break;
23612 }
23613 Mask = std::move(NewMask);
23614 }
23615 }
23616 if (Merged)
23617 if (SDValue NewShuffle =
23618 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23619 return NewShuffle;
23620 }
23621
23622 if (!LegalOperations) {
23623 bool IsNull = llvm::isNullConstant(InVal);
23624 // We can convert to AND/OR mask if all insertions are zero or -1
23625 // respectively.
23626 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23627 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23628 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23629 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23630 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23631 SmallVector<SDValue, 8> Mask(NumElts);
23632
23633 // Build the mask and return the corresponding DAG node.
23634 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23635 unsigned MaskOpcode) {
23636 for (unsigned I = 0; I != NumElts; ++I)
23637 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23638 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23639 DAG.getBuildVector(VT, DL, Mask));
23640 };
23641
23642 // If all inserted elements are zero, clear those lanes with an AND mask.
23643 if (IsNull)
23644 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23645
23646 // If all inserted elements are -1, set those lanes with an OR mask.
23647 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23648 }
23649 }
23650
23651 // Failed to find a match in the chain - bail.
23652 break;
23653 }
23654
23655 // See if we can fill in the missing constant elements as zeros.
23656 // TODO: Should we do this for any constant?
23657 APInt DemandedZeroElts = APInt::getZero(NumElts);
23658 for (unsigned I = 0; I != NumElts; ++I)
23659 if (!Ops[I])
23660 DemandedZeroElts.setBit(I);
23661
23662 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
23663 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
23664 : DAG.getConstantFP(0, DL, MaxEltVT);
23665 for (unsigned I = 0; I != NumElts; ++I)
23666 if (!Ops[I])
23667 Ops[I] = Zero;
23668
23669 return CanonicalizeBuildVector(Ops);
23670 }
23671 }
23672
23673 return SDValue();
23674}
23675
23676/// Transform a vector binary operation into a scalar binary operation by moving
23677/// the math/logic after an extract element of a vector.
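/// For example:
///   extract_vector_elt (add X, <1,2,3,4>), 2 --> add (extract_vector_elt X, 2), 3
/// since the extract of the constant operand is constant-folded away.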
23678static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
23679 const SDLoc &DL, bool LegalTypes) {
23680 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23681 SDValue Vec = ExtElt->getOperand(0);
23682 SDValue Index = ExtElt->getOperand(1);
23683 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23684 unsigned Opc = Vec.getOpcode();
23685 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
23686 Vec->getNumValues() != 1)
23687 return SDValue();
23688
23689 // Targets may want to avoid this to prevent an expensive register transfer.
23690 if (!TLI.shouldScalarizeBinop(Vec))
23691 return SDValue();
23692
23693 EVT ResVT = ExtElt->getValueType(0);
23694 if (Opc == ISD::SETCC &&
23695 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
23696 return SDValue();
23697
23698 // Extracting an element of a vector constant is constant-folded, so this
23699 // transform is just replacing a vector op with a scalar op while moving the
23700 // extract.
23701 SDValue Op0 = Vec.getOperand(0);
23702 SDValue Op1 = Vec.getOperand(1);
23703 APInt SplatVal;
23704 if (!isAnyConstantBuildVector(Op0, true) &&
23705 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
23706 !isAnyConstantBuildVector(Op1, true) &&
23707 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
23708 return SDValue();
23709
23710 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
23711 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
23712 if (Opc == ISD::SETCC) {
23713 EVT OpVT = Op0.getValueType().getVectorElementType();
23714 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
23715 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
23716 SDValue NewVal = DAG.getSetCC(
23717 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
23718 // We may need to sign- or zero-extend the result to match the same
23719 // behaviour as the vector version of SETCC.
23720 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
23721 if (ResVT != MVT::i1 &&
23722 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
23723 VecBoolContents != TLI.getBooleanContents(ResVT)) {
23724 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
23725 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
23726 DAG.getValueType(MVT::i1));
23727 else
23728 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
23729 }
23730 return NewVal;
23731 }
23732 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
23733 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
23734 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
23735}
23736
23737// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
23738// recursively analyse all of its users, and try to model them as
23739// bit sequence extractions. If all of them agree on the new, narrower element
23740// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
23741// new element type, do so now.
23742// This is mainly useful to recover from legalization that scalarized
23743// the vector as wide elements, but tries to rebuild it with narrower elements.
23744//
23745// Some more nodes could be modelled if that helps cover interesting patterns.
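// For example (little-endian): if legalization produced
//   A = (i64 extract_vector_elt (v2i64 X), 0)
// whose only uses are (trunc A to i32) and (trunc (srl A, 32) to i32), both
// users can be rebuilt as i32 extractions from (bitcast X to v4i32), at
// indices 0 and 1 respectively.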
23746bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
23747 SDNode *N) {
23748 // We perform this optimization post type-legalization because
23749 // the type-legalizer often scalarizes integer-promoted vectors.
23750 // Performing this optimization before may cause legalization cycles.
23751 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23752 return false;
23753
23754 // TODO: Add support for big-endian.
23755 if (DAG.getDataLayout().isBigEndian())
23756 return false;
23757
23758 SDValue VecOp = N->getOperand(0);
23759 EVT VecVT = VecOp.getValueType();
23760 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
23761
23762 // We must start with a constant extraction index.
23763 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
23764 if (!IndexC)
23765 return false;
23766
23767 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
23768 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
23769
23770 // TODO: deal with the case of implicit anyext of the extraction.
23771 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23772 EVT ScalarVT = N->getValueType(0);
23773 if (VecVT.getScalarType() != ScalarVT)
23774 return false;
23775
23776 // TODO: deal with the cases other than everything being integer-typed.
23777 if (!ScalarVT.isScalarInteger())
23778 return false;
23779
23780 struct Entry {
23781 SDNode *Producer;
23782
23783 // Which bits of VecOp does it contain?
23784 unsigned BitPos;
23785 int NumBits;
23786 // NOTE: the actual width of \p Producer may be wider than NumBits!
23787
23788 Entry(Entry &&) = default;
23789 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
23790 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
23791
23792 Entry() = delete;
23793 Entry(const Entry &) = delete;
23794 Entry &operator=(const Entry &) = delete;
23795 Entry &operator=(Entry &&) = delete;
23796 };
23797 SmallVector<Entry, 32> Worklist;
23798 SmallVector<Entry, 32> Leafs;
23799
23800 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
23801 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
23802 /*NumBits=*/VecEltBitWidth);
23803
23804 while (!Worklist.empty()) {
23805 Entry E = Worklist.pop_back_val();
23806 // Does the node not even use any of the VecOp bits?
23807 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
23808 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
23809 return false; // Let's allow the other combines to clean this up first.
23810 // Did we fail to model any of the users of the Producer?
23811 bool ProducerIsLeaf = false;
23812 // Look at each user of this Producer.
23813 for (SDNode *User : E.Producer->users()) {
23814 switch (User->getOpcode()) {
23815 // TODO: support ISD::BITCAST
23816 // TODO: support ISD::ANY_EXTEND
23817 // TODO: support ISD::ZERO_EXTEND
23818 // TODO: support ISD::SIGN_EXTEND
23819 case ISD::TRUNCATE:
23820 // Truncation simply means we keep the position, but extract fewer bits.
23821 Worklist.emplace_back(User, E.BitPos,
23822 /*NumBits=*/User->getValueSizeInBits(0));
23823 break;
23824 // TODO: support ISD::SRA
23825 // TODO: support ISD::SHL
23826 case ISD::SRL:
23827 // We should be shifting the Producer by a constant amount.
23828 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
23829 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
23830 // Logical right-shift means that we start extraction later,
23831 // but stop it at the same position we did previously.
23832 unsigned ShAmt = ShAmtC->getZExtValue();
23833 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
23834 break;
23835 }
23836 [[fallthrough]];
23837 default:
23838 // We can not model this user of the Producer.
23839 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
23840 ProducerIsLeaf = true;
23841 // Profitability check: all users that we can not model
23842 // must be ISD::BUILD_VECTOR's.
23843 if (User->getOpcode() != ISD::BUILD_VECTOR)
23844 return false;
23845 break;
23846 }
23847 }
23848 if (ProducerIsLeaf)
23849 Leafs.emplace_back(std::move(E));
23850 }
23851
23852 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
23853
23854  // If we are still at the same element granularity, give up.
23855 if (NewVecEltBitWidth == VecEltBitWidth)
23856 return false;
23857
23858 // The vector width must be a multiple of the new element width.
23859 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
23860 return false;
23861
23862 // All leafs must agree on the new element width.
23863  // All leafs must not expect any "padding" bits on top of that width.
23864  // All leafs must start extraction from a multiple of that width.
23865 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
23866 return (unsigned)E.NumBits == NewVecEltBitWidth &&
23867 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23868 E.BitPos % NewVecEltBitWidth == 0;
23869 }))
23870 return false;
23871
23872 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
23873 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
23874 VecVT.getSizeInBits() / NewVecEltBitWidth);
23875
23876 if (LegalTypes &&
23877 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23878 return false;
23879
23880 if (LegalOperations &&
23881 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23882        TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23883    return false;
23884
23885 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23886 for (const Entry &E : Leafs) {
23887 SDLoc DL(E.Producer);
23888 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23889 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23890 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23891 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23892 DAG.getVectorIdxConstant(NewIndex, DL));
23893 CombineTo(E.Producer, V);
23894 }
23895
23896 return true;
23897}
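// Illustrative sketch (not part of the original source; the node names and the
// v2i64 input v are hypothetical): on a little-endian target where i32/v4i32
// are legal, and where t1 and t3 below only feed BUILD_VECTORs, the combine
// above takes
//   t0: i64 = extract_vector_elt v2i64:v, Constant:i64<0>
//   t1: i32 = truncate t0
//   t2: i64 = srl t0, Constant:i64<32>
//   t3: i32 = truncate t2
// and replaces t1 with (extract_vector_elt (v4i32 bitcast v), 0) and t3 with
// (extract_vector_elt (v4i32 bitcast v), 1), i.e. it re-extracts at the new
// 32-bit element granularity.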
23898
23899SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23900 SDValue VecOp = N->getOperand(0);
23901 SDValue Index = N->getOperand(1);
23902 EVT ScalarVT = N->getValueType(0);
23903 EVT VecVT = VecOp.getValueType();
23904 if (VecOp.isUndef())
23905 return DAG.getUNDEF(ScalarVT);
23906
23907 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23908 //
23909 // This only really matters if the index is non-constant since other combines
23910 // on the constant elements already work.
23911 SDLoc DL(N);
23912 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23913 Index == VecOp.getOperand(2)) {
23914 SDValue Elt = VecOp.getOperand(1);
23915 AddUsersToWorklist(VecOp.getNode());
23916 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23917 }
23918
23919  // (vextract (scalar_to_vector val), 0) -> val
23920 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23921 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23922 if (DAG.isKnownNeverZero(Index))
23923 return DAG.getUNDEF(ScalarVT);
23924
23925 // Check if the result type doesn't match the inserted element type.
23926 // The inserted element and extracted element may have mismatched bitwidth.
23927    // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23928 SDValue InOp = VecOp.getOperand(0);
23929 if (InOp.getValueType() != ScalarVT) {
23930 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23931 if (InOp.getValueType().bitsGT(ScalarVT))
23932 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23933 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23934 }
23935 return InOp;
23936 }
23937
23938 // extract_vector_elt of out-of-bounds element -> UNDEF
23939 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23940 if (IndexC && VecVT.isFixedLengthVector() &&
23941 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23942 return DAG.getUNDEF(ScalarVT);
23943
23944 // extract_vector_elt (build_vector x, y), 1 -> y
23945 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23946 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23947 TLI.isTypeLegal(VecVT)) {
23948 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23949 VecVT.isFixedLengthVector()) &&
23950 "BUILD_VECTOR used for scalable vectors");
23951 unsigned IndexVal =
23952 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23953 SDValue Elt = VecOp.getOperand(IndexVal);
23954 EVT InEltVT = Elt.getValueType();
23955
23956 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23957 isNullConstant(Elt)) {
23958 // Sometimes build_vector's scalar input types do not match result type.
23959 if (ScalarVT == InEltVT)
23960 return Elt;
23961
23962 // TODO: It may be useful to truncate if free if the build_vector
23963 // implicitly converts.
23964 }
23965 }
23966
23967 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23968 return BO;
23969
23970 if (VecVT.isScalableVector())
23971 return SDValue();
23972
23973 // All the code from this point onwards assumes fixed width vectors, but it's
23974 // possible that some of the combinations could be made to work for scalable
23975 // vectors too.
23976 unsigned NumElts = VecVT.getVectorNumElements();
23977 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23978
23979  // See if the extracted element is constant, in which case fold it if it's
23980 // a legal fp immediate.
23981 if (IndexC && ScalarVT.isFloatingPoint()) {
23982 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23983 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23984 if (KnownElt.isConstant()) {
23985 APFloat CstFP =
23986 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23987 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23988 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23989 }
23990 }
23991
23992 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23993 // there are regressions on multiple targets without it. We can end up with a
23994 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23995 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23996 VecOp.hasOneUse()) {
23997    // The vector index of the LSBs of the source depends on the endianness.
23998 bool IsLE = DAG.getDataLayout().isLittleEndian();
23999 unsigned ExtractIndex = IndexC->getZExtValue();
24000 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
24001 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
24002 SDValue BCSrc = VecOp.getOperand(0);
24003 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
24004 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
24005
24006 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
24007 if (LegalTypes && BCSrc.getValueType().isInteger() &&
24008 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24009 BCSrc.getScalarValueSizeInBits() ==
24010            BCSrc.getOperand(0).getValueSizeInBits()) {
24011      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
24012 // trunc i64 X to i32
24013 SDValue X = BCSrc.getOperand(0);
24014 EVT XVT = X.getValueType();
24015 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
24016 "Extract element and scalar to vector can't change element type "
24017 "from FP to integer.");
24018 unsigned XBitWidth = X.getValueSizeInBits();
24019 unsigned Scale = XBitWidth / VecEltBitWidth;
24020 BCTruncElt = IsLE ? 0 : Scale - 1;
24021
24022 // An extract element return value type can be wider than its vector
24023 // operand element type. In that case, the high bits are undefined, so
24024 // it's possible that we may need to extend rather than truncate.
24025 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
24026 assert(XBitWidth % VecEltBitWidth == 0 &&
24027 "Scalar bitwidth must be a multiple of vector element bitwidth");
24028
24029 if (ExtractIndex != BCTruncElt) {
24030 unsigned ShiftIndex =
24031 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
24032 X = DAG.getNode(
24033 ISD::SRL, DL, XVT, X,
24034 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
24035 }
24036
24037 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
24038 }
24039 }
24040 }
24041
24042 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
24043 // We only perform this optimization before the op legalization phase because
24044 // we may introduce new vector instructions which are not backed by TD
24045  // patterns (for example, on AVX, extracting elements from a wide vector
24046  // without using extract_subvector). However, if we can find an underlying
24047 // scalar value, then we can always use that.
24048 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
24049 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
24050 // Find the new index to extract from.
24051 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
24052
24053 // Extracting an undef index is undef.
24054 if (OrigElt == -1)
24055 return DAG.getUNDEF(ScalarVT);
24056
24057 // Select the right vector half to extract from.
24058 SDValue SVInVec;
24059 if (OrigElt < (int)NumElts) {
24060 SVInVec = VecOp.getOperand(0);
24061 } else {
24062 SVInVec = VecOp.getOperand(1);
24063 OrigElt -= NumElts;
24064 }
24065
24066 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
24067 // TODO: Check if shuffle mask is legal?
24068 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
24069 !VecOp.hasOneUse())
24070 return SDValue();
24071
24072 SDValue InOp = SVInVec.getOperand(OrigElt);
24073 if (InOp.getValueType() != ScalarVT) {
24074 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24075 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
24076 }
24077
24078 return InOp;
24079 }
24080
24081 // FIXME: We should handle recursing on other vector shuffles and
24082 // scalar_to_vector here as well.
24083
24084 if (!LegalOperations ||
24085 // FIXME: Should really be just isOperationLegalOrCustom.
24086        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
24087        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
24088      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
24089 DAG.getVectorIdxConstant(OrigElt, DL));
24090 }
24091 }
24092
24093 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
24094 // simplify it based on the (valid) extraction indices.
24095 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
24096 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24097 Use->getOperand(0) == VecOp &&
24098 isa<ConstantSDNode>(Use->getOperand(1));
24099 })) {
24100 APInt DemandedElts = APInt::getZero(NumElts);
24101 for (SDNode *User : VecOp->users()) {
24102 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
24103 if (CstElt->getAPIntValue().ult(NumElts))
24104 DemandedElts.setBit(CstElt->getZExtValue());
24105 }
24106 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
24107 // We simplified the vector operand of this extract element. If this
24108 // extract is not dead, visit it again so it is folded properly.
24109 if (N->getOpcode() != ISD::DELETED_NODE)
24110 AddToWorklist(N);
24111 return SDValue(N, 0);
24112 }
24113 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
24114 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
24115 // We simplified the vector operand of this extract element. If this
24116 // extract is not dead, visit it again so it is folded properly.
24117 if (N->getOpcode() != ISD::DELETED_NODE)
24118 AddToWorklist(N);
24119 return SDValue(N, 0);
24120 }
24121 }
24122
24123 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
24124 return SDValue(N, 0);
24125
24126 // Everything under here is trying to match an extract of a loaded value.
24127 // If the result of load has to be truncated, then it's not necessarily
24128 // profitable.
24129 bool BCNumEltsChanged = false;
24130 EVT ExtVT = VecVT.getVectorElementType();
24131 EVT LVT = ExtVT;
24132 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
24133 return SDValue();
24134
24135 if (VecOp.getOpcode() == ISD::BITCAST) {
24136 // Don't duplicate a load with other uses.
24137 if (!VecOp.hasOneUse())
24138 return SDValue();
24139
24140 EVT BCVT = VecOp.getOperand(0).getValueType();
24141 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
24142 return SDValue();
24143 if (NumElts != BCVT.getVectorNumElements())
24144 BCNumEltsChanged = true;
24145 VecOp = VecOp.getOperand(0);
24146 ExtVT = BCVT.getVectorElementType();
24147 }
24148
24149 // extract (vector load $addr), i --> load $addr + i * size
24150 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24151 ISD::isNormalLoad(VecOp.getNode()) &&
24152 !Index->hasPredecessor(VecOp.getNode())) {
24153 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
24154 if (VecLoad && VecLoad->isSimple()) {
24155 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24156 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
24157 ++OpsNarrowed;
24158 return Scalarized;
24159 }
24160 }
24161 }
24162
24163 // Perform only after legalization to ensure build_vector / vector_shuffle
24164 // optimizations have already been done.
24165 if (!LegalOperations || !IndexC)
24166 return SDValue();
24167
24168 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24169 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24170 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24171 int Elt = IndexC->getZExtValue();
24172 LoadSDNode *LN0 = nullptr;
24173 if (ISD::isNormalLoad(VecOp.getNode())) {
24174 LN0 = cast<LoadSDNode>(VecOp);
24175 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24176 VecOp.getOperand(0).getValueType() == ExtVT &&
24177 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24178 // Don't duplicate a load with other uses.
24179 if (!VecOp.hasOneUse())
24180 return SDValue();
24181
24182 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24183 }
24184 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24185 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24186 // =>
24187 // (load $addr+1*size)
24188
24189 // Don't duplicate a load with other uses.
24190 if (!VecOp.hasOneUse())
24191 return SDValue();
24192
24193 // If the bit convert changed the number of elements, it is unsafe
24194 // to examine the mask.
24195 if (BCNumEltsChanged)
24196 return SDValue();
24197
24198 // Select the input vector, guarding against out of range extract vector.
24199 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24200 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24201
24202 if (VecOp.getOpcode() == ISD::BITCAST) {
24203 // Don't duplicate a load with other uses.
24204 if (!VecOp.hasOneUse())
24205 return SDValue();
24206
24207 VecOp = VecOp.getOperand(0);
24208 }
24209 if (ISD::isNormalLoad(VecOp.getNode())) {
24210 LN0 = cast<LoadSDNode>(VecOp);
24211 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24212 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24213 }
24214 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24215 VecVT.getVectorElementType() == ScalarVT &&
24216 (!LegalTypes ||
24217 TLI.isTypeLegal(
24218                  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24219    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24220 // -> extract_vector_elt a, 0
24221 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24222 // -> extract_vector_elt a, 1
24223 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24224 // -> extract_vector_elt b, 0
24225 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24226 // -> extract_vector_elt b, 1
24227 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24228 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24229 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24230 Index.getValueType());
24231
24232 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24233    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24234                              ConcatVT.getVectorElementType(),
24235 ConcatOp, NewIdx);
24236 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24237 }
24238
24239 // Make sure we found a non-volatile load and the extractelement is
24240 // the only use.
24241 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24242 return SDValue();
24243
24244 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24245 if (Elt == -1)
24246 return DAG.getUNDEF(LVT);
24247
24248 if (SDValue Scalarized =
24249 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24250 ++OpsNarrowed;
24251 return Scalarized;
24252 }
24253
24254 return SDValue();
24255}
24256
24257// Simplify (build_vec (ext )) to (bitcast (build_vec ))
24258SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24259 // We perform this optimization post type-legalization because
24260 // the type-legalizer often scalarizes integer-promoted vectors.
24261  // Performing this optimization earlier may create bit-casts which
24262 // will be type-legalized to complex code sequences.
24263 // We perform this optimization only before the operation legalizer because we
24264 // may introduce illegal operations.
24265 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24266 return SDValue();
24267
24268 unsigned NumInScalars = N->getNumOperands();
24269 SDLoc DL(N);
24270 EVT VT = N->getValueType(0);
24271
24272 // Check to see if this is a BUILD_VECTOR of a bunch of values
24273 // which come from any_extend or zero_extend nodes. If so, we can create
24274 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24275 // optimizations. We do not handle sign-extend because we can't fill the sign
24276 // using shuffles.
24277 EVT SourceType = MVT::Other;
24278 bool AllAnyExt = true;
24279
24280 for (unsigned i = 0; i != NumInScalars; ++i) {
24281 SDValue In = N->getOperand(i);
24282 // Ignore undef inputs.
24283 if (In.isUndef()) continue;
24284
24285 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24286 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24287
24288 // Abort if the element is not an extension.
24289 if (!ZeroExt && !AnyExt) {
24290 SourceType = MVT::Other;
24291 break;
24292 }
24293
24294 // The input is a ZeroExt or AnyExt. Check the original type.
24295 EVT InTy = In.getOperand(0).getValueType();
24296
24297 // Check that all of the widened source types are the same.
24298 if (SourceType == MVT::Other)
24299 // First time.
24300 SourceType = InTy;
24301 else if (InTy != SourceType) {
24302      // Multiple input types. Abort.
24303 SourceType = MVT::Other;
24304 break;
24305 }
24306
24307 // Check if all of the extends are ANY_EXTENDs.
24308 AllAnyExt &= AnyExt;
24309 }
24310
24311 // In order to have valid types, all of the inputs must be extended from the
24312 // same source type and all of the inputs must be any or zero extend.
24313 // Scalar sizes must be a power of two.
24314 EVT OutScalarTy = VT.getScalarType();
24315 bool ValidTypes =
24316 SourceType != MVT::Other &&
24317      llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24318      llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24319
24320 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24321 // turn into a single shuffle instruction.
24322 if (!ValidTypes)
24323 return SDValue();
24324
24325 // If we already have a splat buildvector, then don't fold it if it means
24326 // introducing zeros.
24327 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24328 return SDValue();
24329
24330 bool isLE = DAG.getDataLayout().isLittleEndian();
24331 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24332 assert(ElemRatio > 1 && "Invalid element size ratio");
24333 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24334 DAG.getConstant(0, DL, SourceType);
24335
24336 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24337 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24338
24339 // Populate the new build_vector
24340 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24341 SDValue Cast = N->getOperand(i);
24342 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24343 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24344 Cast.isUndef()) && "Invalid cast opcode");
24345 SDValue In;
24346 if (Cast.isUndef())
24347 In = DAG.getUNDEF(SourceType);
24348 else
24349 In = Cast->getOperand(0);
24350 unsigned Index = isLE ? (i * ElemRatio) :
24351 (i * ElemRatio + (ElemRatio - 1));
24352
24353 assert(Index < Ops.size() && "Invalid index");
24354 Ops[Index] = In;
24355 }
24356
24357 // The type of the new BUILD_VECTOR node.
24358 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24359 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24360 "Invalid vector size");
24361 // Check if the new vector type is legal.
24362 if (!isTypeLegal(VecVT) ||
24363 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24364       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
24365    return SDValue();
24366
24367 // Make the new BUILD_VECTOR.
24368 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24369
24370 // The new BUILD_VECTOR node has the potential to be further optimized.
24371 AddToWorklist(BV.getNode());
24372 // Bitcast to the desired type.
24373 return DAG.getBitcast(VT, BV);
24374}
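// Illustrative sketch (not part of the original source; the i16 values a..d
// are hypothetical): on a little-endian target where v8i16 is legal, the
// combine above turns
//   v4i32 = BUILD_VECTOR (zext i16:a), (zext i16:b), (zext i16:c), (zext i16:d)
// into
//   v4i32 = bitcast (v8i16 BUILD_VECTOR a, 0, b, 0, c, 0, d, 0)
// Because the extends here are zero_extends, the filler elements are zero;
// with any_extends they would be undef instead.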
24375
24376// Simplify (build_vec (trunc $1)
24377// (trunc (srl $1 half-width))
24378// (trunc (srl $1 (2 * half-width))))
24379// to (bitcast $1)
24380SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24381 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24382
24383 EVT VT = N->getValueType(0);
24384
24385 // Don't run this before LegalizeTypes if VT is legal.
24386 // Targets may have other preferences.
24387 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24388 return SDValue();
24389
24390 // Only for little endian
24391 if (!DAG.getDataLayout().isLittleEndian())
24392 return SDValue();
24393
24394 EVT OutScalarTy = VT.getScalarType();
24395 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24396
24397 // Only for power of two types to be sure that bitcast works well
24398 if (!isPowerOf2_64(ScalarTypeBitsize))
24399 return SDValue();
24400
24401 unsigned NumInScalars = N->getNumOperands();
24402
24403 // Look through bitcasts
24404 auto PeekThroughBitcast = [](SDValue Op) {
24405 if (Op.getOpcode() == ISD::BITCAST)
24406 return Op.getOperand(0);
24407 return Op;
24408 };
24409
24410 // The source value where all the parts are extracted.
24411 SDValue Src;
24412 for (unsigned i = 0; i != NumInScalars; ++i) {
24413 SDValue In = PeekThroughBitcast(N->getOperand(i));
24414 // Ignore undef inputs.
24415 if (In.isUndef()) continue;
24416
24417 if (In.getOpcode() != ISD::TRUNCATE)
24418 return SDValue();
24419
24420 In = PeekThroughBitcast(In.getOperand(0));
24421
24422 if (In.getOpcode() != ISD::SRL) {
24423 // For now only build_vec without shuffling, handle shifts here in the
24424 // future.
24425 if (i != 0)
24426 return SDValue();
24427
24428 Src = In;
24429 } else {
24430 // In is SRL
24431 SDValue part = PeekThroughBitcast(In.getOperand(0));
24432
24433 if (!Src) {
24434 Src = part;
24435 } else if (Src != part) {
24436 // Vector parts do not stem from the same variable
24437 return SDValue();
24438 }
24439
24440 SDValue ShiftAmtVal = In.getOperand(1);
24441 if (!isa<ConstantSDNode>(ShiftAmtVal))
24442 return SDValue();
24443
24444 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24445
24446 // The extracted value is not extracted at the right position
24447 if (ShiftAmt != i * ScalarTypeBitsize)
24448 return SDValue();
24449 }
24450 }
24451
24452 // Only cast if the size is the same
24453 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24454 return SDValue();
24455
24456 return DAG.getBitcast(VT, Src);
24457}
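// Illustrative sketch (not part of the original source; the i64 value x is
// hypothetical): on a little-endian target this turns
//   v2i32 = BUILD_VECTOR (trunc i64:x), (trunc (srl i64:x, 32))
// into
//   v2i32 = bitcast i64:x
// because element i is extracted from x at bit offset i * 32 and the total
// sizes match.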
24458
24459SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24460 ArrayRef<int> VectorMask,
24461 SDValue VecIn1, SDValue VecIn2,
24462 unsigned LeftIdx, bool DidSplitVec) {
24463 EVT VT = N->getValueType(0);
24464 EVT InVT1 = VecIn1.getValueType();
24465 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24466
24467 unsigned NumElems = VT.getVectorNumElements();
24468 unsigned ShuffleNumElems = NumElems;
24469
24470 // If we artificially split a vector in two already, then the offsets in the
24471 // operands will all be based off of VecIn1, even those in VecIn2.
24472 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24473
24474 uint64_t VTSize = VT.getFixedSizeInBits();
24475 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24476 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24477
24478 assert(InVT2Size <= InVT1Size &&
24479 "Inputs must be sorted to be in non-increasing vector size order.");
24480
24481 // We can't generate a shuffle node with mismatched input and output types.
24482 // Try to make the types match the type of the output.
24483 if (InVT1 != VT || InVT2 != VT) {
24484 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24485 // If the output vector length is a multiple of both input lengths,
24486 // we can concatenate them and pad the rest with undefs.
24487 unsigned NumConcats = VTSize / InVT1Size;
24488 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24489 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24490 ConcatOps[0] = VecIn1;
24491 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24492 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24493 VecIn2 = SDValue();
24494 } else if (InVT1Size == VTSize * 2) {
24495 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24496 return SDValue();
24497
24498 if (!VecIn2.getNode()) {
24499 // If we only have one input vector, and it's twice the size of the
24500 // output, split it in two.
24501 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24502 DAG.getVectorIdxConstant(NumElems, DL));
24503 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24504 // Since we now have shorter input vectors, adjust the offset of the
24505 // second vector's start.
24506 Vec2Offset = NumElems;
24507 } else {
24508 assert(InVT2Size <= InVT1Size &&
24509 "Second input is not going to be larger than the first one.");
24510
24511 // VecIn1 is wider than the output, and we have another, possibly
24512 // smaller input. Pad the smaller input with undefs, shuffle at the
24513 // input vector width, and extract the output.
24514 // The shuffle type is different than VT, so check legality again.
24515 if (LegalOperations &&
24516            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24517          return SDValue();
24518
24519 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24520 // lower it back into a BUILD_VECTOR. So if the inserted type is
24521 // illegal, don't even try.
24522 if (InVT1 != InVT2) {
24523 if (!TLI.isTypeLegal(InVT2))
24524 return SDValue();
24525 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24526 }
24527 ShuffleNumElems = NumElems * 2;
24528 }
24529 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24530 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24531 ConcatOps[0] = VecIn2;
24532 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24533 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24534 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24535 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24536 return SDValue();
24537      // If the dest vector has two or fewer elements, then using a shuffle and
24538      // extract from larger regs will cost even more.
24539 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24540 return SDValue();
24541 assert(InVT2Size <= InVT1Size &&
24542 "Second input is not going to be larger than the first one.");
24543
24544 // VecIn1 is wider than the output, and we have another, possibly
24545 // smaller input. Pad the smaller input with undefs, shuffle at the
24546 // input vector width, and extract the output.
24547 // The shuffle type is different than VT, so check legality again.
24548 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24549 return SDValue();
24550
24551 if (InVT1 != InVT2) {
24552 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24553 }
24554 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24555 } else {
24556 // TODO: Support cases where the length mismatch isn't exactly by a
24557 // factor of 2.
24558 // TODO: Move this check upwards, so that if we have bad type
24559 // mismatches, we don't create any DAG nodes.
24560 return SDValue();
24561 }
24562 }
24563
24564 // Initialize mask to undef.
24565 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24566
24567 // Only need to run up to the number of elements actually used, not the
24568 // total number of elements in the shuffle - if we are shuffling a wider
24569 // vector, the high lanes should be set to undef.
24570 for (unsigned i = 0; i != NumElems; ++i) {
24571 if (VectorMask[i] <= 0)
24572 continue;
24573
24574 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24575 if (VectorMask[i] == (int)LeftIdx) {
24576 Mask[i] = ExtIndex;
24577 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24578 Mask[i] = Vec2Offset + ExtIndex;
24579 }
24580 }
24581
24582 // The type the input vectors may have changed above.
24583 InVT1 = VecIn1.getValueType();
24584
24585 // If we already have a VecIn2, it should have the same type as VecIn1.
24586 // If we don't, get an undef/zero vector of the appropriate type.
24587 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24588 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24589
24590 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24591 if (ShuffleNumElems > NumElems)
24592 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24593
24594 return Shuffle;
24595}
24596
24597static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24598  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24599
24600 // First, determine where the build vector is not undef.
24601 // TODO: We could extend this to handle zero elements as well as undefs.
24602 int NumBVOps = BV->getNumOperands();
24603 int ZextElt = -1;
24604 for (int i = 0; i != NumBVOps; ++i) {
24605 SDValue Op = BV->getOperand(i);
24606 if (Op.isUndef())
24607 continue;
24608 if (ZextElt == -1)
24609 ZextElt = i;
24610 else
24611 return SDValue();
24612 }
24613 // Bail out if there's no non-undef element.
24614 if (ZextElt == -1)
24615 return SDValue();
24616
24617 // The build vector contains some number of undef elements and exactly
24618 // one other element. That other element must be a zero-extended scalar
24619 // extracted from a vector at a constant index to turn this into a shuffle.
24620 // Also, require that the build vector does not implicitly truncate/extend
24621 // its elements.
24622 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
24623 EVT VT = BV->getValueType(0);
24624 SDValue Zext = BV->getOperand(ZextElt);
24625 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24626      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24627      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24628      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24629    return SDValue();
24630
24631 // The zero-extend must be a multiple of the source size, and we must be
24632 // building a vector of the same size as the source of the extract element.
24633 SDValue Extract = Zext.getOperand(0);
24634 unsigned DestSize = Zext.getValueSizeInBits();
24635 unsigned SrcSize = Extract.getValueSizeInBits();
24636 if (DestSize % SrcSize != 0 ||
24637 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24638 return SDValue();
24639
24640 // Create a shuffle mask that will combine the extracted element with zeros
24641 // and undefs.
24642 int ZextRatio = DestSize / SrcSize;
24643 int NumMaskElts = NumBVOps * ZextRatio;
24644 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
24645 for (int i = 0; i != NumMaskElts; ++i) {
24646 if (i / ZextRatio == ZextElt) {
24647 // The low bits of the (potentially translated) extracted element map to
24648 // the source vector. The high bits map to zero. We will use a zero vector
24649 // as the 2nd source operand of the shuffle, so use the 1st element of
24650 // that vector (mask value is number-of-elements) for the high bits.
24651 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
24652 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
24653 : NumMaskElts;
24654 }
24655
24656 // Undef elements of the build vector remain undef because we initialize
24657 // the shuffle mask with -1.
24658 }
24659
24660 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
24661 // bitcast (shuffle V, ZeroVec, VectorMask)
24662 SDLoc DL(BV);
24663 EVT VecVT = Extract.getOperand(0).getValueType();
24664 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
24665 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24666 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
24667 ZeroVec, ShufMask, DAG);
24668 if (!Shuf)
24669 return SDValue();
24670 return DAG.getBitcast(VT, Shuf);
24671}
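// Illustrative sketch (not part of the original source; the v8i16 input v is
// hypothetical): on a little-endian target,
//   v4i32 = BUILD_VECTOR undef, undef, (zext (extract_vector_elt v8i16:v, 5)), undef
// becomes
//   v4i32 = bitcast (vector_shuffle<u,u,u,u,5,8,u,u> v8i16:v, v8i16 zerovec)
// where mask element 8 selects element 0 of the zero vector to supply the high
// half of the zero-extended lane.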
24672
24673// FIXME: promote to STLExtras.
24674template <typename R, typename T>
24675static auto getFirstIndexOf(R &&Range, const T &Val) {
24676 auto I = find(Range, Val);
24677 if (I == Range.end())
24678 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
24679 return std::distance(Range.begin(), I);
24680}
24681
24682// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
24683// operations. If the types of the vectors we're extracting from allow it,
24684// turn this into a vector_shuffle node.
24685SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
24686 SDLoc DL(N);
24687 EVT VT = N->getValueType(0);
24688
24689 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
24690 if (!isTypeLegal(VT))
24691 return SDValue();
24692
24694 return V;
24695
24696 // May only combine to shuffle after legalize if shuffle is legal.
24697 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
24698 return SDValue();
24699
24700 bool UsesZeroVector = false;
24701 unsigned NumElems = N->getNumOperands();
24702
24703 // Record, for each element of the newly built vector, which input vector
24704 // that element comes from. -1 stands for undef, 0 for the zero vector,
24705 // and positive values for the input vectors.
24706 // VectorMask maps each element to its vector number, and VecIn maps vector
24707 // numbers to their initial SDValues.
24708
24709 SmallVector<int, 8> VectorMask(NumElems, -1);
24710  SmallVector<SDValue, 8> VecIn;
24711  VecIn.push_back(SDValue());
24712
24713 // If we have a single extract_element with a constant index, track the index
24714 // value.
24715 unsigned OneConstExtractIndex = ~0u;
24716
24717 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
24718 unsigned NumExtracts = 0;
24719
24720 for (unsigned i = 0; i != NumElems; ++i) {
24721 SDValue Op = N->getOperand(i);
24722
24723 if (Op.isUndef())
24724 continue;
24725
24726 // See if we can use a blend with a zero vector.
24727 // TODO: Should we generalize this to a blend with an arbitrary constant
24728 // vector?
24729    if (isNullConstant(Op) || isNullFPConstant(Op)) {
24730      UsesZeroVector = true;
24731 VectorMask[i] = 0;
24732 continue;
24733 }
24734
24735 // Not an undef or zero. If the input is something other than an
24736 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
24737 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
24738 return SDValue();
24739
24740 SDValue ExtractedFromVec = Op.getOperand(0);
24741 if (ExtractedFromVec.getValueType().isScalableVector())
24742 return SDValue();
24743 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24744 if (!ExtractIdx)
24745 return SDValue();
24746
24747 if (ExtractIdx->getAsAPIntVal().uge(
24748 ExtractedFromVec.getValueType().getVectorNumElements()))
24749 return SDValue();
24750
24751 // All inputs must have the same element type as the output.
24752 if (VT.getVectorElementType() !=
24753 ExtractedFromVec.getValueType().getVectorElementType())
24754 return SDValue();
24755
24756 OneConstExtractIndex = ExtractIdx->getZExtValue();
24757 ++NumExtracts;
24758
24759 // Have we seen this input vector before?
24760 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
24761 // a map back from SDValues to numbers isn't worth it.
24762 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
24763 if (Idx == -1) { // A new source vector?
24764 Idx = VecIn.size();
24765 VecIn.push_back(ExtractedFromVec);
24766 }
24767
24768 VectorMask[i] = Idx;
24769 }
24770
24771 // If we didn't find at least one input vector, bail out.
24772 if (VecIn.size() < 2)
24773 return SDValue();
24774
24775  // If all the operands of BUILD_VECTOR extract from the same
24776 // vector, then split the vector efficiently based on the maximum
24777 // vector access index and adjust the VectorMask and
24778 // VecIn accordingly.
24779 bool DidSplitVec = false;
24780 if (VecIn.size() == 2) {
24781 // If we only found a single constant indexed extract_vector_elt feeding the
24782 // build_vector, do not produce a more complicated shuffle if the extract is
24783 // cheap with other constant/undef elements. Skip broadcast patterns with
24784 // multiple uses in the build_vector.
24785
24786 // TODO: This should be more aggressive about skipping the shuffle
24787 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
24788 // index.
24789 if (NumExtracts == 1 &&
24790        TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
24791        TLI.isTypeLegal(VT.getVectorElementType()) &&
24792        TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
24793 return SDValue();
24794
24795 unsigned MaxIndex = 0;
24796 unsigned NearestPow2 = 0;
24797 SDValue Vec = VecIn.back();
24798 EVT InVT = Vec.getValueType();
24799 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
24800
24801 for (unsigned i = 0; i < NumElems; i++) {
24802 if (VectorMask[i] <= 0)
24803 continue;
24804 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
24805 IndexVec[i] = Index;
24806 MaxIndex = std::max(MaxIndex, Index);
24807 }
24808
24809 NearestPow2 = PowerOf2Ceil(MaxIndex);
24810 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
24811 NumElems * 2 < NearestPow2) {
24812 unsigned SplitSize = NearestPow2 / 2;
24813 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
24814 InVT.getVectorElementType(), SplitSize);
24815 if (TLI.isTypeLegal(SplitVT) &&
24816 SplitSize + SplitVT.getVectorNumElements() <=
24817 InVT.getVectorNumElements()) {
24818 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24819 DAG.getVectorIdxConstant(SplitSize, DL));
24820 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24821 DAG.getVectorIdxConstant(0, DL));
24822 VecIn.pop_back();
24823 VecIn.push_back(VecIn1);
24824 VecIn.push_back(VecIn2);
24825 DidSplitVec = true;
24826
24827 for (unsigned i = 0; i < NumElems; i++) {
24828 if (VectorMask[i] <= 0)
24829 continue;
24830 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
24831 }
24832 }
24833 }
24834 }
24835
24836 // Sort input vectors by decreasing vector element count,
24837 // while preserving the relative order of equally-sized vectors.
24838  // Note that we keep the first "implicit" zero vector as-is.
24839 SmallVector<SDValue, 8> SortedVecIn(VecIn);
24840 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
24841 [](const SDValue &a, const SDValue &b) {
24842 return a.getValueType().getVectorNumElements() >
24843 b.getValueType().getVectorNumElements();
24844 });
24845
24846 // We now also need to rebuild the VectorMask, because it referenced element
24847 // order in VecIn, and we just sorted them.
24848 for (int &SourceVectorIndex : VectorMask) {
24849 if (SourceVectorIndex <= 0)
24850 continue;
24851 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
24852 assert(Idx > 0 && Idx < SortedVecIn.size() &&
24853 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
24854 SourceVectorIndex = Idx;
24855 }
24856
24857 VecIn = std::move(SortedVecIn);
24858
24859  // TODO: Should this fire if some of the input vectors have illegal types (like
24860 // it does now), or should we let legalization run its course first?
24861
24862 // Shuffle phase:
24863 // Take pairs of vectors, and shuffle them so that the result has elements
24864 // from these vectors in the correct places.
24865 // For example, given:
24866 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
24867 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
24868 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
24869 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
24870 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
24871 // We will generate:
24872 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
24873 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
24874 SmallVector<SDValue, 4> Shuffles;
24875 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
24876 unsigned LeftIdx = 2 * In + 1;
24877 SDValue VecLeft = VecIn[LeftIdx];
24878 SDValue VecRight =
24879 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
24880
24881 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
24882 VecRight, LeftIdx, DidSplitVec))
24883 Shuffles.push_back(Shuffle);
24884 else
24885 return SDValue();
24886 }
24887
24888 // If we need the zero vector as an "ingredient" in the blend tree, add it
24889 // to the list of shuffles.
24890 if (UsesZeroVector)
24891 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24892 : DAG.getConstantFP(0.0, DL, VT));
24893
24894 // If we only have one shuffle, we're done.
24895 if (Shuffles.size() == 1)
24896 return Shuffles[0];
24897
24898 // Update the vector mask to point to the post-shuffle vectors.
24899 for (int &Vec : VectorMask)
24900 if (Vec == 0)
24901 Vec = Shuffles.size() - 1;
24902 else
24903 Vec = (Vec - 1) / 2;
24904
24905 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24906 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24907 // generate:
24908 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24909 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24910 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24911 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24912 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24913 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24914 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24915
24916 // Make sure the initial size of the shuffle list is even.
24917 if (Shuffles.size() % 2)
24918 Shuffles.push_back(DAG.getUNDEF(VT));
24919
24920 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24921 if (CurSize % 2) {
24922 Shuffles[CurSize] = DAG.getUNDEF(VT);
24923 CurSize++;
24924 }
24925 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24926 int Left = 2 * In;
24927 int Right = 2 * In + 1;
24928 SmallVector<int, 8> Mask(NumElems, -1);
24929 SDValue L = Shuffles[Left];
24930 ArrayRef<int> LMask;
24931 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24932 L.use_empty() && L.getOperand(1).isUndef() &&
24933 L.getOperand(0).getValueType() == L.getValueType();
24934 if (IsLeftShuffle) {
24935 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24936 L = L.getOperand(0);
24937 }
24938 SDValue R = Shuffles[Right];
24939 ArrayRef<int> RMask;
24940 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24941 R.use_empty() && R.getOperand(1).isUndef() &&
24942 R.getOperand(0).getValueType() == R.getValueType();
24943 if (IsRightShuffle) {
24944 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24945 R = R.getOperand(0);
24946 }
24947 for (unsigned I = 0; I != NumElems; ++I) {
24948 if (VectorMask[I] == Left) {
24949 Mask[I] = I;
24950 if (IsLeftShuffle)
24951 Mask[I] = LMask[I];
24952 VectorMask[I] = In;
24953 } else if (VectorMask[I] == Right) {
24954 Mask[I] = I + NumElems;
24955 if (IsRightShuffle)
24956 Mask[I] = RMask[I] + NumElems;
24957 VectorMask[I] = In;
24958 }
24959 }
24960
24961 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24962 }
24963 }
24964 return Shuffles[0];
24965}
24966
24967// Try to turn a build vector of zero extends of extract vector elts into a
24968// vector zero extend and possibly an extract subvector.
24969// TODO: Support sign extend?
24970// TODO: Allow undef elements?
24971SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24972 if (LegalOperations)
24973 return SDValue();
24974
24975 EVT VT = N->getValueType(0);
24976
24977 bool FoundZeroExtend = false;
24978 SDValue Op0 = N->getOperand(0);
24979 auto checkElem = [&](SDValue Op) -> int64_t {
24980 unsigned Opc = Op.getOpcode();
24981 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24982 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24983 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24984 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24985 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24986 return C->getZExtValue();
24987 return -1;
24988 };
24989
24990 // Make sure the first element matches
24991 // (zext (extract_vector_elt X, C))
24992 // Offset must be a constant multiple of the
24993 // known-minimum vector length of the result type.
24994 int64_t Offset = checkElem(Op0);
24995 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24996 return SDValue();
24997
24998 unsigned NumElems = N->getNumOperands();
24999 SDValue In = Op0.getOperand(0).getOperand(0);
25000 EVT InSVT = In.getValueType().getScalarType();
25001 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
25002
25003 // Don't create an illegal input type after type legalization.
25004 if (LegalTypes && !TLI.isTypeLegal(InVT))
25005 return SDValue();
25006
25007 // Ensure all the elements come from the same vector and are adjacent.
25008 for (unsigned i = 1; i != NumElems; ++i) {
25009 if ((Offset + i) != checkElem(N->getOperand(i)))
25010 return SDValue();
25011 }
25012
25013 SDLoc DL(N);
25014 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
25015 Op0.getOperand(0).getOperand(1));
25016 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
25017 VT, In);
25018}
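// Illustrative sketch (not part of the original source; the v4i32 input x is
// hypothetical): assuming v2i32 is a usable input type on the target,
//   v2i64 = BUILD_VECTOR (zext (extract_vector_elt v4i32:x, 2)),
//                        (zext (extract_vector_elt v4i32:x, 3))
// becomes
//   v2i64 = zero_extend (v2i32 extract_subvector v4i32:x, 2)
// The starting offset (2) is a multiple of the result element count (2) and
// the extracted elements are adjacent, which is what the checks above require.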
25019
25020// If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
25021// and all other elements are constant zeros, granularize the BUILD_VECTOR's
25022// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
25023// This pattern can appear during legalization.
25024//
25025// NOTE: This can be generalized to allow more than a single
25026// non-constant-zero op, UNDEFs, and to be KnownBits-based.
25027SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
25028 // Don't run this after legalization. Targets may have other preferences.
25029 if (Level >= AfterLegalizeDAG)
25030 return SDValue();
25031
25032 // FIXME: support big-endian.
25033 if (DAG.getDataLayout().isBigEndian())
25034 return SDValue();
25035
25036 EVT VT = N->getValueType(0);
25037 EVT OpVT = N->getOperand(0).getValueType();
25038 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
25039
25040 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25041
25042 if (!TLI.isTypeLegal(OpIntVT) ||
25043 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
25044 return SDValue();
25045
25046 unsigned EltBitwidth = VT.getScalarSizeInBits();
25047 // NOTE: the actual width of operands may be wider than that!
25048
25049 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
25050 // active bits they all have? We'll want to truncate them all to that width.
25051 unsigned ActiveBits = 0;
25052 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
25053 for (auto I : enumerate(N->ops())) {
25054 SDValue Op = I.value();
25055 // FIXME: support UNDEF elements?
25056 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
25057 unsigned OpActiveBits =
25058 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
25059 if (OpActiveBits == 0) {
25060 KnownZeroOps.setBit(I.index());
25061 continue;
25062 }
25063 // Profitability check: don't allow non-zero constant operands.
25064 return SDValue();
25065 }
25066 // Profitability check: there must only be a single non-zero operand,
25067 // and it must be the first operand of the BUILD_VECTOR.
25068 if (I.index() != 0)
25069 return SDValue();
25070 // The operand must be a zero-extension itself.
25071 // FIXME: this could be generalized to known leading zeros check.
25072 if (Op.getOpcode() != ISD::ZERO_EXTEND)
25073 return SDValue();
25074 unsigned CurrActiveBits =
25075 Op.getOperand(0).getValueSizeInBits().getFixedValue();
25076 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
25077 ActiveBits = CurrActiveBits;
25078 // We want to at least halve the element size.
25079 if (2 * ActiveBits > EltBitwidth)
25080 return SDValue();
25081 }
25082
25083 // This BUILD_VECTOR must have at least one non-constant-zero operand.
25084 if (ActiveBits == 0)
25085 return SDValue();
25086
25087 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
25088 // into how many chunks can we split our element width?
25089 EVT NewScalarIntVT, NewIntVT;
25090 std::optional<unsigned> Factor;
25091 // We can split the element into at least two chunks, but not into more
25092  // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
25093 // for which the element width is a multiple of it,
25094 // and the resulting types/operations on that chunk width are legal.
25095 assert(2 * ActiveBits <= EltBitwidth &&
25096 "We know that half or less bits of the element are active.");
25097 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
25098 if (EltBitwidth % Scale != 0)
25099 continue;
25100 unsigned ChunkBitwidth = EltBitwidth / Scale;
25101 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
25102 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
25103 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
25104 Scale * N->getNumOperands());
25105 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
25106 (LegalOperations &&
25107 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
25108           TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
25109      continue;
25110 Factor = Scale;
25111 break;
25112 }
25113 if (!Factor)
25114 return SDValue();
25115
25116 SDLoc DL(N);
25117 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
25118
25119 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
25120  SmallVector<SDValue, 16> NewOps;
25121  NewOps.reserve(NewIntVT.getVectorNumElements());
25122 for (auto I : enumerate(N->ops())) {
25123 SDValue Op = I.value();
25124 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
25125 unsigned SrcOpIdx = I.index();
25126 if (KnownZeroOps[SrcOpIdx]) {
25127 NewOps.append(*Factor, ZeroOp);
25128 continue;
25129 }
25130 Op = DAG.getBitcast(OpIntVT, Op);
25131 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
25132 NewOps.emplace_back(Op);
25133 NewOps.append(*Factor - 1, ZeroOp);
25134 }
25135 assert(NewOps.size() == NewIntVT.getVectorNumElements());
25136 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
25137 NewBV = DAG.getBitcast(VT, NewBV);
25138 return NewBV;
25139}
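// Illustrative sketch (not part of the original source; the i32 value x is
// hypothetical): on a little-endian target where i32/v4i32 are legal,
//   v2i64 = BUILD_VECTOR (zext i32:x to i64), (i64 0)
// is rebuilt with elements half as wide (Factor == 2) as
//   v2i64 = bitcast (v4i32 BUILD_VECTOR (trunc (bitcast (zext x))), 0, 0, 0)
// where the truncate recovers the original low 32 bits and every other chunk
// becomes a constant zero.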
25140
25141SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
25142 EVT VT = N->getValueType(0);
25143
25144 // A vector built entirely of undefs is undef.
25145  if (ISD::allOperandsUndef(N))
25146    return DAG.getUNDEF(VT);
25147
25148 // If this is a splat of a bitcast from another vector, change to a
25149 // concat_vector.
25150 // For example:
25151 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25152 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25153 //
25154 // If X is a build_vector itself, the concat can become a larger build_vector.
25155 // TODO: Maybe this is useful for non-splat too?
25156 if (!LegalOperations) {
25157 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
25158 // Only change build_vector to a concat_vector if the splat value type is
25159 // same as the vector element type.
25160 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25161      Splat = peekThroughBitcasts(Splat);
25162      EVT SrcVT = Splat.getValueType();
25163 if (SrcVT.isVector()) {
25164 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25165 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25166 SrcVT.getVectorElementType(), NumElts);
25167 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25168 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25169 SDValue Concat =
25170 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25171 return DAG.getBitcast(VT, Concat);
25172 }
25173 }
25174 }
25175 }
25176
25177 // Check if we can express BUILD VECTOR via subvector extract.
25178 if (!LegalTypes && (N->getNumOperands() > 1)) {
25179 SDValue Op0 = N->getOperand(0);
25180 auto checkElem = [&](SDValue Op) -> uint64_t {
25181 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25182 (Op0.getOperand(0) == Op.getOperand(0)))
25183 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25184 return CNode->getZExtValue();
25185 return -1;
25186 };
25187
25188 int Offset = checkElem(Op0);
25189 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25190 if (Offset + i != checkElem(N->getOperand(i))) {
25191 Offset = -1;
25192 break;
25193 }
25194 }
25195
25196 if ((Offset == 0) &&
25197 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25198 return Op0.getOperand(0);
25199 if ((Offset != -1) &&
25200 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25201 0)) // IDX must be multiple of output size.
25202 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25203 Op0.getOperand(0), Op0.getOperand(1));
25204 }
25205
25206 if (SDValue V = convertBuildVecZextToZext(N))
25207 return V;
25208
25209 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25210 return V;
25211
25212 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25213 return V;
25214
25215 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25216 return V;
25217
25218 if (SDValue V = reduceBuildVecToShuffle(N))
25219 return V;
25220
25221 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25222 // Do this late as some of the above may replace the splat.
25223  if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
25224    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25225      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25226 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25227 }
25228
25229 return SDValue();
25230}
25231
25232static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25233  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25234 EVT OpVT = N->getOperand(0).getValueType();
25235
25236 // If the operands are legal vectors, leave them alone.
25237 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25238 return SDValue();
25239
25240 SDLoc DL(N);
25241 EVT VT = N->getValueType(0);
25242  SmallVector<SDValue, 8> Ops;
25243  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25244
25245 // Keep track of what we encounter.
25246 EVT AnyFPVT;
25247
25248 for (const SDValue &Op : N->ops()) {
25249 if (ISD::BITCAST == Op.getOpcode() &&
25250 !Op.getOperand(0).getValueType().isVector())
25251 Ops.push_back(Op.getOperand(0));
25252 else if (Op.isUndef())
25253 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
25254 else
25255 return SDValue();
25256
25257 // Note whether we encounter an integer or floating point scalar.
25258    // If it's neither, bail out; it could be something weird like x86mmx.
25259 EVT LastOpVT = Ops.back().getValueType();
25260 if (LastOpVT.isFloatingPoint())
25261 AnyFPVT = LastOpVT;
25262 else if (!LastOpVT.isInteger())
25263 return SDValue();
25264 }
25265
25266 // If any of the operands is a floating point scalar bitcast to a vector,
25267 // use floating point types throughout, and bitcast everything.
25268 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25269 if (AnyFPVT != EVT()) {
25270 SVT = AnyFPVT;
25271 for (SDValue &Op : Ops) {
25272 if (Op.getValueType() == SVT)
25273 continue;
25274 if (Op.isUndef())
25275 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
25276 else
25277 Op = DAG.getBitcast(SVT, Op);
25278 }
25279 }
25280
25281 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25282 VT.getSizeInBits() / SVT.getSizeInBits());
25283 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25284}
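// Illustrative sketch (not part of the original source; the f64 scalars a and
// b are hypothetical): assuming v2f32 is not a legal type but v2f64 is,
//   v4f32 = concat_vectors (v2f32 bitcast f64:a), (v2f32 bitcast f64:b)
// becomes
//   v4f32 = bitcast (v2f64 BUILD_VECTOR f64:a, f64:b)
// i.e. the scalars are collected into one build_vector of the scalar type and
// a single bitcast produces the requested vector type.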
25285
25286// Attempt to merge nested concat_vectors/undefs.
25287// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25288// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25289static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25290                                                  SelectionDAG &DAG) {
25291 EVT VT = N->getValueType(0);
25292
25293 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25294 EVT SubVT;
25295 SDValue FirstConcat;
25296 for (const SDValue &Op : N->ops()) {
25297 if (Op.isUndef())
25298 continue;
25299 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25300 return SDValue();
25301 if (!FirstConcat) {
25302 SubVT = Op.getOperand(0).getValueType();
25303 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25304 return SDValue();
25305 FirstConcat = Op;
25306 continue;
25307 }
25308 if (SubVT != Op.getOperand(0).getValueType())
25309 return SDValue();
25310 }
25311 assert(FirstConcat && "Concat of all-undefs found");
25312
25313 SmallVector<SDValue> ConcatOps;
25314 for (const SDValue &Op : N->ops()) {
25315 if (Op.isUndef()) {
25316 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25317 continue;
25318 }
25319 ConcatOps.append(Op->op_begin(), Op->op_end());
25320 }
25321 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25322}
25323
25324// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25325// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25326 // most two distinct vectors of the same size as the result, attempt to turn
25327 // this into a legal shuffle.
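// For example, with v8i32 sources X and Y and a v8i32 result (when the target
// supports the resulting shuffle):
//   concat (extract_subvector X, 4), (extract_subvector Y, 0)
//     --> shuffle X, Y, <4,5,6,7,8,9,10,11>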
25328static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25329 EVT VT = N->getValueType(0);
25330 EVT OpVT = N->getOperand(0).getValueType();
25331
25332 // We currently can't generate an appropriate shuffle for a scalable vector.
25333 if (VT.isScalableVector())
25334 return SDValue();
25335
25336 int NumElts = VT.getVectorNumElements();
25337 int NumOpElts = OpVT.getVectorNumElements();
25338
25339 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25340 SmallVector<int, 8> Mask;
25341
25342 for (SDValue Op : N->ops()) {
25343 Op = peekThroughBitcasts(Op);
25344
25345 // UNDEF nodes convert to UNDEF shuffle mask values.
25346 if (Op.isUndef()) {
25347 Mask.append((unsigned)NumOpElts, -1);
25348 continue;
25349 }
25350
25351 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25352 return SDValue();
25353
25354 // What vector are we extracting the subvector from and at what index?
25355 SDValue ExtVec = Op.getOperand(0);
25356 int ExtIdx = Op.getConstantOperandVal(1);
25357
25358 // We want the EVT of the original extraction to correctly scale the
25359 // extraction index.
25360 EVT ExtVT = ExtVec.getValueType();
25361 ExtVec = peekThroughBitcasts(ExtVec);
25362
25363 // UNDEF nodes convert to UNDEF shuffle mask values.
25364 if (ExtVec.isUndef()) {
25365 Mask.append((unsigned)NumOpElts, -1);
25366 continue;
25367 }
25368
25369 // Ensure that we are extracting a subvector from a vector the same
25370 // size as the result.
25371 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25372 return SDValue();
25373
25374 // Scale the subvector index to account for any bitcast.
25375 int NumExtElts = ExtVT.getVectorNumElements();
25376 if (0 == (NumExtElts % NumElts))
25377 ExtIdx /= (NumExtElts / NumElts);
25378 else if (0 == (NumElts % NumExtElts))
25379 ExtIdx *= (NumElts / NumExtElts);
25380 else
25381 return SDValue();
25382
25383 // At most we can reference 2 inputs in the final shuffle.
25384 if (SV0.isUndef() || SV0 == ExtVec) {
25385 SV0 = ExtVec;
25386 for (int i = 0; i != NumOpElts; ++i)
25387 Mask.push_back(i + ExtIdx);
25388 } else if (SV1.isUndef() || SV1 == ExtVec) {
25389 SV1 = ExtVec;
25390 for (int i = 0; i != NumOpElts; ++i)
25391 Mask.push_back(i + ExtIdx + NumElts);
25392 } else {
25393 return SDValue();
25394 }
25395 }
25396
25397 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25398 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25399 DAG.getBitcast(VT, SV1), Mask, DAG);
25400}
25401
25402static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25403 unsigned CastOpcode = N->getOperand(0).getOpcode();
25404 switch (CastOpcode) {
25405 case ISD::SINT_TO_FP:
25406 case ISD::UINT_TO_FP:
25407 case ISD::FP_TO_SINT:
25408 case ISD::FP_TO_UINT:
25409 // TODO: Allow more opcodes?
25410 // case ISD::BITCAST:
25411 // case ISD::TRUNCATE:
25412 // case ISD::ZERO_EXTEND:
25413 // case ISD::SIGN_EXTEND:
25414 // case ISD::FP_EXTEND:
25415 break;
25416 default:
25417 return SDValue();
25418 }
25419
25420 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25421 if (!SrcVT.isVector())
25422 return SDValue();
25423
25424 // All operands of the concat must be the same kind of cast from the same
25425 // source type.
25426 SmallVector<SDValue, 4> SrcOps;
25427 for (SDValue Op : N->ops()) {
25428 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25429 Op.getOperand(0).getValueType() != SrcVT)
25430 return SDValue();
25431 SrcOps.push_back(Op.getOperand(0));
25432 }
25433
25434 // The wider cast must be supported by the target. This is unusual because
25435 // the type used to query operation legality depends on the opcode. In addition,
25436 // check the other type in the cast to make sure this is really legal.
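// For example, for SINT_TO_FP/UINT_TO_FP the legality query is keyed on the
// integer source type (ConcatSrcVT), whereas for FP_TO_SINT/FP_TO_UINT it is
// keyed on the integer result type (VT); the other type must be legal as well.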
25437 EVT VT = N->getValueType(0);
25438 EVT SrcEltVT = SrcVT.getVectorElementType();
25439 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25440 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25441 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25442 switch (CastOpcode) {
25443 case ISD::SINT_TO_FP:
25444 case ISD::UINT_TO_FP:
25445 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25446 !TLI.isTypeLegal(VT))
25447 return SDValue();
25448 break;
25449 case ISD::FP_TO_SINT:
25450 case ISD::FP_TO_UINT:
25451 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25452 !TLI.isTypeLegal(ConcatSrcVT))
25453 return SDValue();
25454 break;
25455 default:
25456 llvm_unreachable("Unexpected cast opcode");
25457 }
25458
25459 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
25460 SDLoc DL(N);
25461 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25462 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25463}
25464
25465// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
25466// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
25467// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
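// For example, with v4i32 operands (assuming the widened mask is legal):
//   concat (shuffle X, undef, <3,2,1,0>), X
//     --> shuffle (concat X, undef), undef, <3,2,1,0,0,1,2,3>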
25468static SDValue combineConcatVectorOfShuffleAndItsOperands(
25469 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25470 bool LegalOperations) {
25471 EVT VT = N->getValueType(0);
25472 EVT OpVT = N->getOperand(0).getValueType();
25473 if (VT.isScalableVector())
25474 return SDValue();
25475
25476 // For now, only allow simple 2-operand concatenations.
25477 if (N->getNumOperands() != 2)
25478 return SDValue();
25479
25480 // Don't create illegal types/shuffles when not allowed to.
25481 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25482 (LegalOperations &&
25483 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
25484 return SDValue();
25485
25486 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25487 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25488 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25489 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25490 // (4) and for now, the SHUFFLE_VECTOR must be unary.
25491 ShuffleVectorSDNode *SVN = nullptr;
25492 for (SDValue Op : N->ops()) {
25493 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25494 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25495 all_of(N->ops(), [CurSVN](SDValue Op) {
25496 // FIXME: can we allow UNDEF operands?
25497 return !Op.isUndef() &&
25498 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25499 })) {
25500 SVN = CurSVN;
25501 break;
25502 }
25503 }
25504 if (!SVN)
25505 return SDValue();
25506
25507 // We are going to pad the shuffle operands, so any index that was picking
25508 // from the second operand must be adjusted.
25509 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25510 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25511
25512 // Identity masks for the operands of the (padded) shuffle.
25513 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25514 MutableArrayRef<int> FirstShufOpIdentityMask =
25515 MutableArrayRef<int>(IdentityMask)
25516 .take_front(OpVT.getVectorNumElements());
25517 MutableArrayRef<int> SecondShufOpIdentityMask =
25518 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
25519 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25520 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25521 OpVT.getVectorNumElements());
25522
25523 // New combined shuffle mask.
25524 SmallVector<int, 32> Mask;
25525 Mask.reserve(VT.getVectorNumElements());
25526 for (SDValue Op : N->ops()) {
25527 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25528 if (Op.getNode() == SVN) {
25529 append_range(Mask, AdjustedMask);
25530 continue;
25531 }
25532 if (Op == SVN->getOperand(0)) {
25533 append_range(Mask, FirstShufOpIdentityMask);
25534 continue;
25535 }
25536 if (Op == SVN->getOperand(1)) {
25537 append_range(Mask, SecondShufOpIdentityMask);
25538 continue;
25539 }
25540 llvm_unreachable("Unexpected operand!");
25541 }
25542
25543 // Don't create illegal shuffle masks.
25544 if (!TLI.isShuffleMaskLegal(Mask, VT))
25545 return SDValue();
25546
25547 // Pad the shuffle operands with UNDEF.
25548 SDLoc dl(N);
25549 std::array<SDValue, 2> ShufOps;
25550 for (auto I : zip(SVN->ops(), ShufOps)) {
25551 SDValue ShufOp = std::get<0>(I);
25552 SDValue &NewShufOp = std::get<1>(I);
25553 if (ShufOp.isUndef())
25554 NewShufOp = DAG.getUNDEF(VT);
25555 else {
25556 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25557 DAG.getUNDEF(OpVT));
25558 ShufOpParts[0] = ShufOp;
25559 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25560 }
25561 }
25562 // Finally, create the new wide shuffle.
25563 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25564}
25565
25566static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25567 const TargetLowering &TLI,
25568 bool LegalTypes,
25569 bool LegalOperations) {
25570 EVT VT = N->getValueType(0);
25571
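// concat (splat_vector X), (splat_vector X), ... --> (wider) splat_vector X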
25572 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25573 // the type and operation is legal. The Hexagon target has custom
25574 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25575 // concatenates them. Therefore, custom lowering must also be rejected in
25576 // order to avoid an infinite loop.
25577 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25578 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25579 return SDValue();
25580
25581 SDValue Op0 = N->getOperand(0);
25582 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25583 return SDValue();
25584
25585 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25586}
25587
25588SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25589 // If we only have one input vector, we don't need to do any concatenation.
25590 if (N->getNumOperands() == 1)
25591 return N->getOperand(0);
25592
25593 // Check if all of the operands are undefs.
25594 EVT VT = N->getValueType(0);
25595 if (ISD::allOperandsUndef(N))
25596 return DAG.getUNDEF(VT);
25597
25598 // Optimize concat_vectors where all but the first of the vectors are undef.
25599 if (all_of(drop_begin(N->ops()),
25600 [](const SDValue &Op) { return Op.isUndef(); })) {
25601 SDValue In = N->getOperand(0);
25602 assert(In.getValueType().isVector() && "Must concat vectors");
25603
25604 // If the input is a concat_vectors, just make a larger concat by padding
25605 // with smaller undefs.
25606 //
25607 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25608 // here could cause an infinite loop. That legalizing happens when LegalDAG
25609 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25610 // scalable.
25611 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25612 !(LegalDAG && In.getValueType().isScalableVector())) {
25613 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25614 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
25615 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25616 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25617 }
25618
25619 SDValue Scalar = peekThroughOneUseBitcasts(In);
25620
25621 // concat_vectors(scalar_to_vector(scalar), undef) ->
25622 // scalar_to_vector(scalar)
25623 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25624 Scalar.hasOneUse()) {
25625 EVT SVT = Scalar.getValueType().getVectorElementType();
25626 if (SVT == Scalar.getOperand(0).getValueType())
25627 Scalar = Scalar.getOperand(0);
25628 }
25629
25630 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25631 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25632 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25633 // look through the trunc so we can still do the transform:
25634 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25635 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25636 !TLI.isTypeLegal(Scalar.getValueType()) &&
25637 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25638 Scalar = Scalar->getOperand(0);
25639
25640 EVT SclTy = Scalar.getValueType();
25641
25642 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
25643 return SDValue();
25644
25645 // Bail out if the vector size is not a multiple of the scalar size.
25646 if (VT.getSizeInBits() % SclTy.getSizeInBits())
25647 return SDValue();
25648
25649 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
25650 if (VNTNumElms < 2)
25651 return SDValue();
25652
25653 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
25654 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
25655 return SDValue();
25656
25657 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
25658 return DAG.getBitcast(VT, Res);
25659 }
25660 }
25661
25662 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
25663 // We have already tested above for an UNDEF only concatenation.
25664 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
25665 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
25666 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
25667 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
25668 };
25669 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
25670 SmallVector<SDValue, 8> Opnds;
25671 EVT SVT = VT.getScalarType();
25672
25673 EVT MinVT = SVT;
25674 if (!SVT.isFloatingPoint()) {
25675 // If the BUILD_VECTORs are built from integers, they may have different
25676 // operand types. Get the smallest type and truncate all operands to it.
25677 bool FoundMinVT = false;
25678 for (const SDValue &Op : N->ops())
25679 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25680 EVT OpSVT = Op.getOperand(0).getValueType();
25681 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
25682 FoundMinVT = true;
25683 }
25684 assert(FoundMinVT && "Concat vector type mismatch");
25685 }
25686
25687 for (const SDValue &Op : N->ops()) {
25688 EVT OpVT = Op.getValueType();
25689 unsigned NumElts = OpVT.getVectorNumElements();
25690
25691 if (Op.isUndef())
25692 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
25693
25694 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25695 if (SVT.isFloatingPoint()) {
25696 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
25697 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
25698 } else {
25699 for (unsigned i = 0; i != NumElts; ++i)
25700 Opnds.push_back(
25701 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
25702 }
25703 }
25704 }
25705
25706 assert(VT.getVectorNumElements() == Opnds.size() &&
25707 "Concat vector type mismatch");
25708 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
25709 }
25710
25711 if (SDValue V =
25712 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25713 return V;
25714
25715 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
25716 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
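// e.g. (when the v2f32 operand type is not legal on the target)
//   concat_vectors (v2f32 bitcast f64:X), (v2f32 bitcast f64:Y)
//     --> v4f32 bitcast (v2f64 build_vector X, Y)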
25717 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
25718 return V;
25719
25720 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
25721 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
25722 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
25723 return V;
25724
25725 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
25726 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
25727 return V;
25728 }
25729
25730 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
25731 return V;
25732
25733 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
25734 N, DAG, TLI, LegalTypes, LegalOperations))
25735 return V;
25736
25737 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
25738 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
25739 // operands and look for CONCAT operations that place the incoming vectors
25740 // at the exact same location.
25741 //
25742 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
25743 SDValue SingleSource = SDValue();
25744 unsigned PartNumElem =
25745 N->getOperand(0).getValueType().getVectorMinNumElements();
25746
25747 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25748 SDValue Op = N->getOperand(i);
25749
25750 if (Op.isUndef())
25751 continue;
25752
25753 // Check if this is the identity extract:
25754 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25755 return SDValue();
25756
25757 // Find the single incoming vector for the extract_subvector.
25758 if (SingleSource.getNode()) {
25759 if (Op.getOperand(0) != SingleSource)
25760 return SDValue();
25761 } else {
25762 SingleSource = Op.getOperand(0);
25763
25764 // Check the source type is the same as the type of the result.
25765 // If not, this concat may extend the vector, so we cannot
25766 // optimize it away.
25767 if (SingleSource.getValueType() != N->getValueType(0))
25768 return SDValue();
25769 }
25770
25771 // Check that we are reading from the identity index.
25772 unsigned IdentityIndex = i * PartNumElem;
25773 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
25774 return SDValue();
25775 }
25776
25777 if (SingleSource.getNode())
25778 return SingleSource;
25779
25780 return SDValue();
25781}
25782
25783SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25784 // Check to see if all operands are identical.
25785 if (!llvm::all_equal(N->op_values()))
25786 return SDValue();
25787
25788 // Check to see if the identical operand is a splat.
25789 if (!DAG.isSplatValue(N->getOperand(0)))
25790 return SDValue();
25791
25792 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
25793 SmallVector<SDValue, 8> Ops;
25794 Ops.append(N->op_values().begin(), N->op_values().end());
25795 return CombineTo(N, &Ops);
25796}
25797
25798// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
25799// if the subvector can be sourced for free.
25800static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
25801 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25802 V.getOperand(1).getValueType() == SubVT &&
25803 V.getConstantOperandAPInt(2) == Index) {
25804 return V.getOperand(1);
25805 }
25806 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
25807 V.getOperand(0).getValueType() == SubVT &&
25808 (Index % SubVT.getVectorMinNumElements()) == 0) {
25809 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
25810 return V.getOperand(SubIdx);
25811 }
25812 return SDValue();
25813}
25814
25815static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25816 unsigned Index, const SDLoc &DL,
25817 SelectionDAG &DAG,
25818 bool LegalOperations) {
25819 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25820 unsigned BinOpcode = BinOp.getOpcode();
25821 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
25822 return SDValue();
25823
25824 EVT VecVT = BinOp.getValueType();
25825 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
25826 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
25827 return SDValue();
25828 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
25829 return SDValue();
25830
25831 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
25832 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
25833
25834 // TODO: We could handle the case where only 1 operand is being inserted by
25835 // creating an extract of the other operand, but that requires checking
25836 // number of uses and/or costs.
25837 if (!Sub0 || !Sub1)
25838 return SDValue();
25839
25840 // We are inserting both operands of the wide binop only to extract back
25841 // to the narrow vector size. Eliminate all of the insert/extract:
25842 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25843 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
25844}
25845
25846/// If we are extracting a subvector produced by a wide binary operator try
25847/// to use a narrow binary operator and/or avoid concatenation and extraction.
25848static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25849 const SDLoc &DL, SelectionDAG &DAG,
25850 bool LegalOperations) {
25851 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
25852 // some of these bailouts with other transforms.
25853
25854 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
25855 LegalOperations))
25856 return V;
25857
25858 // We are looking for an optionally bitcasted wide vector binary operator
25859 // feeding an extract subvector.
25860 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25861 SDValue BinOp = peekThroughBitcasts(Src);
25862 unsigned BOpcode = BinOp.getOpcode();
25863 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
25864 return SDValue();
25865
25866 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
25867 // reduced to the unary fneg when it is visited, and we probably want to deal
25868 // with fneg in a target-specific way.
25869 if (BOpcode == ISD::FSUB) {
25870 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
25871 if (C && C->getValueAPF().isNegZero())
25872 return SDValue();
25873 }
25874
25875 // The binop must be a vector type, so we can extract some fraction of it.
25876 EVT WideBVT = BinOp.getValueType();
25877 // The optimisations below currently assume we are dealing with fixed length
25878 // vectors. It is possible to add support for scalable vectors, but at the
25879 // moment we've done no analysis to prove whether they are profitable or not.
25880 if (!WideBVT.isFixedLengthVector())
25881 return SDValue();
25882
25883 assert((Index % VT.getVectorNumElements()) == 0 &&
25884 "Extract index is not a multiple of the vector length.");
25885
25886 // Bail out if this is not a proper multiple width extraction.
25887 unsigned WideWidth = WideBVT.getSizeInBits();
25888 unsigned NarrowWidth = VT.getSizeInBits();
25889 if (WideWidth % NarrowWidth != 0)
25890 return SDValue();
25891
25892 // Bail out if we are extracting a fraction of a single operation. This can
25893 // occur because we potentially looked through a bitcast of the binop.
25894 unsigned NarrowingRatio = WideWidth / NarrowWidth;
25895 unsigned WideNumElts = WideBVT.getVectorNumElements();
25896 if (WideNumElts % NarrowingRatio != 0)
25897 return SDValue();
25898
25899 // Bail out if the target does not support a narrower version of the binop.
25900 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
25901 WideNumElts / NarrowingRatio);
25902 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
25903 LegalOperations))
25904 return SDValue();
25905
25906 // If extraction is cheap, we don't need to look at the binop operands
25907 // for concat ops. The narrow binop alone makes this transform profitable.
25908 // We can't just reuse the original extract index operand because we may have
25909 // bitcasted.
25910 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
25911 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
25912 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
25913 BinOp.hasOneUse() && Src->hasOneUse()) {
25914 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25915 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25916 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25917 BinOp.getOperand(0), NewExtIndex);
25918 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25919 BinOp.getOperand(1), NewExtIndex);
25920 SDValue NarrowBinOp =
25921 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25922 return DAG.getBitcast(VT, NarrowBinOp);
25923 }
25924
25925 // Only handle the case where we are doubling and then halving. A larger ratio
25926 // may require more than two narrow binops to replace the wide binop.
25927 if (NarrowingRatio != 2)
25928 return SDValue();
25929
25930 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25931 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25932 // flavors, but no other 256-bit integer support. This could be extended to
25933 // handle any binop, but that may require fixing/adding other folds to avoid
25934 // codegen regressions.
25935 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25936 return SDValue();
25937
25938 // We need at least one concatenation operation of a binop operand to make
25939 // this transform worthwhile. The concat must double the input vector sizes.
25940 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25941 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25942 return V.getOperand(ConcatOpNum);
25943 return SDValue();
25944 };
25945 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25946 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25947
25948 if (SubVecL || SubVecR) {
25949 // If a binop operand was not the result of a concat, we must extract a
25950 // half-sized operand for our new narrow binop:
25951 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25952 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25953 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25954 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25955 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25956 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25957 BinOp.getOperand(0), IndexC);
25958
25959 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25960 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25961 BinOp.getOperand(1), IndexC);
25962
25963 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25964 return DAG.getBitcast(VT, NarrowBinOp);
25965 }
25966
25967 return SDValue();
25968}
25969
25970/// If we are extracting a subvector from a wide vector load, convert to a
25971/// narrow load to eliminate the extraction:
25972/// (extract_subvector (load wide vector)) --> (load narrow vector)
25973static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
25974 const SDLoc &DL, SelectionDAG &DAG) {
25975 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25976 if (DAG.getDataLayout().isBigEndian())
25977 return SDValue();
25978
25979 auto *Ld = dyn_cast<LoadSDNode>(Src);
25980 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
25981 return SDValue();
25982
25983 // We can only create byte sized loads.
25984 if (!VT.isByteSized())
25985 return SDValue();
25986
25987 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25988 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, VT))
25989 return SDValue();
25990
25991 unsigned NumElts = VT.getVectorMinNumElements();
25992 // A fixed length vector being extracted from a scalable vector
25993 // may not be any *smaller* than the scalable one.
25994 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25995 return SDValue();
25996
25997 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25998 // multiple of the minimum number of elements in the result type.
25999 assert(Index % NumElts == 0 && "The extract subvector index is not a "
26000 "multiple of the result's element count");
26001
26002 // It's fine to use TypeSize here as we know the offset will not be negative.
26003 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
26004 std::optional<unsigned> ByteOffset;
26005 if (Offset.isFixed())
26006 ByteOffset = Offset.getFixedValue();
26007
26008 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
26009 return SDValue();
26010
26011 // The narrow load will be offset from the base address of the old load if
26012 // we are extracting from something besides index 0 (little-endian).
26013 // TODO: Use "BaseIndexOffset" to make this more effective.
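// For example, extracting v2i64 at index 2 from a v4i64 load uses a fixed
// byte offset of 16 from the original base pointer.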
26014 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
26015
26016 MachineFunction &MF = DAG.getMachineFunction();
26017 MachineMemOperand *MMO;
26018 if (Offset.isScalable()) {
26019 MachinePointerInfo MPI =
26020 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
26021 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
26022 } else
26023 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
26024 VT.getStoreSize());
26025
26026 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
26027 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
26028 return NewLd;
26029}
26030
26031/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
26032/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
26033/// EXTRACT_SUBVECTOR(Op?, ?),
26034/// Mask'))
26035/// iff it is legal and profitable to do so. Notably, the trimmed mask
26036/// (containing only the elements that are extracted)
26037/// must reference at most two subvectors.
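/// For example (subject to the legality and profitability checks below):
///   extract_subvector (shuffle X, Y, <0,4,1,5>), 2
///     --> shuffle (extract_subvector X, 0), (extract_subvector Y, 0), <1,3>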
26038static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
26039 unsigned Index,
26040 const SDLoc &DL,
26041 SelectionDAG &DAG,
26042 bool LegalOperations) {
26043 // Only deal with non-scalable vectors.
26044 EVT WideVT = Src.getValueType();
26045 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
26046 return SDValue();
26047
26048 // The operand must be a shufflevector.
26049 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
26050 if (!WideShuffleVector)
26051 return SDValue();
26052
26053 // The old shuffle needs to go away.
26054 if (!WideShuffleVector->hasOneUse())
26055 return SDValue();
26056
26057 // And the narrow shufflevector that we'll form must be legal.
26058 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26059 if (LegalOperations &&
26060 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
26061 return SDValue();
26062
26063 int NumEltsExtracted = NarrowVT.getVectorNumElements();
26064 assert((Index % NumEltsExtracted) == 0 &&
26065 "Extract index is not a multiple of the output vector length.");
26066
26067 int WideNumElts = WideVT.getVectorNumElements();
26068
26069 SmallVector<int, 16> NewMask;
26070 NewMask.reserve(NumEltsExtracted);
26071 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
26072 DemandedSubvectors;
26073
26074 // Try to decode the wide mask into narrow mask from at most two subvectors.
26075 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
26076 assert((M >= -1) && (M < (2 * WideNumElts)) &&
26077 "Out-of-bounds shuffle mask?");
26078
26079 if (M < 0) {
26080 // Does not depend on operands, does not require adjustment.
26081 NewMask.emplace_back(M);
26082 continue;
26083 }
26084
26085 // From which operand of the shuffle does this shuffle mask element pick?
26086 int WideShufOpIdx = M / WideNumElts;
26087 // Which element of that operand is picked?
26088 int OpEltIdx = M % WideNumElts;
26089
26090 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
26091 "Shuffle mask vector decomposition failure.");
26092
26093 // And which NumEltsExtracted-sized subvector of that operand is that?
26094 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
26095 // And which element within that subvector of that operand is that?
26096 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
26097
26098 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
26099 "Shuffle mask subvector decomposition failure.");
26100
26101 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
26102 WideShufOpIdx * WideNumElts) == M &&
26103 "Shuffle mask full decomposition failure.");
26104
26105 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
26106
26107 if (Op.isUndef()) {
26108 // Picking from an undef operand. Let's adjust mask instead.
26109 NewMask.emplace_back(-1);
26110 continue;
26111 }
26112
26113 const std::pair<SDValue, int> DemandedSubvector =
26114 std::make_pair(Op, OpSubvecIdx);
26115
26116 if (DemandedSubvectors.insert(DemandedSubvector)) {
26117 if (DemandedSubvectors.size() > 2)
26118 return SDValue(); // We can't handle more than two subvectors.
26119 // How many elements into the WideVT does this subvector start?
26120 int Index = NumEltsExtracted * OpSubvecIdx;
26121 // Bail out if the extraction isn't going to be cheap.
26122 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
26123 return SDValue();
26124 }
26125
26126 // Ok, but from which operand of the new shuffle will this element pick?
26127 int NewOpIdx =
26128 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
26129 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
26130
26131 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
26132 NewMask.emplace_back(AdjM);
26133 }
26134 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
26135 assert(DemandedSubvectors.size() <= 2 &&
26136 "Should have ended up demanding at most two subvectors.");
26137
26138 // Did we discover that the shuffle does not actually depend on operands?
26139 if (DemandedSubvectors.empty())
26140 return DAG.getUNDEF(NarrowVT);
26141
26142 // Profitability check: only deal with extractions from the first subvector
26143 // unless the mask becomes an identity mask.
26144 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
26145 any_of(NewMask, [](int M) { return M < 0; }))
26146 for (auto &DemandedSubvector : DemandedSubvectors)
26147 if (DemandedSubvector.second != 0)
26148 return SDValue();
26149
26150 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26151 // operand[s]/index[es], so there is no point in checking its legality.
26152
26153 // Do not turn a legal shuffle into an illegal one.
26154 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26155 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26156 return SDValue();
26157
26158 SmallVector<SDValue, 2> NewOps;
26159 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26160 &DemandedSubvector : DemandedSubvectors) {
26161 // How many elements into the WideVT does this subvector start?
26162 int Index = NumEltsExtracted * DemandedSubvector.second;
26163 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26164 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26165 DemandedSubvector.first, IndexC));
26166 }
26167 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26168 "Should end up with either one or two ops");
26169
26170 // If we ended up with only one operand, pad with an undef.
26171 if (NewOps.size() == 1)
26172 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26173
26174 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26175}
26176
26177SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26178 EVT NVT = N->getValueType(0);
26179 SDValue V = N->getOperand(0);
26180 uint64_t ExtIdx = N->getConstantOperandVal(1);
26181 SDLoc DL(N);
26182
26183 // Extract from UNDEF is UNDEF.
26184 if (V.isUndef())
26185 return DAG.getUNDEF(NVT);
26186
26187 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26188 return NarrowLoad;
26189
26190 // Combine an extract of an extract into a single extract_subvector.
26191 // ext (ext X, C), 0 --> ext X, C
26192 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26193 // The index has to be a multiple of the new result type's known minimum
26194 // vector length.
26195 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26196 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26197 V.getConstantOperandVal(1)) &&
26199 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26200 V.getOperand(1));
26201 }
26202 }
26203
26204 // ty1 extract_subvector (ty2 splat(V)) -> ty1 splat(V)
26205 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26206 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26207 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26208 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26209
26210 // extract_subvector(insert_subvector(x,y,c1),c2)
26211 // --> extract_subvector(y,c2-c1)
26212 // iff we're just extracting from the inserted subvector.
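// e.g. (v2i32 result) extract_subvector (insert_subvector X, v4i32:Y, 4), 6
//        --> extract_subvector Y, 2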
26213 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26214 SDValue InsSub = V.getOperand(1);
26215 EVT InsSubVT = InsSub.getValueType();
26216 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26217 unsigned InsIdx = V.getConstantOperandVal(2);
26218 unsigned NumSubElts = NVT.getVectorMinNumElements();
26219 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26220 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26221 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26222 V.getValueType().isFixedLengthVector())
26223 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26224 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26225 }
26226
26227 // Try to move vector bitcast after extract_subv by scaling extraction index:
26228 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
26229 if (V.getOpcode() == ISD::BITCAST &&
26230 V.getOperand(0).getValueType().isVector() &&
26231 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26232 SDValue SrcOp = V.getOperand(0);
26233 EVT SrcVT = SrcOp.getValueType();
26234 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26235 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26236 if ((SrcNumElts % DestNumElts) == 0) {
26237 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26238 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26239 EVT NewExtVT =
26240 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26241 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26242 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26243 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26244 V.getOperand(0), NewIndex);
26245 return DAG.getBitcast(NVT, NewExtract);
26246 }
26247 }
26248 if ((DestNumElts % SrcNumElts) == 0) {
26249 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26250 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26251 ElementCount NewExtEC =
26252 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26253 EVT ScalarVT = SrcVT.getScalarType();
26254 if ((ExtIdx % DestSrcRatio) == 0) {
26255 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26256 EVT NewExtVT =
26257 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26258 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26259 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26260 SDValue NewExtract =
26261 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26262 V.getOperand(0), NewIndex);
26263 return DAG.getBitcast(NVT, NewExtract);
26264 }
26265 if (NewExtEC.isScalar() &&
26266 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
26267 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26268 SDValue NewExtract =
26269 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26270 V.getOperand(0), NewIndex);
26271 return DAG.getBitcast(NVT, NewExtract);
26272 }
26273 }
26274 }
26275 }
26276 }
26277
26278 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26279 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26280 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26281 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26282 "Concat and extract subvector do not change element type");
26283
26284 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26285 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26286
26287 // If the concatenated source types match this extract, it's a direct
26288 // simplification:
26289 // extract_subvec (concat V1, V2, ...), i --> Vi
26290 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26291 return V.getOperand(ConcatOpIdx);
26292
26293 // If the length of each concatenated source vector is a multiple of the
26294 // length of this extract, then extract a fraction of one of those source
26295 // vectors directly from a concat operand. Example:
26296 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26297 // v2i8 extract_subvec v8i8 Y, 6
26298 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26299 ConcatSrcNumElts % ExtNumElts == 0) {
26300 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26301 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26302 "Trying to extract from >1 concat operand?");
26303 assert(NewExtIdx % ExtNumElts == 0 &&
26304 "Extract index is not a multiple of the input vector length.");
26305 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26306 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26307 V.getOperand(ConcatOpIdx), NewIndexC);
26308 }
26309 }
26310
26311 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26312 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26313 return Shuffle;
26314
26315 if (SDValue NarrowBOp =
26316 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26317 return NarrowBOp;
26318
26320
26321 // If the input is a build vector, try to make a smaller build vector.
26322 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26323 EVT InVT = V.getValueType();
26324 unsigned ExtractSize = NVT.getSizeInBits();
26325 unsigned EltSize = InVT.getScalarSizeInBits();
26326 // Only do this if we won't split any elements.
26327 if (ExtractSize % EltSize == 0) {
26328 unsigned NumElems = ExtractSize / EltSize;
26329 EVT EltVT = InVT.getVectorElementType();
26330 EVT ExtractVT =
26331 NumElems == 1 ? EltVT
26332 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26333 if ((Level < AfterLegalizeDAG ||
26334 (NumElems == 1 ||
26335 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26336 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26337 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26338
26339 if (NumElems == 1) {
26340 SDValue Src = V->getOperand(IdxVal);
26341 if (EltVT != Src.getValueType())
26342 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26343 return DAG.getBitcast(NVT, Src);
26344 }
26345
26346 // Extract the pieces from the original build_vector.
26347 SDValue BuildVec =
26348 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26349 return DAG.getBitcast(NVT, BuildVec);
26350 }
26351 }
26352 }
26353
26354 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26355 // Handle only simple case where vector being inserted and vector
26356 // being extracted are of same size.
26357 EVT SmallVT = V.getOperand(1).getValueType();
26358 if (NVT.bitsEq(SmallVT)) {
26359 // Combine:
26360 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26361 // Into:
26362 // indices are equal or bit offsets are equal => V1
26363 // otherwise => (extract_subvec V1, ExtIdx)
26364 uint64_t InsIdx = V.getConstantOperandVal(2);
26365 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26366 ExtIdx * NVT.getScalarSizeInBits()) {
26367 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26368 return DAG.getBitcast(NVT, V.getOperand(1));
26369 } else {
26370 return DAG.getNode(
26371 ISD::EXTRACT_SUBVECTOR, DL, NVT,
26372 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26373 N->getOperand(1));
26374 }
26375 }
26376 }
26377
26378 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26379 // simplify it based on the (valid) extractions.
26380 if (!V.getValueType().isScalableVector() &&
26381 llvm::all_of(V->users(), [&](SDNode *Use) {
26382 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26383 Use->getOperand(0) == V;
26384 })) {
26385 unsigned NumElts = V.getValueType().getVectorNumElements();
26386 APInt DemandedElts = APInt::getZero(NumElts);
26387 for (SDNode *User : V->users()) {
26388 unsigned ExtIdx = User->getConstantOperandVal(1);
26389 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26390 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26391 }
26392 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26393 // We simplified the vector operand of this extract subvector. If this
26394 // extract is not dead, visit it again so it is folded properly.
26395 if (N->getOpcode() != ISD::DELETED_NODE)
26396 AddToWorklist(N);
26397 return SDValue(N, 0);
26398 }
26399 } else {
26400 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26401 return SDValue(N, 0);
26402 }
26403
26404 return SDValue();
26405}
26406
26407/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26408/// followed by concatenation. Narrow vector ops may have better performance
26409/// than wide ops, and this can unlock further narrowing of other vector ops.
26410/// Targets can invert this transform later if it is not profitable.
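/// For example, with v4i32 halves X and Y (when both half masks are legal):
///   shuffle (concat X, undef), (concat Y, undef), <0,1,8,9,u,u,u,u>
///     --> concat (shuffle X, Y, <0,1,4,5>), (shuffle X, Y, <u,u,u,u>)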
26411static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26412 SelectionDAG &DAG) {
26413 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26414 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26415 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26416 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26417 return SDValue();
26418
26419 // Split the wide shuffle mask into halves. Any mask element that is accessing
26420 // operand 1 is offset down to account for narrowing of the vectors.
26421 ArrayRef<int> Mask = Shuf->getMask();
26422 EVT VT = Shuf->getValueType(0);
26423 unsigned NumElts = VT.getVectorNumElements();
26424 unsigned HalfNumElts = NumElts / 2;
26425 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26426 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26427 for (unsigned i = 0; i != NumElts; ++i) {
26428 if (Mask[i] == -1)
26429 continue;
26430 // If we reference the upper (undef) subvector then the element is undef.
26431 if ((Mask[i] % NumElts) >= HalfNumElts)
26432 continue;
26433 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26434 if (i < HalfNumElts)
26435 Mask0[i] = M;
26436 else
26437 Mask1[i - HalfNumElts] = M;
26438 }
26439
26440 // Ask the target if this is a valid transform.
26441 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26442 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26443 HalfNumElts);
26444 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26445 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26446 return SDValue();
26447
26448 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26449 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26450 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26451 SDLoc DL(Shuf);
26452 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26453 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26454 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26455}
26456
26457// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26458 // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
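// For example, with v4i32 subvectors:
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11>
//     --> concat B, C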
26459static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26460 EVT VT = N->getValueType(0);
26461 unsigned NumElts = VT.getVectorNumElements();
26462
26463 SDValue N0 = N->getOperand(0);
26464 SDValue N1 = N->getOperand(1);
26465 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26466 ArrayRef<int> Mask = SVN->getMask();
26467
26468 SmallVector<SDValue, 4> Ops;
26469 EVT ConcatVT = N0.getOperand(0).getValueType();
26470 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26471 unsigned NumConcats = NumElts / NumElemsPerConcat;
26472
26473 auto IsUndefMaskElt = [](int i) { return i == -1; };
26474
26475 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26476 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26477 // half vector elements.
26478 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26479 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26480 IsUndefMaskElt)) {
26481 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26482 N0.getOperand(1),
26483 Mask.slice(0, NumElemsPerConcat));
26484 N1 = DAG.getUNDEF(ConcatVT);
26485 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26486 }
26487
26488 // Look at every vector that's inserted. We're looking for exact
26489 // subvector-sized copies from a concatenated vector
26490 for (unsigned I = 0; I != NumConcats; ++I) {
26491 unsigned Begin = I * NumElemsPerConcat;
26492 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26493
26494 // Make sure we're dealing with a copy.
26495 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26496 Ops.push_back(DAG.getUNDEF(ConcatVT));
26497 continue;
26498 }
26499
26500 int OpIdx = -1;
26501 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26502 if (IsUndefMaskElt(SubMask[i]))
26503 continue;
26504 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26505 return SDValue();
26506 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26507 if (0 <= OpIdx && EltOpIdx != OpIdx)
26508 return SDValue();
26509 OpIdx = EltOpIdx;
26510 }
26511 assert(0 <= OpIdx && "Unknown concat_vectors op");
26512
26513 if (OpIdx < (int)N0.getNumOperands())
26514 Ops.push_back(N0.getOperand(OpIdx));
26515 else
26516 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26517 }
26518
26519 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26520}
26521
26522// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26523// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26524//
26525// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26526// a simplification in some sense, but it isn't appropriate in general: some
26527// BUILD_VECTORs are substantially cheaper than others. The general case
26528// of a BUILD_VECTOR requires inserting each element individually (or
26529// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26530// all constants is a single constant pool load. A BUILD_VECTOR where each
26531// element is identical is a splat. A BUILD_VECTOR where most of the operands
26532// are undef lowers to a small number of element insertions.
26533//
26534// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26535// We don't fold shuffles where one side is a non-zero constant, and we don't
26536// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26537// non-constant operands. This seems to work out reasonably well in practice.
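// For example (non-constant A..D, each referenced once by the mask):
//   shuffle (build_vector A, B, C, D), undef, <3,2,1,0>
//     --> build_vector D, C, B, A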
26538static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26539 SelectionDAG &DAG,
26540 const TargetLowering &TLI) {
26541 EVT VT = SVN->getValueType(0);
26542 unsigned NumElts = VT.getVectorNumElements();
26543 SDValue N0 = SVN->getOperand(0);
26544 SDValue N1 = SVN->getOperand(1);
26545
26546 if (!N0->hasOneUse())
26547 return SDValue();
26548
26549 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
26550 // discussed above.
26551 if (!N1.isUndef()) {
26552 if (!N1->hasOneUse())
26553 return SDValue();
26554
26555 bool N0AnyConst = isAnyConstantBuildVector(N0);
26556 bool N1AnyConst = isAnyConstantBuildVector(N1);
26557 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26558 return SDValue();
26559 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26560 return SDValue();
26561 }
26562
26563 // If both inputs are splats of the same value then we can safely merge this
26564 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26565 bool IsSplat = false;
26566 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26567 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26568 if (BV0 && BV1)
26569 if (SDValue Splat0 = BV0->getSplatValue())
26570 IsSplat = (Splat0 == BV1->getSplatValue());
26571
26573 SmallSet<SDValue, 16> DuplicateOps;
26574 for (int M : SVN->getMask()) {
26575 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26576 if (M >= 0) {
26577 int Idx = M < (int)NumElts ? M : M - NumElts;
26578 SDValue &S = (M < (int)NumElts ? N0 : N1);
26579 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26580 Op = S.getOperand(Idx);
26581 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26582 SDValue Op0 = S.getOperand(0);
26583 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26584 } else {
26585 // Operand can't be combined - bail out.
26586 return SDValue();
26587 }
26588 }
26589
26590 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26591 // generating a splat; semantically, this is fine, but it's likely to
26592 // generate low-quality code if the target can't reconstruct an appropriate
26593 // shuffle.
26594 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26595 if (!IsSplat && !DuplicateOps.insert(Op).second)
26596 return SDValue();
26597
26598 Ops.push_back(Op);
26599 }
26600
26601 // BUILD_VECTOR requires all inputs to be of the same type, find the
26602 // maximum type and extend them all.
26603 EVT SVT = VT.getScalarType();
26604 if (SVT.isInteger())
26605 for (SDValue &Op : Ops)
26606 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26607 if (SVT != VT.getScalarType())
26608 for (SDValue &Op : Ops)
26609 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26610 : (TLI.isZExtFree(Op.getValueType(), SVT)
26611 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26612 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26613 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26614}
26615
26616// Match shuffles that can be converted to *_vector_extend_in_reg.
26617// This is often generated during legalization.
26618// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26619// and returns the EVT to which the extension should be performed.
26620// NOTE: this assumes that the src is the first operand of the shuffle.
26621static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26622 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26623 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26624 bool LegalOperations) {
26625 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26626
26627 // TODO Add support for big-endian when we have a test case.
26628 if (!VT.isInteger() || IsBigEndian)
26629 return std::nullopt;
26630
26631 unsigned NumElts = VT.getVectorNumElements();
26632 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26633
26634 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
26635 // power-of-2 extensions as they are the most likely.
26636 // FIXME: should try Scale == NumElts case too,
26637 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26638 // The vector width must be a multiple of Scale.
26639 if (NumElts % Scale != 0)
26640 continue;
26641
26642 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
26643 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
26644
26645 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
26646 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
26647 continue;
26648
26649 if (Match(Scale))
26650 return OutVT;
26651 }
26652
26653 return std::nullopt;
26654}
26655
26656// Match shuffles that can be converted to any_vector_extend_in_reg.
26657// This is often generated during legalization.
26658// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
26659static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
26660 SelectionDAG &DAG,
26661 const TargetLowering &TLI,
26662 bool LegalOperations) {
26663 EVT VT = SVN->getValueType(0);
26664 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26665
26666 // TODO Add support for big-endian when we have a test case.
26667 if (!VT.isInteger() || IsBigEndian)
26668 return SDValue();
26669
26670 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
26671 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
26672 Mask = SVN->getMask()](unsigned Scale) {
26673 for (unsigned i = 0; i != NumElts; ++i) {
26674 if (Mask[i] < 0)
26675 continue;
26676 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
26677 continue;
26678 return false;
26679 }
26680 return true;
26681 };
26682
26683 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
26684 SDValue N0 = SVN->getOperand(0);
26685 // Never create an illegal type. Only create unsupported operations if we
26686 // are pre-legalization.
26687 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26688 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
26689 if (!OutVT)
26690 return SDValue();
26691 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
26692}
26693
26694// Match shuffles that can be converted to zero_extend_vector_inreg.
26695// This is often generated during legalization.
26696// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
26697static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
26698 SelectionDAG &DAG,
26699 const TargetLowering &TLI,
26700 bool LegalOperations) {
26701 bool LegalTypes = true;
26702 EVT VT = SVN->getValueType(0);
26703 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
26704 unsigned NumElts = VT.getVectorNumElements();
26705 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26706
26707 // TODO: add support for big-endian when we have a test case.
26708 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26709 if (!VT.isInteger() || IsBigEndian)
26710 return SDValue();
26711
26712 SmallVector<int, 16> Mask(SVN->getMask());
26713 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
26714 for (int &Indice : Mask) {
26715 if (Indice < 0)
26716 continue;
26717 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
26718 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
26719 Fn(Indice, OpIdx, OpEltIdx);
26720 }
26721 };
26722
26723 // Which elements of which operand does this shuffle demand?
26724 std::array<APInt, 2> OpsDemandedElts;
26725 for (APInt &OpDemandedElts : OpsDemandedElts)
26726 OpDemandedElts = APInt::getZero(NumElts);
26727 ForEachDecomposedIndice(
26728 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
26729 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
26730 });
26731
26732 // Element-wise(!), which of these demanded elements are known to be zero?
26733 std::array<APInt, 2> OpsKnownZeroElts;
26734 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
26735 std::get<2>(I) =
26736 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
26737
26738 // Manifest zeroable element knowledge in the shuffle mask.
26739 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
26740 // this is a local invention, but it won't leak into DAG.
26741 // FIXME: should we not manifest them, but just check when matching?
26742 bool HadZeroableElts = false;
26743 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
26744 int &Indice, int OpIdx, int OpEltIdx) {
26745 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
26746 Indice = -2; // Zeroable element.
26747 HadZeroableElts = true;
26748 }
26749 });
26750
26751 // Don't proceed unless we've refined at least one zeroable mask index.
26752 // If we didn't, then we are still trying to match the same shuffle mask
26753 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
26754 // and evidently failed. Proceeding will lead to endless combine loops.
26755 if (!HadZeroableElts)
26756 return SDValue();
26757
26758 // The shuffle may be more fine-grained than we want. Widen elements first.
26759 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
26760 SmallVector<int, 16> ScaledMask;
26761 getShuffleMaskWithWidestElts(Mask, ScaledMask);
26762 assert(Mask.size() >= ScaledMask.size() &&
26763 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
26764 int Prescale = Mask.size() / ScaledMask.size();
26765
26766 NumElts = ScaledMask.size();
26767 EltSizeInBits *= Prescale;
26768
26769 EVT PrescaledVT = EVT::getVectorVT(
26770 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
26771 NumElts);
26772
26773 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
26774 return SDValue();
26775
26776 // For example,
26777 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
26778 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
26779 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
26780 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
26781 "Unexpected mask scaling factor.");
26782 ArrayRef<int> Mask = ScaledMask;
26783 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
26784 SrcElt != NumSrcElts; ++SrcElt) {
26785 // Analyze the shuffle mask in Scale-sized chunks.
26786 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
26787 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
26788 Mask = Mask.drop_front(MaskChunk.size());
26789 // The first index in this chunk must be SrcElt, but not zero!
26790 // FIXME: undef should be fine, but that results in a more-defined result.
26791 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
26792 return false;
26793 // The rest of the indices in this chunk must be zeros.
26794 // FIXME: undef should be fine, but that results in a more-defined result.
26795 if (!all_of(MaskChunk.drop_front(1),
26796 [](int Indice) { return Indice == -2; }))
26797 return false;
26798 }
26799 assert(Mask.empty() && "Did not process the whole mask?");
26800 return true;
26801 };
26802
26803 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
26804 for (bool Commuted : {false, true}) {
26805 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
26806 if (Commuted)
26807 ShuffleVectorSDNode::commuteMask(ScaledMask);
26808 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26809 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
26810 LegalOperations);
26811 if (OutVT)
26812 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
26813 DAG.getBitcast(PrescaledVT, Op)));
26814 }
26815 return SDValue();
26816}
26817
26818// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
26819// each source element of a large type into the lowest elements of a smaller
26820// destination type. This is often generated during legalization.
26821 // If the source node itself was a '*_extend_vector_inreg' node then we
26822 // should be able to remove it.
26823 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
26824 SelectionDAG &DAG) {
26825 EVT VT = SVN->getValueType(0);
26826 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26827
26828 // TODO Add support for big-endian when we have a test case.
26829 if (!VT.isInteger() || IsBigEndian)
26830 return SDValue();
26831
26832 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
26833
26834 unsigned Opcode = N0.getOpcode();
26835 if (!ISD::isExtVecInRegOpcode(Opcode))
26836 return SDValue();
26837
26838 SDValue N00 = N0.getOperand(0);
26839 ArrayRef<int> Mask = SVN->getMask();
26840 unsigned NumElts = VT.getVectorNumElements();
26841 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26842 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
26843 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
26844
26845 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
26846 return SDValue();
26847 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
26848
26849 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
26850 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
26851 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
26852 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
26853 for (unsigned i = 0; i != NumElts; ++i) {
26854 if (Mask[i] < 0)
26855 continue;
26856 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
26857 continue;
26858 return false;
26859 }
26860 return true;
26861 };
26862
26863 // At the moment we just handle the case where we've truncated back to the
26864 // same size as before the extension.
26865 // TODO: handle more extension/truncation cases as cases arise.
26866 if (EltSizeInBits != ExtSrcSizeInBits)
26867 return SDValue();
26868
26869 // We can remove *extend_vector_inreg only if the truncation happens at
26870 // the same scale as the extension.
26871 if (isTruncate(ExtScale))
26872 return DAG.getBitcast(VT, N00);
26873
26874 return SDValue();
26875}
26876
26877// Combine shuffles of splat-shuffles of the form:
26878// shuffle (shuffle V, undef, splat-mask), undef, M
26879// If splat-mask contains undef elements, we need to be careful about
26880// introducing undef's in the folded mask which are not the result of composing
26881// the masks of the shuffles.
26882 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
26883 SelectionDAG &DAG) {
26884 EVT VT = Shuf->getValueType(0);
26885 unsigned NumElts = VT.getVectorNumElements();
26886
26887 if (!Shuf->getOperand(1).isUndef())
26888 return SDValue();
26889
26890 // See if this unary non-splat shuffle actually *is* a splat shuffle
26891 // in disguise, with all demanded elements being identical.
26892 // FIXME: this can be done per-operand.
26893 if (!Shuf->isSplat()) {
26894 APInt DemandedElts(NumElts, 0);
26895 for (int Idx : Shuf->getMask()) {
26896 if (Idx < 0)
26897 continue; // Ignore sentinel indices.
26898 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
26899 DemandedElts.setBit(Idx);
26900 }
26901 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
26902 APInt UndefElts;
26903 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
26904 // Even if all demanded elements are splat, some of them could be undef.
26905 // Which lowest demanded element is *not* known-undef?
26906 std::optional<unsigned> MinNonUndefIdx;
26907 for (int Idx : Shuf->getMask()) {
26908 if (Idx < 0 || UndefElts[Idx])
26909 continue; // Ignore sentinel indices, and undef elements.
26910 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
26911 }
26912 if (!MinNonUndefIdx)
26913 return DAG.getUNDEF(VT); // All undef - result is undef.
26914 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
26915 SmallVector<int, 8> SplatMask(Shuf->getMask());
26916 for (int &Idx : SplatMask) {
26917 if (Idx < 0)
26918 continue; // Passthrough sentinel indices.
26919 // Otherwise, just pick the lowest demanded non-undef element.
26920 // Or sentinel undef, if we know we'd pick a known-undef element.
26921 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
26922 }
26923 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
26924 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
26925 Shuf->getOperand(1), SplatMask);
26926 }
26927 }
26928
26929 // If the inner operand is a known splat with no undefs, just return that directly.
26930 // TODO: Create DemandedElts mask from Shuf's mask.
26931 // TODO: Allow undef elements and merge with the shuffle code below.
26932 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26933 return Shuf->getOperand(0);
26934
26935 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26936 if (!Splat || !Splat->isSplat())
26937 return SDValue();
26938
26939 ArrayRef<int> ShufMask = Shuf->getMask();
26940 ArrayRef<int> SplatMask = Splat->getMask();
26941 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26942
26943 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26944 // every undef mask element in the splat-shuffle has a corresponding undef
26945 // element in the user-shuffle's mask or if the composition of mask elements
26946 // would result in undef.
26947 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26948 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26949 // In this case it is not legal to simplify to the splat-shuffle because we
26950 // may be exposing the users of the shuffle to an undef element at index 1
26951 // which was not there before the combine.
26952 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26953 // In this case the composition of masks yields SplatMask, so it's ok to
26954 // simplify to the splat-shuffle.
26955 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26956 // In this case the composed mask includes all undef elements of SplatMask
26957 // and in addition sets element zero to undef. It is safe to simplify to
26958 // the splat-shuffle.
26959 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26960 ArrayRef<int> SplatMask) {
26961 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26962 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26963 SplatMask[UserMask[i]] != -1)
26964 return false;
26965 return true;
26966 };
26967 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26968 return Shuf->getOperand(0);
26969
26970 // Create a new shuffle with a mask that is composed of the two shuffles'
26971 // masks.
26972 SmallVector<int, 32> NewMask;
26973 for (int Idx : ShufMask)
26974 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26975
26976 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26977 Splat->getOperand(0), Splat->getOperand(1),
26978 NewMask);
26979}
26980
26981// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26982// the mask can be treated as a larger type.
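// For illustration: a v4i32 shuffle<0,1,6,7> of (bitcast v2i64 A), (bitcast v2i64 B)
// can widen to bitcast (v2i64 shuffle<0,3> A, B), assuming the v2i64 mask is legal.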
26983 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26984 SelectionDAG &DAG,
26985 const TargetLowering &TLI,
26986 bool LegalOperations) {
26987 SDValue Op0 = SVN->getOperand(0);
26988 SDValue Op1 = SVN->getOperand(1);
26989 EVT VT = SVN->getValueType(0);
26990 if (Op0.getOpcode() != ISD::BITCAST)
26991 return SDValue();
26992 EVT InVT = Op0.getOperand(0).getValueType();
26993 if (!InVT.isVector() ||
26994 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26995 Op1.getOperand(0).getValueType() != InVT)))
26996 return SDValue();
26997 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26998 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26999 return SDValue();
27000
27001 int VTLanes = VT.getVectorNumElements();
27002 int InLanes = InVT.getVectorNumElements();
27003 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
27004 (LegalOperations &&
27005 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
27006 return SDValue();
27007 int Factor = VTLanes / InLanes;
27008
27009 // Check that each group of lanes in the mask is either undef or makes a valid
27010 // mask for the wider lane type.
27011 ArrayRef<int> Mask = SVN->getMask();
27012 SmallVector<int> NewMask;
27013 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
27014 return SDValue();
27015
27016 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
27017 return SDValue();
27018
27019 // Create the new shuffle with the new mask and bitcast it back to the
27020 // original type.
27021 SDLoc DL(SVN);
27022 Op0 = Op0.getOperand(0);
27023 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
27024 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
27025 return DAG.getBitcast(VT, NewShuf);
27026}
27027
27028/// Combine shuffle of shuffle of the form:
27029/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
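/// e.g. shuf (shuf X, undef, <2,2,3,3>), undef, <0,1,0,1> reads element 2 of X
/// in every non-undef lane, so it becomes the splat shuffle X, undef, <2,2,2,2>.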
27030 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
27031 SelectionDAG &DAG) {
27032 if (!OuterShuf->getOperand(1).isUndef())
27033 return SDValue();
27034 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
27035 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
27036 return SDValue();
27037
27038 ArrayRef<int> OuterMask = OuterShuf->getMask();
27039 ArrayRef<int> InnerMask = InnerShuf->getMask();
27040 unsigned NumElts = OuterMask.size();
27041 assert(NumElts == InnerMask.size() && "Mask length mismatch");
27042 SmallVector<int, 32> CombinedMask(NumElts, -1);
27043 int SplatIndex = -1;
27044 for (unsigned i = 0; i != NumElts; ++i) {
27045 // Undef lanes remain undef.
27046 int OuterMaskElt = OuterMask[i];
27047 if (OuterMaskElt == -1)
27048 continue;
27049
27050 // Peek through the shuffle masks to get the underlying source element.
27051 int InnerMaskElt = InnerMask[OuterMaskElt];
27052 if (InnerMaskElt == -1)
27053 continue;
27054
27055 // Initialize the splatted element.
27056 if (SplatIndex == -1)
27057 SplatIndex = InnerMaskElt;
27058
27059 // Non-matching index - this is not a splat.
27060 if (SplatIndex != InnerMaskElt)
27061 return SDValue();
27062
27063 CombinedMask[i] = InnerMaskElt;
27064 }
27065 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
27066 getSplatIndex(CombinedMask) != -1) &&
27067 "Expected a splat mask");
27068
27069 // TODO: The transform may be a win even if the mask is not legal.
27070 EVT VT = OuterShuf->getValueType(0);
27071 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
27072 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
27073 return SDValue();
27074
27075 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
27076 InnerShuf->getOperand(1), CombinedMask);
27077}
27078
27079/// If the shuffle mask is taking exactly one element from the first vector
27080/// operand and passing through all other elements from the second vector
27081/// operand, return the index of the mask element that is choosing an element
27082/// from the first operand. Otherwise, return -1.
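/// e.g. with 4 elements per operand: <4,5,0,7> returns 2 (lane 2 takes element
/// 0 of operand 0), while <4,5,6,7> and <0,5,2,7> both return -1.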
27083 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
27084 int MaskSize = Mask.size();
27085 int EltFromOp0 = -1;
27086 // TODO: This does not match if there are undef elements in the shuffle mask.
27087 // Should we ignore undefs in the shuffle mask instead? The trade-off is
27088 // removing an instruction (a shuffle), but losing the knowledge that some
27089 // vector lanes are not needed.
27090 for (int i = 0; i != MaskSize; ++i) {
27091 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
27092 // We're looking for a shuffle of exactly one element from operand 0.
27093 if (EltFromOp0 != -1)
27094 return -1;
27095 EltFromOp0 = i;
27096 } else if (Mask[i] != i + MaskSize) {
27097 // Nothing from operand 1 can change lanes.
27098 return -1;
27099 }
27100 }
27101 return EltFromOp0;
27102}
27103
27104/// If a shuffle inserts exactly one element from a source vector operand into
27105/// another vector operand and we can access the specified element as a scalar,
27106/// then we can eliminate the shuffle.
27107SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
27108 // First, check if we are taking one element of a vector and shuffling that
27109 // element into another vector.
27110 ArrayRef<int> Mask = Shuf->getMask();
27111 SmallVector<int, 16> CommutedMask(Mask);
27112 SDValue Op0 = Shuf->getOperand(0);
27113 SDValue Op1 = Shuf->getOperand(1);
27114 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
27115 if (ShufOp0Index == -1) {
27116 // Commute mask and check again.
27117 ShuffleVectorSDNode::commuteMask(CommutedMask);
27118 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
27119 if (ShufOp0Index == -1)
27120 return SDValue();
27121 // Commute operands to match the commuted shuffle mask.
27122 std::swap(Op0, Op1);
27123 Mask = CommutedMask;
27124 }
27125
27126 // The shuffle inserts exactly one element from operand 0 into operand 1.
27127 // Now see if we can access that element as a scalar via a real insert element
27128 // instruction.
27129 // TODO: We can try harder to locate the element as a scalar. Examples: it
27130 // could be an operand of BUILD_VECTOR, or a constant.
27131 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
27132 "Shuffle mask value must be from operand 0");
27133
27134 SDValue Elt;
27135 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
27136 m_SpecificInt(Mask[ShufOp0Index])))) {
27137 // There's an existing insertelement with constant insertion index, so we
27138 // don't need to check the legality/profitability of a replacement operation
27139 // that differs at most in the constant value. The target should be able to
27140 // lower any of those in a similar way. If not, legalization will expand
27141 // this to a scalar-to-vector plus shuffle.
27142 //
27143 // Note that the shuffle may move the scalar from the position that the
27144 // insert element used. Therefore, our new insert element occurs at the
27145 // shuffle's mask index value, not the insert's index value.
27146 //
27147 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
27148 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27149 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27150 Op1, Elt, NewInsIndex);
27151 }
27152
27153 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
27154 return SDValue();
27155
27157 Mask[ShufOp0Index] == 0) {
27158 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27159 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27160 Op1, Elt, NewInsIndex);
27161 }
27162
27163 return SDValue();
27164}
27165
27166/// If we have a unary shuffle of a shuffle, see if it can be folded away
27167/// completely. This has the potential to lose undef knowledge because the first
27168/// shuffle may not have an undef mask element where the second one does. So
27169/// only call this after doing simplifications based on demanded elements.
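/// e.g. shuf (shuf0 X, Y, <2,2,3,3>), undef, <1,0,3,2> picks the same source
/// element in every lane as shuf0 already does, so it folds to shuf0 itself.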
27170 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27171 // shuf (shuf0 X, Y, Mask0), undef, Mask
27172 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27173 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27174 return SDValue();
27175
27176 ArrayRef<int> Mask = Shuf->getMask();
27177 ArrayRef<int> Mask0 = Shuf0->getMask();
27178 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27179 // Ignore undef elements.
27180 if (Mask[i] == -1)
27181 continue;
27182 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27183
27184 // Is the element of the shuffle operand chosen by this shuffle the same as
27185 // the element chosen by the shuffle operand itself?
27186 if (Mask0[Mask[i]] != Mask0[i])
27187 return SDValue();
27188 }
27189 // Every element of this shuffle is identical to the result of the previous
27190 // shuffle, so we can replace this value.
27191 return Shuf->getOperand(0);
27192}
27193
27194SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27195 EVT VT = N->getValueType(0);
27196 unsigned NumElts = VT.getVectorNumElements();
27197
27198 SDValue N0 = N->getOperand(0);
27199 SDValue N1 = N->getOperand(1);
27200
27201 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27202
27203 // Canonicalize shuffle undef, undef -> undef
27204 if (N0.isUndef() && N1.isUndef())
27205 return DAG.getUNDEF(VT);
27206
27207 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27208
27209 // Canonicalize shuffle v, v -> v, undef
27210 if (N0 == N1)
27211 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27212 createUnaryMask(SVN->getMask(), NumElts));
27213
27214 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27215 if (N0.isUndef())
27216 return DAG.getCommutedVectorShuffle(*SVN);
27217
27218 // Remove references to rhs if it is undef
27219 if (N1.isUndef()) {
27220 bool Changed = false;
27221 SmallVector<int, 8> NewMask;
27222 for (unsigned i = 0; i != NumElts; ++i) {
27223 int Idx = SVN->getMaskElt(i);
27224 if (Idx >= (int)NumElts) {
27225 Idx = -1;
27226 Changed = true;
27227 }
27228 NewMask.push_back(Idx);
27229 }
27230 if (Changed)
27231 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27232 }
27233
27234 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27235 return InsElt;
27236
27237 // A shuffle of a single vector that is a splatted value can always be folded.
27238 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27239 return V;
27240
27241 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27242 return V;
27243
27244 // If it is a splat, check if the argument vector is another splat or a
27245 // build_vector.
27246 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27247 int SplatIndex = SVN->getSplatIndex();
27248 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27249 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27250 // splat (vector_bo L, R), Index -->
27251 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27252 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27253 SDLoc DL(N);
27254 EVT EltVT = VT.getScalarType();
27255 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27256 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27257 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27258 SDValue NewBO =
27259 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27260 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27261 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27262 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27263 }
27264
27265 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27266 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27267 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27268 N0.hasOneUse()) {
27269 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27270 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27271
27272 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27273 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27274 if (Idx->getAPIntValue() == SplatIndex)
27275 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27276
27277 // Look through a bitcast if LE and splatting lane 0, through to a
27278 // scalar_to_vector or a build_vector.
27279 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27280 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27281 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27282 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27283 EVT N00VT = N0.getOperand(0).getValueType();
27284 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27285 VT.isInteger() && N00VT.isInteger()) {
27286 EVT InVT =
27287 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
27288 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
27289 SDLoc(N), InVT);
27290 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27291 }
27292 }
27293 }
27294
27295 // If this is a bit convert that changes the element type of the vector but
27296 // not the number of vector elements, look through it. Be careful not to
27297 // look through conversions that change things like v4f32 to v2f64.
27298 SDNode *V = N0.getNode();
27299 if (V->getOpcode() == ISD::BITCAST) {
27300 SDValue ConvInput = V->getOperand(0);
27301 if (ConvInput.getValueType().isVector() &&
27302 ConvInput.getValueType().getVectorNumElements() == NumElts)
27303 V = ConvInput.getNode();
27304 }
27305
27306 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27307 assert(V->getNumOperands() == NumElts &&
27308 "BUILD_VECTOR has wrong number of operands");
27309 SDValue Base;
27310 bool AllSame = true;
27311 for (unsigned i = 0; i != NumElts; ++i) {
27312 if (!V->getOperand(i).isUndef()) {
27313 Base = V->getOperand(i);
27314 break;
27315 }
27316 }
27317 // Splat of <u, u, u, u>, return <u, u, u, u>
27318 if (!Base.getNode())
27319 return N0;
27320 for (unsigned i = 0; i != NumElts; ++i) {
27321 if (V->getOperand(i) != Base) {
27322 AllSame = false;
27323 break;
27324 }
27325 }
27326 // Splat of <x, x, x, x>, return <x, x, x, x>
27327 if (AllSame)
27328 return N0;
27329
27330 // Canonicalize any other splat as a build_vector, but avoid defining any
27331 // undefined elements in the mask.
27332 SDValue Splatted = V->getOperand(SplatIndex);
27333 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27334 EVT EltVT = Splatted.getValueType();
27335
27336 for (unsigned i = 0; i != NumElts; ++i) {
27337 if (SVN->getMaskElt(i) < 0)
27338 Ops[i] = DAG.getUNDEF(EltVT);
27339 }
27340
27341 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27342
27343 // We may have jumped through bitcasts, so the type of the
27344 // BUILD_VECTOR may not match the type of the shuffle.
27345 if (V->getValueType(0) != VT)
27346 NewBV = DAG.getBitcast(VT, NewBV);
27347 return NewBV;
27348 }
27349 }
27350
27351 // Simplify source operands based on shuffle mask.
27352 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27353 return SDValue(N, 0);
27354
27355 // This is intentionally placed after demanded elements simplification because
27356 // it could eliminate knowledge of undef elements created by this shuffle.
27357 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27358 return ShufOp;
27359
27360 // Match shuffles that can be converted to any_vector_extend_in_reg.
27361 if (SDValue V =
27362 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27363 return V;
27364
27365 // Combine "truncate_vector_in_reg" style shuffles.
27366 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27367 return V;
27368
27369 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27370 Level < AfterLegalizeVectorOps &&
27371 (N1.isUndef() ||
27372 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27373 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27374 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27375 return V;
27376 }
27377
27378 // A shuffle of a concat of the same narrow vector can be reduced to use
27379 // only low-half elements of a concat with undef:
27380 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
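// e.g. with 2-element X: shuf (concat X, X), undef, <0,3,1,2>
// --> shuf (concat X, undef), undef, <0,1,1,0>, assuming the new mask is legal.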
27381 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27382 N0.getNumOperands() == 2 &&
27383 N0.getOperand(0) == N0.getOperand(1)) {
27384 int HalfNumElts = (int)NumElts / 2;
27385 SmallVector<int, 8> NewMask;
27386 for (unsigned i = 0; i != NumElts; ++i) {
27387 int Idx = SVN->getMaskElt(i);
27388 if (Idx >= HalfNumElts) {
27389 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27390 Idx -= HalfNumElts;
27391 }
27392 NewMask.push_back(Idx);
27393 }
27394 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27395 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27396 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27397 N0.getOperand(0), UndefVec);
27398 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27399 }
27400 }
27401
27402 // See if we can replace a shuffle with an insert_subvector.
27403 // e.g. v2i32 into v8i32:
27404 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27405 // --> insert_subvector(lhs,rhs1,4).
27406 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27407 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
27408 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27409 // Ensure RHS subvectors are legal.
27410 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27411 EVT SubVT = RHS.getOperand(0).getValueType();
27412 int NumSubVecs = RHS.getNumOperands();
27413 int NumSubElts = SubVT.getVectorNumElements();
27414 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27415 if (!TLI.isTypeLegal(SubVT))
27416 return SDValue();
27417
27418 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27419 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27420 return SDValue();
27421
27422 // Search [NumSubElts] spans for RHS sequence.
27423 // TODO: Can we avoid nested loops to increase performance?
27424 SmallVector<int> InsertionMask(NumElts);
27425 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27426 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27427 // Reset mask to identity.
27428 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27429
27430 // Add subvector insertion.
27431 std::iota(InsertionMask.begin() + SubIdx,
27432 InsertionMask.begin() + SubIdx + NumSubElts,
27433 NumElts + (SubVec * NumSubElts));
27434
27435 // See if the shuffle mask matches the reference insertion mask.
27436 bool MatchingShuffle = true;
27437 for (int i = 0; i != (int)NumElts; ++i) {
27438 int ExpectIdx = InsertionMask[i];
27439 int ActualIdx = Mask[i];
27440 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27441 MatchingShuffle = false;
27442 break;
27443 }
27444 }
27445
27446 if (MatchingShuffle)
27447 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27448 SubIdx);
27449 }
27450 }
27451 return SDValue();
27452 };
27453 ArrayRef<int> Mask = SVN->getMask();
27454 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27455 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27456 return InsertN1;
27457 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27458 SmallVector<int> CommuteMask(Mask);
27459 ShuffleVectorSDNode::commuteMask(CommuteMask);
27460 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27461 return InsertN0;
27462 }
27463 }
27464
27465 // If we're not performing a select/blend shuffle, see if we can convert the
27466 // shuffle into an AND node, where all out-of-lane elements are known zero.
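// For illustration: if the lanes demanded from N1 are known zero, then
// shuffle N0, N1, <0,7,2,5> --> and N0, <-1,0,-1,0> (as an integer vector).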
27467 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27468 bool IsInLaneMask = true;
27469 ArrayRef<int> Mask = SVN->getMask();
27470 SmallVector<int, 16> ClearMask(NumElts, -1);
27471 APInt DemandedLHS = APInt::getZero(NumElts);
27472 APInt DemandedRHS = APInt::getZero(NumElts);
27473 for (int I = 0; I != (int)NumElts; ++I) {
27474 int M = Mask[I];
27475 if (M < 0)
27476 continue;
27477 ClearMask[I] = M == I ? I : (I + NumElts);
27478 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27479 if (M != I) {
27480 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27481 Demanded.setBit(M % NumElts);
27482 }
27483 }
27484 // TODO: Should we try to mask with N1 as well?
27485 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27486 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27487 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27488 SDLoc DL(N);
27489 EVT IntVT = VT.changeVectorElementTypeToInteger();
27490 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27491 // Transform the type to a legal type so that the buildvector constant
27492 // elements are not illegal. Make sure that the result is larger than the
27493 // original type, in case the value is split into two (e.g. i64->i32).
27494 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27495 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27496 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27497 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27498 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27499 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27500 for (int I = 0; I != (int)NumElts; ++I)
27501 if (0 <= Mask[I])
27502 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27503
27504 // See if a clear mask is legal instead of going via
27505 // XformToShuffleWithZero which loses UNDEF mask elements.
27506 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27507 return DAG.getBitcast(
27508 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27509 DAG.getConstant(0, DL, IntVT), ClearMask));
27510
27511 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27512 return DAG.getBitcast(
27513 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27514 DAG.getBuildVector(IntVT, DL, AndMask)));
27515 }
27516 }
27517 }
27518
27519 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27520 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27521 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27522 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27523 return Res;
27524
27525 // If this shuffle only has a single input that is a bitcasted shuffle,
27526 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27527 // back to their original types.
27528 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27529 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27530 TLI.isTypeLegal(VT)) {
27531
27532 SDValue BC0 = peekThroughOneUseBitcasts(N0);
27533 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27534 EVT SVT = VT.getScalarType();
27535 EVT InnerVT = BC0->getValueType(0);
27536 EVT InnerSVT = InnerVT.getScalarType();
27537
27538 // Determine which shuffle works with the smaller scalar type.
27539 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27540 EVT ScaleSVT = ScaleVT.getScalarType();
27541
27542 if (TLI.isTypeLegal(ScaleVT) &&
27543 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27544 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27545 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27546 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27547
27548 // Scale the shuffle masks to the smaller scalar type.
27549 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27550 SmallVector<int, 8> InnerMask;
27551 SmallVector<int, 8> OuterMask;
27552 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27553 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27554
27555 // Merge the shuffle masks.
27556 SmallVector<int, 8> NewMask;
27557 for (int M : OuterMask)
27558 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27559
27560 // Test for shuffle mask legality over both commutations.
27561 SDValue SV0 = BC0->getOperand(0);
27562 SDValue SV1 = BC0->getOperand(1);
27563 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27564 if (!LegalMask) {
27565 std::swap(SV0, SV1);
27566 ShuffleVectorSDNode::commuteMask(NewMask);
27567 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27568 }
27569
27570 if (LegalMask) {
27571 SV0 = DAG.getBitcast(ScaleVT, SV0);
27572 SV1 = DAG.getBitcast(ScaleVT, SV1);
27573 return DAG.getBitcast(
27574 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27575 }
27576 }
27577 }
27578 }
27579
27580 // Match shuffles of bitcasts, so long as the mask can be treated as the
27581 // larger type.
27582 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27583 return V;
27584
27585 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27586 // operand, and SV1 as the second operand.
27587 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27588 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
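// Worked example (Commute = false, 4 elements): merging
// SVN = shuffle(shuffle(A, B, <0,4,1,5>), C, <0,2,4,6>)
// yields SV0 = A, SV1 = C, Mask = <0,1,4,6>, i.e. shuffle(A, C, <0,1,4,6>).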
27589 auto MergeInnerShuffle =
27590 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27591 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27592 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27593 SmallVectorImpl<int> &Mask) -> bool {
27594 // Don't try to fold splats; they're likely to simplify somehow, or they
27595 // might be free.
27596 if (OtherSVN->isSplat())
27597 return false;
27598
27599 SV0 = SV1 = SDValue();
27600 Mask.clear();
27601
27602 for (unsigned i = 0; i != NumElts; ++i) {
27603 int Idx = SVN->getMaskElt(i);
27604 if (Idx < 0) {
27605 // Propagate Undef.
27606 Mask.push_back(Idx);
27607 continue;
27608 }
27609
27610 if (Commute)
27611 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27612
27613 SDValue CurrentVec;
27614 if (Idx < (int)NumElts) {
27615 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27616 // shuffle mask to identify which vector is actually referenced.
27617 Idx = OtherSVN->getMaskElt(Idx);
27618 if (Idx < 0) {
27619 // Propagate Undef.
27620 Mask.push_back(Idx);
27621 continue;
27622 }
27623 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27624 : OtherSVN->getOperand(1);
27625 } else {
27626 // This shuffle index references an element within N1.
27627 CurrentVec = N1;
27628 }
27629
27630 // Simple case where 'CurrentVec' is UNDEF.
27631 if (CurrentVec.isUndef()) {
27632 Mask.push_back(-1);
27633 continue;
27634 }
27635
27636 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27637 // will be the first or second operand of the combined shuffle.
27638 Idx = Idx % NumElts;
27639 if (!SV0.getNode() || SV0 == CurrentVec) {
27640 // Ok. CurrentVec is the left hand side.
27641 // Update the mask accordingly.
27642 SV0 = CurrentVec;
27643 Mask.push_back(Idx);
27644 continue;
27645 }
27646 if (!SV1.getNode() || SV1 == CurrentVec) {
27647 // Ok. CurrentVec is the right hand side.
27648 // Update the mask accordingly.
27649 SV1 = CurrentVec;
27650 Mask.push_back(Idx + NumElts);
27651 continue;
27652 }
27653
27654 // Last chance - see if the vector is another shuffle and if it
27655 // uses one of the existing candidate shuffle ops.
27656 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
27657 int InnerIdx = CurrentSVN->getMaskElt(Idx);
27658 if (InnerIdx < 0) {
27659 Mask.push_back(-1);
27660 continue;
27661 }
27662 SDValue InnerVec = (InnerIdx < (int)NumElts)
27663 ? CurrentSVN->getOperand(0)
27664 : CurrentSVN->getOperand(1);
27665 if (InnerVec.isUndef()) {
27666 Mask.push_back(-1);
27667 continue;
27668 }
27669 InnerIdx %= NumElts;
27670 if (InnerVec == SV0) {
27671 Mask.push_back(InnerIdx);
27672 continue;
27673 }
27674 if (InnerVec == SV1) {
27675 Mask.push_back(InnerIdx + NumElts);
27676 continue;
27677 }
27678 }
27679
27680 // Bail out if we cannot convert the shuffle pair into a single shuffle.
27681 return false;
27682 }
27683
27684 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27685 return true;
27686
27687 // Avoid introducing shuffles with illegal mask.
27688 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27689 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27690 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27691 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
27692 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
27693 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
27694 if (TLI.isShuffleMaskLegal(Mask, VT))
27695 return true;
27696
27697 std::swap(SV0, SV1);
27698 ShuffleVectorSDNode::commuteMask(Mask);
27699 return TLI.isShuffleMaskLegal(Mask, VT);
27700 };
27701
27702 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27703 // Canonicalize shuffles according to rules:
27704 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
27705 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
27706 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
27707 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27708 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
27709 // The incoming shuffle must be of the same type as the result of the
27710 // current shuffle.
27711 assert(N1->getOperand(0).getValueType() == VT &&
27712 "Shuffle types don't match");
27713
27714 SDValue SV0 = N1->getOperand(0);
27715 SDValue SV1 = N1->getOperand(1);
27716 bool HasSameOp0 = N0 == SV0;
27717 bool IsSV1Undef = SV1.isUndef();
27718 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
27719 // Commute the operands of this shuffle so merging below will trigger.
27720 return DAG.getCommutedVectorShuffle(*SVN);
27721 }
27722
27723 // Canonicalize splat shuffles to the RHS to improve merging below.
27724 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
27725 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
27726 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27727 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
27728 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
27729 return DAG.getCommutedVectorShuffle(*SVN);
27730 }
27731
27732 // Try to fold according to rules:
27733 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27734 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27735 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27736 // Don't try to fold shuffles with illegal type.
27737 // Only fold if this shuffle is the only user of the other shuffle.
27738 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
27739 for (int i = 0; i != 2; ++i) {
27740 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
27741 N->isOnlyUserOf(N->getOperand(i).getNode())) {
27742 // The incoming shuffle must be of the same type as the result of the
27743 // current shuffle.
27744 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
27745 assert(OtherSV->getOperand(0).getValueType() == VT &&
27746 "Shuffle types don't match");
27747
27748 SDValue SV0, SV1;
27749 SmallVector<int, 4> Mask;
27750 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
27751 SV0, SV1, Mask)) {
27752 // Check if all indices in Mask are Undef. In case, propagate Undef.
27753 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27754 return DAG.getUNDEF(VT);
27755
27756 return DAG.getVectorShuffle(VT, SDLoc(N),
27757 SV0 ? SV0 : DAG.getUNDEF(VT),
27758 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
27759 }
27760 }
27761 }
27762
27763 // Merge shuffles through binops if we are able to merge them with at least
27764 // one other shuffle.
27765 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
27766 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
27767 unsigned SrcOpcode = N0.getOpcode();
27768 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
27769 (N1.isUndef() ||
27770 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
27771 // Get binop source ops, or just pass on the undef.
27772 SDValue Op00 = N0.getOperand(0);
27773 SDValue Op01 = N0.getOperand(1);
27774 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
27775 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
27776 // TODO: We might be able to relax the VT check but we don't currently
27777 // have any isBinOp() that has different result/ops VTs so play safe until
27778 // we have test coverage.
27779 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
27780 Op01.getValueType() == VT && Op11.getValueType() == VT &&
27781 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
27782 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
27783 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
27784 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
27785 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
27786 SmallVectorImpl<int> &Mask, bool LeftOp,
27787 bool Commute) {
27788 SDValue InnerN = Commute ? N1 : N0;
27789 SDValue Op0 = LeftOp ? Op00 : Op01;
27790 SDValue Op1 = LeftOp ? Op10 : Op11;
27791 if (Commute)
27792 std::swap(Op0, Op1);
27793 // Only accept the merged shuffle if we don't introduce undef elements,
27794 // or the inner shuffle already contained undef elements.
27795 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
27796 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
27797 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
27798 Mask) &&
27799 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
27800 llvm::none_of(Mask, [](int M) { return M < 0; }));
27801 };
27802
27803 // Ensure we don't increase the number of shuffles - we must merge a
27804 // shuffle from at least one of the LHS and RHS ops.
27805 bool MergedLeft = false;
27806 SDValue LeftSV0, LeftSV1;
27807 SmallVector<int, 4> LeftMask;
27808 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
27809 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
27810 MergedLeft = true;
27811 } else {
27812 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27813 LeftSV0 = Op00, LeftSV1 = Op10;
27814 }
27815
27816 bool MergedRight = false;
27817 SDValue RightSV0, RightSV1;
27818 SmallVector<int, 4> RightMask;
27819 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
27820 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
27821 MergedRight = true;
27822 } else {
27823 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27824 RightSV0 = Op01, RightSV1 = Op11;
27825 }
27826
27827 if (MergedLeft || MergedRight) {
27828 SDLoc DL(N);
27829 SDValue LHS = DAG.getVectorShuffle(
27830 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
27831 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
27832 SDValue RHS = DAG.getVectorShuffle(
27833 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
27834 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
27835 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
27836 }
27837 }
27838 }
27839 }
27840
27841 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
27842 return V;
27843
27844 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
27845 // Perform this really late, because it could eliminate knowledge
27846 // of undef elements created by this shuffle.
27847 if (Level < AfterLegalizeTypes)
27848 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
27849 LegalOperations))
27850 return V;
27851
27852 return SDValue();
27853}
27854
27855SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
27856 EVT VT = N->getValueType(0);
27857 if (!VT.isFixedLengthVector())
27858 return SDValue();
27859
27860 // Try to convert a scalar binop with an extracted vector element to a vector
27861 // binop. This is intended to reduce potentially expensive register moves.
27862 // TODO: Check if both operands are extracted.
27863 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
27864 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
27865 SDValue Scalar = N->getOperand(0);
27866 unsigned Opcode = Scalar.getOpcode();
27867 EVT VecEltVT = VT.getScalarType();
27868 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
27869 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
27870 Scalar.getOperand(0).getValueType() == VecEltVT &&
27871 Scalar.getOperand(1).getValueType() == VecEltVT &&
27872 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
27873 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
27874 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
27875 // Match an extract element and get a shuffle mask equivalent.
27876 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
27877
27878 for (int i : {0, 1}) {
27879 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
27880 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
27881 SDValue EE = Scalar.getOperand(i);
27882 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
27883 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
27884 EE.getOperand(0).getValueType() == VT &&
27885 isa<ConstantSDNode>(EE.getOperand(1))) {
27886 // Mask = {ExtractIndex, undef, undef....}
27887 ShufMask[0] = EE.getConstantOperandVal(1);
27888 // Make sure the shuffle is legal if we are crossing lanes.
27889 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
27890 SDLoc DL(N);
27891 SDValue V[] = {EE.getOperand(0),
27892 DAG.getConstant(C->getAPIntValue(), DL, VT)};
27893 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
27894 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
27895 ShufMask);
27896 }
27897 }
27898 }
27899 }
27900
27901 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
27902 // with a VECTOR_SHUFFLE and possible truncate.
27903 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
27904 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
27905 return SDValue();
27906
27907 // If we have an implicit truncate, truncate here if it is legal.
27908 if (VecEltVT != Scalar.getValueType() &&
27909 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
27910 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
27911 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
27912 }
27913
27914 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
27915 if (!ExtIndexC)
27916 return SDValue();
27917
27918 SDValue SrcVec = Scalar.getOperand(0);
27919 EVT SrcVT = SrcVec.getValueType();
27920 unsigned SrcNumElts = SrcVT.getVectorNumElements();
27921 unsigned VTNumElts = VT.getVectorNumElements();
27922 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
27923 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
27924 SmallVector<int, 8> Mask(SrcNumElts, -1);
27925 Mask[0] = ExtIndexC->getZExtValue();
27926 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
27927 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
27928 if (!LegalShuffle)
27929 return SDValue();
27930
27931 // If the initial vector is the same size, the shuffle is the result.
27932 if (VT == SrcVT)
27933 return LegalShuffle;
27934
27935 // If not, shorten the shuffled vector.
27936 if (VTNumElts != SrcNumElts) {
27937 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
27938 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27939 SrcVT.getVectorElementType(), VTNumElts);
27940 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27941 ZeroIdx);
27942 }
27943 }
27944
27945 return SDValue();
27946}
27947
27948SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27949 EVT VT = N->getValueType(0);
27950 SDValue N0 = N->getOperand(0);
27951 SDValue N1 = N->getOperand(1);
27952 SDValue N2 = N->getOperand(2);
27953 uint64_t InsIdx = N->getConstantOperandVal(2);
27954
27955 // Remove insert of UNDEF/POISON.
27956 if (N1.isUndef()) {
27957 if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
27958 return N0;
27959 return DAG.getFreeze(N0);
27960 }
27961
27962 // If this is an insert of an extracted vector into an undef/poison vector, we
27963 // can just use the input to the extract if the types match, and can simplify
27964 // in some cases even if they don't.
27965 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27966 N1.getOperand(1) == N2) {
27967 EVT N1VT = N1.getValueType();
27968 EVT SrcVT = N1.getOperand(0).getValueType();
27969 if (SrcVT == VT) {
27970 // Need to ensure that the result isn't more poisonous when skipping both
27971 // the extract and the insert.
27972 if (N0.getOpcode() == ISD::POISON)
27973 return N1.getOperand(0);
27974 if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
27975 unsigned SubVecNumElts = N1VT.getVectorNumElements();
27976 APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27977 InsIdx + SubVecNumElts);
27978 if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
27979 return N1.getOperand(0);
27980 } else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
27981 return N1.getOperand(0);
27982 }
27983 // TODO: To remove the zero check, need to adjust the offset to
27984 // a multiple of the new src type.
27985 if (isNullConstant(N2)) {
27986 if (VT.knownBitsGE(SrcVT) &&
27987 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27988 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27989 VT, N0, N1.getOperand(0), N2);
27990 else if (VT.knownBitsLE(SrcVT) &&
27991 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27992 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27993 VT, N1.getOperand(0), N2);
27994 }
27995 }
27996
27997 // Handle case where we've ended up inserting back into the source vector
27998 // we extracted the subvector from.
27999 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
28000 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
28001 N1.getOperand(1) == N2)
28002 return N0;
28003
28004 // Simplify scalar inserts into an undef vector:
28005 // insert_subvector undef, (splat X), N2 -> splat X
28006 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
28007 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
28008 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
28009
28010 // insert_subvector (splat X), (splat X), N2 -> splat X
28011 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
28012 N0.getOperand(0) == N1.getOperand(0))
28013 return N0;
28014
28015 // If we are inserting a bitcast value into an undef, with the same
28016 // number of elements, just use the bitcast input of the extract.
28017 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
28018 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
28019 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
28020 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28021 N1.getOperand(0).getOperand(1) == N2 &&
28022 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
28023 VT.getVectorElementCount() &&
28024 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
28025 VT.getSizeInBits()) {
28026 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
28027 }
28028
28029 // If both N0 and N1 are bitcast values on which insert_subvector
28030 // would make sense, pull the bitcast through.
28031 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
28032 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
28033 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
28034 SDValue CN0 = N0.getOperand(0);
28035 SDValue CN1 = N1.getOperand(0);
28036 EVT CN0VT = CN0.getValueType();
28037 EVT CN1VT = CN1.getValueType();
28038 if (CN0VT.isVector() && CN1VT.isVector() &&
28039 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
28040 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
28041 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
28042 CN0.getValueType(), CN0, CN1, N2);
28043 return DAG.getBitcast(VT, NewINSERT);
28044 }
28045 }
28046
28047 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
28048 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
28049 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
28050 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28051 N0.getOperand(1).getValueType() == N1.getValueType() &&
28052 N0.getOperand(2) == N2)
28053 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
28054 N1, N2);
28055
28056 // Eliminate an intermediate insert into an undef vector:
28057 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
28058 // insert_subvector undef, X, 0
28059 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
28060 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
28061 isNullConstant(N2))
28062 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
28063 N1.getOperand(1), N2);
28064
28065 // Push subvector bitcasts to the output, adjusting the index as we go.
28066 // insert_subvector(bitcast(v), bitcast(s), c1)
28067 // -> bitcast(insert_subvector(v, s, c2))
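// e.g. inserting a v1i64 (bitcast of v2i32 S) into a v2i64 (bitcast of v4i32 V)
// at index 1 becomes bitcast (v4i32 insert_subvector(V, S, 2)), if supported.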
28068 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
28069 N1.getOpcode() == ISD::BITCAST) {
28070 SDValue N0Src = peekThroughBitcasts(N0);
28071 SDValue N1Src = peekThroughBitcasts(N1);
28072 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
28073 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
28074 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
28075 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
28076 EVT NewVT;
28077 SDLoc DL(N);
28078 SDValue NewIdx;
28079 LLVMContext &Ctx = *DAG.getContext();
28080 ElementCount NumElts = VT.getVectorElementCount();
28081 unsigned EltSizeInBits = VT.getScalarSizeInBits();
28082 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
28083 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
28084 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
28085 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
28086 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
28087 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
28088 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
28089 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
28090 NumElts.divideCoefficientBy(Scale));
28091 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
28092 }
28093 }
28094 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
28095 SDValue Res = DAG.getBitcast(NewVT, N0Src);
28096 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
28097 return DAG.getBitcast(VT, Res);
28098 }
28099 }
28100 }
28101
28102 // Canonicalize insert_subvector dag nodes.
28103 // Example:
28104 // (insert_subvector (insert_subvector A, SubB, Idx1), SubA, Idx0), Idx0 < Idx1
28105 // -> (insert_subvector (insert_subvector A, SubA, Idx0), SubB, Idx1)
28106 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
28107 N1.getValueType() == N0.getOperand(1).getValueType()) {
28108 unsigned OtherIdx = N0.getConstantOperandVal(2);
28109 if (InsIdx < OtherIdx) {
28110 // Swap nodes.
28111 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
28112 N0.getOperand(0), N1, N2);
28113 AddToWorklist(NewOp.getNode());
28114 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
28115 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
28116 }
28117 }
28118
28119 // If the input vector is a concatenation, and the insert replaces
28120 // one of the pieces, we can optimize into a single concat_vectors.
28121 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
28122 N0.getOperand(0).getValueType() == N1.getValueType() &&
28123 N0.getOperand(0).getValueType().isScalableVector() ==
28124 N1.getValueType().isScalableVector()) {
28125 unsigned Factor = N1.getValueType().getVectorMinNumElements();
28126 SmallVector<SDValue, 8> Ops(N0->ops());
28127 Ops[InsIdx / Factor] = N1;
28128 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
28129 }
28130
28131 // Simplify source operands based on insertion.
28132 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
28133 return SDValue(N, 0);
28134
28135 return SDValue();
28136}
28137
28138SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
28139 SDValue N0 = N->getOperand(0);
28140
28141 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
28142 if (N0->getOpcode() == ISD::FP16_TO_FP)
28143 return N0->getOperand(0);
28144
28145 return SDValue();
28146}
28147
28148SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
28149 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28150 auto Op = N->getOpcode();
28151 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
28152 "opcode should be FP16_TO_FP or BF16_TO_FP.");
28153 SDValue N0 = N->getOperand(0);
28154
28155 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
28156 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28157 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
28158 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
28159 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
28160 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
28161 }
28162 }
28163
28164 if (SDValue CastEliminated = eliminateFPCastPair(N))
28165 return CastEliminated;
28166
28167 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28168 // because they are wrapped inside the <1 x f16> type. Try one last time to
28169 // get rid of them.
28170 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
28171 N->getValueType(0), {N0});
28172 return Folded;
28173}
28174
28175SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28176 SDValue N0 = N->getOperand(0);
28177
28178 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28179 if (N0->getOpcode() == ISD::BF16_TO_FP)
28180 return N0->getOperand(0);
28181
28182 return SDValue();
28183}
28184
28185SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28186 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28187 return visitFP16_TO_FP(N);
28188}
28189
28190SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28191 SDValue N0 = N->getOperand(0);
28192 EVT VT = N0.getValueType();
28193 unsigned Opcode = N->getOpcode();
28194
28195 // VECREDUCE over 1-element vector is just an extract.
28196 if (VT.getVectorElementCount().isScalar()) {
28197 SDLoc dl(N);
28198 SDValue Res =
28199 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28200 DAG.getVectorIdxConstant(0, dl));
28201 if (Res.getValueType() != N->getValueType(0))
28202 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28203 return Res;
28204 }
28205
28206 // On a boolean vector an and/or reduction is the same as a umin/umax
28207 // reduction. Convert them if the latter is legal while the former isn't.
28208 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28209 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28210 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28211 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28212 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28213 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28214 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28215 }
28216
28217 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28218 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28219 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28220 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28221 SDValue Vec = N0.getOperand(0);
28222 SDValue Subvec = N0.getOperand(1);
28223 if ((Opcode == ISD::VECREDUCE_OR &&
28224 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28225 (Opcode == ISD::VECREDUCE_AND &&
28226 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28227 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28228 }
28229
28230 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28231 // Same for zext and anyext, and for and/or/xor reductions.
28232 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28233 Opcode == ISD::VECREDUCE_XOR) &&
28234 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28235 N0.getOpcode() == ISD::ZERO_EXTEND ||
28236 N0.getOpcode() == ISD::ANY_EXTEND) &&
28237 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28238 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28239 N0.getOperand(0).getValueType().getVectorElementType(),
28240 N0.getOperand(0));
28241 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28242 }
28243 return SDValue();
28244}
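// For example, the extension fold above turns
//   vecreduce_or (zext <4 x i8> %x to <4 x i32>)
// into
//   zext (vecreduce_or <4 x i8> %x) to i32
// so the reduction is performed on the narrower element type.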
28245
28246SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28247 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28248
28249 // FSUB -> FMA combines:
28250 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28251 AddToWorklist(Fused.getNode());
28252 return Fused;
28253 }
28254 return SDValue();
28255}
28256
28257SDValue DAGCombiner::visitVPOp(SDNode *N) {
28258
28259 if (N->getOpcode() == ISD::VP_GATHER)
28260 if (SDValue SD = visitVPGATHER(N))
28261 return SD;
28262
28263 if (N->getOpcode() == ISD::VP_SCATTER)
28264 if (SDValue SD = visitVPSCATTER(N))
28265 return SD;
28266
28267 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28268 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28269 return SD;
28270
28271 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28272 if (SDValue SD = visitVP_STRIDED_STORE(N))
28273 return SD;
28274
28275 // VP operations in which all vector elements are disabled - either by
28276 // determining that the mask is all false or that the EVL is 0 - can be
28277 // eliminated.
28278 bool AreAllEltsDisabled = false;
28279 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28280 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28281 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28282 AreAllEltsDisabled |=
28283 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28284
28285 // This is the only generic VP combine we support for now.
28286 if (!AreAllEltsDisabled) {
28287 switch (N->getOpcode()) {
28288 case ISD::VP_FADD:
28289 return visitVP_FADD(N);
28290 case ISD::VP_FSUB:
28291 return visitVP_FSUB(N);
28292 case ISD::VP_FMA:
28293 return visitFMA<VPMatchContext>(N);
28294 case ISD::VP_SELECT:
28295 return visitVP_SELECT(N);
28296 case ISD::VP_MUL:
28297 return visitMUL<VPMatchContext>(N);
28298 case ISD::VP_SUB:
28299 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28300 default:
28301 break;
28302 }
28303 return SDValue();
28304 }
28305
28306 // Binary operations can be replaced by UNDEF.
28307 if (ISD::isVPBinaryOp(N->getOpcode()))
28308 return DAG.getUNDEF(N->getValueType(0));
28309
28310 // VP Memory operations can be replaced by either the chain (stores) or the
28311 // chain + undef (loads).
28312 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28313 if (MemSD->writeMem())
28314 return MemSD->getChain();
28315 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28316 }
28317
28318 // Reduction operations return the start operand when no elements are active.
28319 if (ISD::isVPReduction(N->getOpcode()))
28320 return N->getOperand(0);
28321
28322 return SDValue();
28323}
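// For example, a VP node whose EVL is the constant 0 (or whose mask is an
// all-zeros splat) is handled by the all-elements-disabled path above:
//   vp.add(%x, %y, %m, evl=0)            --> undef
//   vp.store(%v, %p, %m, evl=0)          --> its input chain
//   vp.reduce.add(%start, %v, %m, evl=0) --> %start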
28324
28325SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28326 SDValue Chain = N->getOperand(0);
28327 SDValue Ptr = N->getOperand(1);
28328 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28329
28330 // Check if the memory where the FP state is written is used only in a single
28331 // load operation.
28332 LoadSDNode *LdNode = nullptr;
28333 for (auto *U : Ptr->users()) {
28334 if (U == N)
28335 continue;
28336 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28337 if (LdNode && LdNode != Ld)
28338 return SDValue();
28339 LdNode = Ld;
28340 continue;
28341 }
28342 return SDValue();
28343 }
28344 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28345 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28346 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28347 return SDValue();
28348
28349 // Check if the loaded value is used only in a store operation.
28350 StoreSDNode *StNode = nullptr;
28351 for (SDUse &U : LdNode->uses()) {
28352 if (U.getResNo() == 0) {
28353 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28354 if (StNode)
28355 return SDValue();
28356 StNode = St;
28357 } else {
28358 return SDValue();
28359 }
28360 }
28361 }
28362 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28363 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28364 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28365 return SDValue();
28366
28367 // Create new node GET_FPENV_MEM, which uses the store address to write FP
28368 // environment.
28369 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28370 StNode->getMemOperand());
28371 CombineTo(StNode, Res, false);
28372 return Res;
28373}
28374
28375SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28376 SDValue Chain = N->getOperand(0);
28377 SDValue Ptr = N->getOperand(1);
28378 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28379
28380 // Check if the address of the FP state is used only in a single store operation.
28381 StoreSDNode *StNode = nullptr;
28382 for (auto *U : Ptr->users()) {
28383 if (U == N)
28384 continue;
28385 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28386 if (StNode && StNode != St)
28387 return SDValue();
28388 StNode = St;
28389 continue;
28390 }
28391 return SDValue();
28392 }
28393 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28394 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28395 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28396 return SDValue();
28397
28398 // Check if the stored value is loaded from some location and the loaded
28399 // value is used only in the store operation.
28400 SDValue StValue = StNode->getValue();
28401 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28402 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28403 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28404 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28405 return SDValue();
28406
28407 // Create new node SET_FPENV_MEM, which uses the load address to read FP
28408 // environment.
28409 SDValue Res =
28410 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28411 LdNode->getMemOperand());
28412 return Res;
28413}
28414
28415 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
28416/// with the destination vector and a zero vector.
28417/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28418/// vector_shuffle V, Zero, <0, 4, 2, 4>
28419SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28420 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28421
28422 EVT VT = N->getValueType(0);
28423 SDValue LHS = N->getOperand(0);
28424 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28425 SDLoc DL(N);
28426
28427 // Make sure we're not running after operation legalization where it
28428 // may have custom lowered the vector shuffles.
28429 if (LegalOperations)
28430 return SDValue();
28431
28432 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28433 return SDValue();
28434
28435 EVT RVT = RHS.getValueType();
28436 unsigned NumElts = RHS.getNumOperands();
28437
28438 // Attempt to create a valid clear mask, splitting the mask into
28439 // sub elements and checking to see if each is
28440 // all zeros or all ones - suitable for shuffle masking.
28441 auto BuildClearMask = [&](int Split) {
28442 int NumSubElts = NumElts * Split;
28443 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28444
28445 SmallVector<int, 8> Indices;
28446 for (int i = 0; i != NumSubElts; ++i) {
28447 int EltIdx = i / Split;
28448 int SubIdx = i % Split;
28449 SDValue Elt = RHS.getOperand(EltIdx);
28450 // X & undef --> 0 (not undef). So this lane must be converted to choose
28451 // from the zero constant vector (same as if the element had all 0-bits).
28452 if (Elt.isUndef()) {
28453 Indices.push_back(i + NumSubElts);
28454 continue;
28455 }
28456
28457 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28458 if (!Bits)
28459 return SDValue();
28460
28461 // Extract the sub element from the constant bit mask.
28462 if (DAG.getDataLayout().isBigEndian())
28463 *Bits =
28464 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28465 else
28466 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28467
28468 if (Bits->isAllOnes())
28469 Indices.push_back(i);
28470 else if (*Bits == 0)
28471 Indices.push_back(i + NumSubElts);
28472 else
28473 return SDValue();
28474 }
28475
28476 // Let's see if the target supports this vector_shuffle.
28477 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28478 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28479 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28480 return SDValue();
28481
28482 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28483 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28484 DAG.getBitcast(ClearVT, LHS),
28485 Zero, Indices));
28486 };
28487
28488 // Determine maximum split level (byte level masking).
28489 int MaxSplit = 1;
28490 if (RVT.getScalarSizeInBits() % 8 == 0)
28491 MaxSplit = RVT.getScalarSizeInBits() / 8;
28492
28493 for (int Split = 1; Split <= MaxSplit; ++Split)
28494 if (RVT.getScalarSizeInBits() % Split == 0)
28495 if (SDValue S = BuildClearMask(Split))
28496 return S;
28497
28498 return SDValue();
28499}
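// Worked example of the sub-element split above (little-endian): for
//   and v2i32 V, <0x0000FFFF, 0xFFFF0000>
// no whole i32 element is all-ones or all-zero, but at 16-bit granularity
// (Split == 2) every piece is, so the mask <0, 5, 6, 3> selects between
// (bitcast V to v4i16) and a v4i16 zero vector.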
28500
28501/// If a vector binop is performed on splat values, it may be profitable to
28502 /// extract, scalarize, and insert/splat.
28503 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28504 const SDLoc &DL, bool LegalTypes) {
28505 SDValue N0 = N->getOperand(0);
28506 SDValue N1 = N->getOperand(1);
28507 unsigned Opcode = N->getOpcode();
28508 EVT VT = N->getValueType(0);
28509 EVT EltVT = VT.getVectorElementType();
28510 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28511
28512 // TODO: Remove/replace the extract cost check? If the elements are available
28513 // as scalars, then there may be no extract cost. Should we ask if
28514 // inserting a scalar back into a vector is cheap instead?
28515 int Index0, Index1;
28516 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28517 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28518 // Extract element from splat_vector should be free.
28519 // TODO: use DAG.isSplatValue instead?
28520 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28521 N1.getOpcode() == ISD::SPLAT_VECTOR;
28522 if (!Src0 || !Src1 || Index0 != Index1 ||
28523 Src0.getValueType().getVectorElementType() != EltVT ||
28524 Src1.getValueType().getVectorElementType() != EltVT ||
28525 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28526 // If before type legalization, allow scalar types that will eventually be
28527 // made legal.
28528 !TLI.isOperationLegalOrCustom(
28529 Opcode, LegalTypes
28530 ? EltVT
28531 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28532 return SDValue();
28533
28534 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28535 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28536 return SDValue();
28537
28538 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28539 // All but one element should have an undef input, which will fold to a
28540 // constant or undef. Avoid splatting which would over-define potentially
28541 // undefined elements.
28542
28543 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28544 // build_vec ..undef, (bo X, Y), undef...
28545 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28546 DAG.ExtractVectorElements(Src0, EltsX);
28547 DAG.ExtractVectorElements(Src1, EltsY);
28548
28549 for (auto [X, Y] : zip(EltsX, EltsY))
28550 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28551 return DAG.getBuildVector(VT, DL, EltsResult);
28552 }
28553
28554 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28555 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28556 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28557 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28558
28559 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28560 return DAG.getSplat(VT, DL, ScalarBO);
28561}
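// For example, when the checks above pass:
//   add (splat_vector X), (splat_vector Y) --> splat_vector (add X, Y)
// so one scalar add feeds the splat instead of a full-width vector add.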
28562
28563/// Visit a vector cast operation, like FP_EXTEND.
28564SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28565 EVT VT = N->getValueType(0);
28566 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28567 EVT EltVT = VT.getVectorElementType();
28568 unsigned Opcode = N->getOpcode();
28569
28570 SDValue N0 = N->getOperand(0);
28571 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28572
28573 // TODO: promote operation might be also good here?
28574 int Index0;
28575 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28576 if (Src0 &&
28577 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28578 TLI.isExtractVecEltCheap(VT, Index0)) &&
28579 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28580 TLI.preferScalarizeSplat(N)) {
28581 EVT SrcVT = N0.getValueType();
28582 EVT SrcEltVT = SrcVT.getVectorElementType();
28583 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28584 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28585 SDValue Elt =
28586 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28587 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28588 if (VT.isScalableVector())
28589 return DAG.getSplatVector(VT, DL, ScalarBO);
28590 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28591 return DAG.getBuildVector(VT, DL, Ops);
28592 }
28593 }
28594
28595 return SDValue();
28596}
28597
28598/// Visit a binary vector operation, like ADD.
28599SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28600 EVT VT = N->getValueType(0);
28601 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28602
28603 SDValue LHS = N->getOperand(0);
28604 SDValue RHS = N->getOperand(1);
28605 unsigned Opcode = N->getOpcode();
28606 SDNodeFlags Flags = N->getFlags();
28607
28608 // Move unary shuffles with identical masks after a vector binop:
28609 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28610 // --> shuffle (VBinOp A, B), Undef, Mask
28611 // This does not require type legality checks because we are creating the
28612 // same types of operations that are in the original sequence. We do have to
28613 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28614 // though. This code is adapted from the identical transform in instcombine.
28615 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28616 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28617 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28618 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28619 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28620 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28621 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28622 RHS.getOperand(0), Flags);
28623 SDValue UndefV = LHS.getOperand(1);
28624 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28625 }
28626
28627 // Try to sink a splat shuffle after a binop with a uniform constant.
28628 // This is limited to cases where neither the shuffle nor the constant have
28629 // undefined elements because that could be poison-unsafe or inhibit
28630 // demanded elements analysis. It is further limited to not change a splat
28631 // of an inserted scalar because that may be optimized better by
28632 // load-folding or other target-specific behaviors.
28633 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28634 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28635 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28636 // binop (splat X), (splat C) --> splat (binop X, C)
28637 SDValue X = Shuf0->getOperand(0);
28638 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28639 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28640 Shuf0->getMask());
28641 }
28642 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
28643 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
28644 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28645 // binop (splat C), (splat X) --> splat (binop C, X)
28646 SDValue X = Shuf1->getOperand(0);
28647 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
28648 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28649 Shuf1->getMask());
28650 }
28651 }
28652
28653 // The following pattern is likely to emerge with vector reduction ops. Moving
28654 // the binary operation ahead of insertion may allow using a narrower vector
28655 // instruction that has better performance than the wide version of the op:
28656 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
28657 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
28658 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
28659 LHS.getOperand(2) == RHS.getOperand(2) &&
28660 (LHS.hasOneUse() || RHS.hasOneUse())) {
28661 SDValue X = LHS.getOperand(1);
28662 SDValue Y = RHS.getOperand(1);
28663 SDValue Z = LHS.getOperand(2);
28664 EVT NarrowVT = X.getValueType();
28665 if (NarrowVT == Y.getValueType() &&
28666 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
28667 LegalOperations)) {
28668 // (binop undef, undef) may not return undef, so compute that result.
28669 SDValue VecC =
28670 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
28671 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
28672 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
28673 }
28674 }
28675
28676 // Make sure all but the first op are undef or constant.
28677 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
28678 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
28679 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
28680 return Op.isUndef() ||
28681 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
28682 });
28683 };
28684
28685 // The following pattern is likely to emerge with vector reduction ops. Moving
28686 // the binary operation ahead of the concat may allow using a narrower vector
28687 // instruction that has better performance than the wide version of the op:
28688 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
28689 // concat (VBinOp X, Y), VecC
28690 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
28691 (LHS.hasOneUse() || RHS.hasOneUse())) {
28692 EVT NarrowVT = LHS.getOperand(0).getValueType();
28693 if (NarrowVT == RHS.getOperand(0).getValueType() &&
28694 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
28695 unsigned NumOperands = LHS.getNumOperands();
28696 SmallVector<SDValue, 4> ConcatOps;
28697 for (unsigned i = 0; i != NumOperands; ++i) {
28698 // This constant folds for operands 1 and up.
28699 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
28700 RHS.getOperand(i)));
28701 }
28702
28703 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
28704 }
28705 }
28706
28707 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
28708 return V;
28709
28710 return SDValue();
28711}
28712
28713SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
28714 SDValue N2) {
28715 assert(N0.getOpcode() == ISD::SETCC &&
28716 "First argument must be a SetCC node!");
28717
28718 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
28719 cast<CondCodeSDNode>(N0.getOperand(2))->get());
28720
28721 // If we got a simplified select_cc node back from SimplifySelectCC, then
28722 // break it down into a new SETCC node, and a new SELECT node, and then return
28723 // the SELECT node, since we were called with a SELECT node.
28724 if (SCC.getNode()) {
28725 // Check to see if we got a select_cc back (to turn into setcc/select).
28726 // Otherwise, just return whatever node we got back, like fabs.
28727 if (SCC.getOpcode() == ISD::SELECT_CC) {
28728 const SDNodeFlags Flags = N0->getFlags();
28729 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
28730 N0.getValueType(),
28731 SCC.getOperand(0), SCC.getOperand(1),
28732 SCC.getOperand(4), Flags);
28733 AddToWorklist(SETCC.getNode());
28734 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
28735 SCC.getOperand(2), SCC.getOperand(3), Flags);
28736 }
28737
28738 return SCC;
28739 }
28740 return SDValue();
28741}
28742
28743/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
28744/// being selected between, see if we can simplify the select. Callers of this
28745/// should assume that TheSelect is deleted if this returns true. As such, they
28746/// should return the appropriate thing (e.g. the node) back to the top-level of
28747/// the DAG combiner loop to avoid it being looked at.
28748bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
28749 SDValue RHS) {
28750 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28751 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
28752 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
28753 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
28754 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
28755 SDValue Sqrt = RHS;
28756 ISD::CondCode CC;
28757 SDValue CmpLHS;
28758 const ConstantFPSDNode *Zero = nullptr;
28759
28760 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
28761 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
28762 CmpLHS = TheSelect->getOperand(0);
28763 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
28764 } else {
28765 // SELECT or VSELECT
28766 SDValue Cmp = TheSelect->getOperand(0);
28767 if (Cmp.getOpcode() == ISD::SETCC) {
28768 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
28769 CmpLHS = Cmp.getOperand(0);
28770 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
28771 }
28772 }
28773 if (Zero && Zero->isZero() &&
28774 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
28775 CC == ISD::SETULT || CC == ISD::SETLT)) {
28776 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28777 CombineTo(TheSelect, Sqrt);
28778 return true;
28779 }
28780 }
28781 }
28782 // Cannot simplify select with vector condition
28783 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
28784
28785 // If this is a select from two identical things, try to pull the operation
28786 // through the select.
28787 if (LHS.getOpcode() != RHS.getOpcode() ||
28788 !LHS.hasOneUse() || !RHS.hasOneUse())
28789 return false;
28790
28791 // If this is a load and the token chain is identical, replace the select
28792 // of two loads with a load through a select of the address to load from.
28793 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
28794 // constants have been dropped into the constant pool.
28795 if (LHS.getOpcode() == ISD::LOAD) {
28796 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
28797 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
28798
28799 // Token chains must be identical.
28800 if (LHS.getOperand(0) != RHS.getOperand(0) ||
28801 // Do not let this transformation reduce the number of volatile loads.
28802 // Be conservative for atomics for the moment
28803 // TODO: This does appear to be legal for unordered atomics (see D66309)
28804 !LLD->isSimple() || !RLD->isSimple() ||
28805 // FIXME: If either is a pre/post inc/dec load,
28806 // we'd need to split out the address adjustment.
28807 LLD->isIndexed() || RLD->isIndexed() ||
28808 // If this is an EXTLOAD, the VT's must match.
28809 LLD->getMemoryVT() != RLD->getMemoryVT() ||
28810 // If this is an EXTLOAD, the kind of extension must match.
28811 (LLD->getExtensionType() != RLD->getExtensionType() &&
28812 // The only exception is if one of the extensions is anyext.
28813 LLD->getExtensionType() != ISD::EXTLOAD &&
28814 RLD->getExtensionType() != ISD::EXTLOAD) ||
28815 // FIXME: this discards src value information. This is
28816 // over-conservative. It would be beneficial to be able to remember
28817 // both potential memory locations. Since we are discarding
28818 // src value info, don't do the transformation if the memory
28819 // locations are not in the default address space.
28820 LLD->getPointerInfo().getAddrSpace() != 0 ||
28821 RLD->getPointerInfo().getAddrSpace() != 0 ||
28822 // We can't produce a CMOV of a TargetFrameIndex since we won't
28823 // generate the address generation required.
28824 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28825 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28826 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
28827 LLD->getBasePtr().getValueType()))
28828 return false;
28829
28830 // The loads must not depend on one another.
28831 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
28832 return false;
28833
28834 // Check that the select condition doesn't reach either load. If so,
28835 // folding this will induce a cycle into the DAG. If not, this is safe to
28836 // xform, so create a select of the addresses.
28837
28838 SmallPtrSet<const SDNode *, 32> Visited;
28839 SmallVector<const SDNode *, 16> Worklist;
28840
28841 // Always fail if LLD and RLD are not independent. TheSelect is a
28842 // predecessor to all Nodes in question so we need not search past it.
28843
28844 Visited.insert(TheSelect);
28845 Worklist.push_back(LLD);
28846 Worklist.push_back(RLD);
28847
28848 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
28849 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
28850 return false;
28851
28852 SDValue Addr;
28853 if (TheSelect->getOpcode() == ISD::SELECT) {
28854 // We cannot do this optimization if any pair of {RLD, LLD} is a
28855 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
28856 // Loads, we only need to check if CondNode is a successor to one of the
28857 // loads. We can further avoid this if there's no use of their chain
28858 // value.
28859 SDNode *CondNode = TheSelect->getOperand(0).getNode();
28860 Worklist.push_back(CondNode);
28861
28862 if ((LLD->hasAnyUseOfValue(1) &&
28863 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28864 (RLD->hasAnyUseOfValue(1) &&
28865 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28866 return false;
28867
28868 Addr = DAG.getSelect(SDLoc(TheSelect),
28869 LLD->getBasePtr().getValueType(),
28870 TheSelect->getOperand(0), LLD->getBasePtr(),
28871 RLD->getBasePtr());
28872 } else { // Otherwise SELECT_CC
28873 // We cannot do this optimization if any pair of {RLD, LLD} is a
28874 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
28875 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
28876 // one of the loads. We can further avoid this if there's no use of their
28877 // chain value.
28878
28879 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
28880 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
28881 Worklist.push_back(CondLHS);
28882 Worklist.push_back(CondRHS);
28883
28884 if ((LLD->hasAnyUseOfValue(1) &&
28885 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28886 (RLD->hasAnyUseOfValue(1) &&
28887 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28888 return false;
28889
28890 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
28891 LLD->getBasePtr().getValueType(),
28892 TheSelect->getOperand(0),
28893 TheSelect->getOperand(1),
28894 LLD->getBasePtr(), RLD->getBasePtr(),
28895 TheSelect->getOperand(4));
28896 }
28897
28898 SDValue Load;
28899 // It is safe to replace the two loads if they have different alignments,
28900 // but the new load must be the minimum (most restrictive) alignment of the
28901 // inputs.
28902 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
28903 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
28904 if (!RLD->isInvariant())
28905 MMOFlags &= ~MachineMemOperand::MOInvariant;
28906 if (!RLD->isDereferenceable())
28907 MMOFlags &= ~MachineMemOperand::MODereferenceable;
28908 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
28909 // FIXME: Discards pointer and AA info.
28910 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
28911 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
28912 MMOFlags);
28913 } else {
28914 // FIXME: Discards pointer and AA info.
28915 Load = DAG.getExtLoad(
28916 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
28917 : LLD->getExtensionType(),
28918 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
28919 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
28920 }
28921
28922 // Users of the select now use the result of the load.
28923 CombineTo(TheSelect, Load);
28924
28925 // Users of the old loads now use the new load's chain. We know the
28926 // old-load value is dead now.
28927 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
28928 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
28929 return true;
28930 }
28931
28932 return false;
28933}
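// For example, the load-select fold above rewrites
//   select %c, (load %p), (load %q) --> load (select %c, %p, %q)
// when both loads are simple, share the same chain and otherwise match.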
28934
28935/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
28936/// bitwise 'and'.
28937SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
28938 SDValue N1, SDValue N2, SDValue N3,
28939 ISD::CondCode CC) {
28940 // If this is a select where the false operand is zero and the compare is a
28941 // check of the sign bit, see if we can perform the "gzip trick":
28942 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
28943 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
28944 EVT XType = N0.getValueType();
28945 EVT AType = N2.getValueType();
28946 if (!isNullConstant(N3) || !XType.bitsGE(AType))
28947 return SDValue();
28948
28949 // If the comparison is testing for a positive value, we have to invert
28950 // the sign bit mask, so only do that transform if the target has a bitwise
28951 // 'and not' instruction (the invert is free).
28952 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
28953 // (X > -1) ? A : 0
28954 // (X > 0) ? X : 0 <-- This is canonical signed max.
28955 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
28956 return SDValue();
28957 } else if (CC == ISD::SETLT) {
28958 // (X < 0) ? A : 0
28959 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
28960 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28961 return SDValue();
28962 } else {
28963 return SDValue();
28964 }
28965
28966 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28967 // constant.
28968 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28969 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28970 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28971 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28972 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28973 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28974 AddToWorklist(Shift.getNode());
28975
28976 if (XType.bitsGT(AType)) {
28977 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28978 AddToWorklist(Shift.getNode());
28979 }
28980
28981 if (CC == ISD::SETGT)
28982 Shift = DAG.getNOT(DL, Shift, AType);
28983
28984 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28985 }
28986 }
28987
28988 unsigned ShCt = XType.getSizeInBits() - 1;
28989 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28990 return SDValue();
28991
28992 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28993 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28994 AddToWorklist(Shift.getNode());
28995
28996 if (XType.bitsGT(AType)) {
28997 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28998 AddToWorklist(Shift.getNode());
28999 }
29000
29001 if (CC == ISD::SETGT)
29002 Shift = DAG.getNOT(DL, Shift, AType);
29003
29004 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
29005}
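// Worked i32 example of the folds above:
//   select_cc setlt X, 0, A, 0  -->  and (sra X, 31), A
// since (sra X, 31) is all-ones exactly when X is negative. When A is a
// single-bit constant such as 0x10, the cheaper
//   and (srl X, 27), 0x10
// is used instead (shift count 32 - log2(0x10) - 1 == 27 moves the sign bit
// straight into bit 4).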
29006
29007// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
29008SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
29009 SDValue N0 = N->getOperand(0);
29010 SDValue N1 = N->getOperand(1);
29011 SDValue N2 = N->getOperand(2);
29012 SDLoc DL(N);
29013
29014 unsigned BinOpc = N1.getOpcode();
29015 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
29016 (N1.getResNo() != N2.getResNo()))
29017 return SDValue();
29018
29019 // The use checks are intentionally on SDNode because we may be dealing
29020 // with opcodes that produce more than one SDValue.
29021 // TODO: Do we really need to check N0 (the condition operand of the select)?
29022 // But removing that clause could cause an infinite loop...
29023 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
29024 return SDValue();
29025
29026 // Binops may include opcodes that return multiple values, so all values
29027 // must be created/propagated from the newly created binops below.
29028 SDVTList OpVTs = N1->getVTList();
29029
29030 // Fold select(cond, binop(x, y), binop(z, y))
29031 // --> binop(select(cond, x, z), y)
29032 if (N1.getOperand(1) == N2.getOperand(1)) {
29033 SDValue N10 = N1.getOperand(0);
29034 SDValue N20 = N2.getOperand(0);
29035 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
29036 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29037 SDValue NewBinOp =
29038 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
29039 return SDValue(NewBinOp.getNode(), N1.getResNo());
29040 }
29041
29042 // Fold select(cond, binop(x, y), binop(x, z))
29043 // --> binop(x, select(cond, y, z))
29044 if (N1.getOperand(0) == N2.getOperand(0)) {
29045 SDValue N11 = N1.getOperand(1);
29046 SDValue N21 = N2.getOperand(1);
29047 // Second op VT might be different (e.g. shift amount type)
29048 if (N11.getValueType() == N21.getValueType()) {
29049 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
29050 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29051 SDValue NewBinOp =
29052 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
29053 return SDValue(NewBinOp.getNode(), N1.getResNo());
29054 }
29055 }
29056
29057 // TODO: Handle isCommutativeBinOp patterns as well?
29058 return SDValue();
29059}
29060
29061// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
29062SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
29063 SDValue N0 = N->getOperand(0);
29064 EVT VT = N->getValueType(0);
29065 bool IsFabs = N->getOpcode() == ISD::FABS;
29066 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
29067
29068 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
29069 return SDValue();
29070
29071 SDValue Int = N0.getOperand(0);
29072 EVT IntVT = Int.getValueType();
29073
29074 // The operand to cast should be integer.
29075 if (!IntVT.isInteger() || IntVT.isVector())
29076 return SDValue();
29077
29078 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
29079 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
29080 APInt SignMask;
29081 if (N0.getValueType().isVector()) {
29082 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
29083 // 0x7f...) per element and splat it.
29084 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
29085 if (IsFabs)
29086 SignMask = ~SignMask;
29087 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
29088 } else {
29089 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
29090 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
29091 if (IsFabs)
29092 SignMask = ~SignMask;
29093 }
29094 SDLoc DL(N0);
29095 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
29096 DAG.getConstant(SignMask, DL, IntVT));
29097 AddToWorklist(Int.getNode());
29098 return DAG.getBitcast(VT, Int);
29099}
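// For example, on f32 the rewrite above produces:
//   fneg (bitcast i32 X to f32) --> bitcast (xor X, 0x80000000) to f32
//   fabs (bitcast i32 X to f32) --> bitcast (and X, 0x7fffffff) to f32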
29100
29101 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
29102/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
29103/// in it. This may be a win when the constant is not otherwise available
29104/// because it replaces two constant pool loads with one.
29105SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
29106 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
29107 ISD::CondCode CC) {
29108 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
29109 return SDValue();
29110
29111 // If we are before legalize types, we want the other legalization to happen
29112 // first (for example, to avoid messing with soft float).
29113 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
29114 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
29115 EVT VT = N2.getValueType();
29116 if (!TV || !FV || !TLI.isTypeLegal(VT))
29117 return SDValue();
29118
29119 // If a constant can be materialized without loads, this does not make sense.
29120 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
29121 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
29122 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
29123 return SDValue();
29124
29125 // If both constants have multiple uses, then we won't need to do an extra
29126 // load. The values are likely around in registers for other users.
29127 if (!TV->hasOneUse() && !FV->hasOneUse())
29128 return SDValue();
29129
29130 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
29131 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
29132 Type *FPTy = Elts[0]->getType();
29133 const DataLayout &TD = DAG.getDataLayout();
29134
29135 // Create a ConstantArray of the two constants.
29136 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
29137 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
29138 TD.getPrefTypeAlign(FPTy));
29139 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
29140
29141 // Get offsets to the 0 and 1 elements of the array, so we can select between
29142 // them.
29143 SDValue Zero = DAG.getIntPtrConstant(0, DL);
29144 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
29145 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
29146 SDValue Cond =
29147 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
29148 AddToWorklist(Cond.getNode());
29149 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
29150 AddToWorklist(CstOffset.getNode());
29151 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
29152 AddToWorklist(CPIdx.getNode());
29153 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
29154 MachinePointerInfo::getConstantPool(
29155 DAG.getMachineFunction()), Alignment);
29156}
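// For example, "(a setlt b) ? 1.0f : 2.0f" is lowered through a two-element
// constant pool { 2.0f, 1.0f }: the false value is stored first, so a true
// condition selects the element at offset sizeof(float):
//   load (tmp + ((a setlt b) ? 4 : 0))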
29157
29158/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
29159/// where 'cond' is the comparison specified by CC.
29160SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
29161 SDValue N2, SDValue N3, ISD::CondCode CC,
29162 bool NotExtCompare) {
29163 // (x ? y : y) -> y.
29164 if (N2 == N3) return N2;
29165
29166 EVT CmpOpVT = N0.getValueType();
29167 EVT CmpResVT = getSetCCResultType(CmpOpVT);
29168 EVT VT = N2.getValueType();
29169 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
29170 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29171 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
29172
29173 // Determine if the condition we're dealing with is constant.
29174 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
29175 AddToWorklist(SCC.getNode());
29176 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29177 // fold select_cc true, x, y -> x
29178 // fold select_cc false, x, y -> y
29179 return !(SCCC->isZero()) ? N2 : N3;
29180 }
29181 }
29182
29183 if (SDValue V =
29184 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29185 return V;
29186
29187 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29188 return V;
29189
29190 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29191 // where y has a single bit set.
29192 // A plaintext description would be: we can turn the SELECT_CC into an AND
29193 // when the condition can be materialized as an all-ones register. Any
29194 // single bit-test can be materialized as an all-ones register with
29195 // shift-left and shift-right-arith.
29196 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29197 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29198 SDValue AndLHS = N0->getOperand(0);
29199 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29200 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29201 // Shift the tested bit over the sign bit.
29202 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29203 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29204 unsigned ShCt = AndMask.getBitWidth() - 1;
29205 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29206 SDLoc(AndLHS));
29207 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29208
29209 // Now arithmetic right shift it all the way over, so the result is
29210 // either all-ones, or zero.
29211 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29212 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29213
29214 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29215 }
29216 }
29217 }
29218
29219 // fold select C, 16, 0 -> shl C, 4
29220 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29221 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29222
29223 if ((Fold || Swap) &&
29224 TLI.getBooleanContents(CmpOpVT) ==
29225 TargetLowering::ZeroOrOneBooleanContent &&
29226 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29228
29229 if (Swap) {
29230 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29231 std::swap(N2C, N3C);
29232 }
29233
29234 // If the caller doesn't want us to simplify this into a zext of a compare,
29235 // don't do it.
29236 if (NotExtCompare && N2C->isOne())
29237 return SDValue();
29238
29239 SDValue Temp, SCC;
29240 // zext (setcc n0, n1)
29241 if (LegalTypes) {
29242 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29243 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29244 } else {
29245 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29246 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29247 }
29248
29249 AddToWorklist(SCC.getNode());
29250 AddToWorklist(Temp.getNode());
29251
29252 if (N2C->isOne())
29253 return Temp;
29254
29255 unsigned ShCt = N2C->getAPIntValue().logBase2();
29256 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29257 return SDValue();
29258
29259 // shl setcc result by log2 n2c
29260 return DAG.getNode(
29261 ISD::SHL, DL, N2.getValueType(), Temp,
29262 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29263 }
29264
29265 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29266 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29267 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29268 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29269 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29270 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29271 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29272 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29273 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29274 SDValue ValueOnZero = N2;
29275 SDValue Count = N3;
29277 // If the condition is NE instead of EQ, swap the operands.
29277 if (CC == ISD::SETNE)
29278 std::swap(ValueOnZero, Count);
29279 // Check if the value on zero is a constant equal to the bits in the type.
29280 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29281 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29282 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29283 // legal, combine to just cttz.
29284 if ((Count.getOpcode() == ISD::CTTZ ||
29285 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29286 N0 == Count.getOperand(0) &&
29287 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29288 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29289 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29290 // legal, combine to just ctlz.
29291 if ((Count.getOpcode() == ISD::CTLZ ||
29292 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29293 N0 == Count.getOperand(0) &&
29294 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29295 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29296 }
29297 }
29298 }
29299
29300 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29301 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29302 if (!NotExtCompare && N1C && N2C && N3C &&
29303 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29304 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29305 (N1C->isZero() && CC == ISD::SETLT)) &&
29306 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29307 SDValue ASHR =
29308 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29309 DAG.getShiftAmountConstant(
29310 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29311 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29312 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29313 }
29314
29315 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29316 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29317 N2C->isOne() && N3C->isAllOnes() &&
29318 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29319 CmpOpVT.getScalarSizeInBits() - 1)) {
29320 SDValue ASHR =
29321 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29322 DAG.getShiftAmountConstant(
29323 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29324 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29325 DAG.getConstant(1, DL, VT));
29326 }
29327
29328 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29329 return S;
29330 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29331 return S;
29332 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29333 return ABD;
29334
29335 return SDValue();
29336}
29337 
29338 static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29339 const TargetLowering &TLI) {
29340 // Match a pattern such as:
29341 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29342 // This extracts contiguous parts of X and ORs them together before comparing.
29343 // We can optimize this so that we directly check (X & SomeMask) instead,
29344 // eliminating the shifts.
29345
29346 EVT VT = Root.getValueType();
29347
29348 // TODO: Support vectors?
29349 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29350 return SDValue();
29351
29352 SDValue N0 = Root.getOperand(0);
29353 SDValue N1 = Root.getOperand(1);
29354
29355 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29356 return SDValue();
29357
29358 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29359
29360 SDValue Src;
29361 const auto IsSrc = [&](SDValue V) {
29362 if (!Src) {
29363 Src = V;
29364 return true;
29365 }
29366
29367 return Src == V;
29368 };
29369
29370 SmallVector<SDValue> Worklist = {N0};
29371 APInt PartsMask(VT.getSizeInBits(), 0);
29372 while (!Worklist.empty()) {
29373 SDValue V = Worklist.pop_back_val();
29374 if (!V.hasOneUse() && (Src && Src != V))
29375 return SDValue();
29376
29377 if (V.getOpcode() == ISD::OR) {
29378 Worklist.push_back(V.getOperand(0));
29379 Worklist.push_back(V.getOperand(1));
29380 continue;
29381 }
29382
29383 if (V.getOpcode() == ISD::SRL) {
29384 SDValue ShiftSrc = V.getOperand(0);
29385 SDValue ShiftAmt = V.getOperand(1);
29386
29387 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29388 return SDValue();
29389
29390 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29391 if (ShiftAmtVal > RootMask.getBitWidth())
29392 return SDValue();
29393
29394 PartsMask |= (RootMask << ShiftAmtVal);
29395 continue;
29396 }
29397
29398 if (IsSrc(V)) {
29399 PartsMask |= RootMask;
29400 continue;
29401 }
29402
29403 return SDValue();
29404 }
29405
29406 if (!Src)
29407 return SDValue();
29408
29409 SDLoc DL(Root);
29410 return DAG.getNode(ISD::AND, DL, VT,
29411 {Src, DAG.getConstant(PartsMask, DL, VT)});
29412}
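// For example, given
//   ((X | (X >> 16)) & 0xFF) == 0
// each OR leaf contributes RootMask shifted by its shift amount, so
// PartsMask = 0xFF | (0xFF << 16) = 0x00FF00FF and the compare becomes
//   (X & 0x00FF00FF) == 0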
29413
29414/// This is a stub for TargetLowering::SimplifySetCC.
29415SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29416 ISD::CondCode Cond, const SDLoc &DL,
29417 bool foldBooleans) {
29418 TargetLowering::DAGCombinerInfo
29419 DagCombineInfo(DAG, Level, false, this);
29420 if (SDValue C =
29421 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29422 return C;
29423 
29424 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
29425 isNullConstant(N1)) {
29426
29427 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29428 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29429 }
29430
29431 return SDValue();
29432}
29433
29434/// Given an ISD::SDIV node expressing a divide by constant, return
29435/// a DAG expression to select that will generate the same value by multiplying
29436/// by a magic number.
29437/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29438SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29439 // when optimising for minimum size, we don't want to expand a div to a mul
29440 // and a shift.
29441 if (DAG.getMachineFunction().getFunction().hasMinSize())
29442 return SDValue();
29443 
29444 SmallVector<SDNode *, 8> Built;
29445 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29446 for (SDNode *N : Built)
29447 AddToWorklist(N);
29448 return S;
29449 }
29450
29451 return SDValue();
29452}
29453
29454/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29455/// DAG expression that will generate the same value by right shifting.
29456SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29457 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29458 if (!C)
29459 return SDValue();
29460
29461 // Avoid division by zero.
29462 if (C->isZero())
29463 return SDValue();
29464 
29465 SmallVector<SDNode *, 8> Built;
29466 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29467 for (SDNode *N : Built)
29468 AddToWorklist(N);
29469 return S;
29470 }
29471
29472 return SDValue();
29473}
29474
29475/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29476/// expression that will generate the same value by multiplying by a magic
29477/// number.
29478/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29479SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29480 // when optimising for minimum size, we don't want to expand a div to a mul
29481 // and a shift.
29482 if (DAG.getMachineFunction().getFunction().hasMinSize())
29483 return SDValue();
29484 
29485 SmallVector<SDNode *, 8> Built;
29486 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29487 for (SDNode *N : Built)
29488 AddToWorklist(N);
29489 return S;
29490 }
29491
29492 return SDValue();
29493}
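// For example, a 32-bit unsigned divide by 10 is typically expanded to
//   udiv X, 10  -->  srl (mulhu X, 0xCCCCCCCD), 3
// where 0xCCCCCCCD = ceil(2^35 / 10); the high multiply plus the shift by 3
// reproduce the quotient exactly for every 32-bit X.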
29494
29495/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29496/// return a DAG expression that will generate the same value.
29497SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29498 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29499 if (!C)
29500 return SDValue();
29501
29502 // Avoid division by zero.
29503 if (C->isZero())
29504 return SDValue();
29505 
29506 SmallVector<SDNode *, 8> Built;
29507 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29508 for (SDNode *N : Built)
29509 AddToWorklist(N);
29510 return S;
29511 }
29512
29513 return SDValue();
29514}
29515
29516// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29517//
29518// Returns the node that represents `Log2(Op)`. This may create a new node. If
29519 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
29520//
29521// All nodes will be created at `DL` and the output will be of type `VT`.
29522//
29523// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29524// `AssumeNonZero` if this function should simply assume (not require proving
29525 // `Op` is non-zero).
29526 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29527 SDValue Op, unsigned Depth,
29528 bool AssumeNonZero) {
29529 assert(VT.isInteger() && "Only integer types are supported!");
29530
29531 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29532 while (true) {
29533 switch (V.getOpcode()) {
29534 case ISD::TRUNCATE:
29535 case ISD::ZERO_EXTEND:
29536 V = V.getOperand(0);
29537 break;
29538 default:
29539 return V;
29540 }
29541 }
29542 };
29543
29544 if (VT.isScalableVector())
29545 return SDValue();
29546
29547 Op = PeekThroughCastsAndTrunc(Op);
29548
29549 // Helper for determining whether a value is a power-2 constant scalar or a
29550 // vector of such elements.
29551 SmallVector<APInt> Pow2Constants;
29552 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29553 if (C->isZero() || C->isOpaque())
29554 return false;
29555 // TODO: We may also be able to support negative powers of 2 here.
29556 if (C->getAPIntValue().isPowerOf2()) {
29557 Pow2Constants.emplace_back(C->getAPIntValue());
29558 return true;
29559 }
29560 return false;
29561 };
29562
29563 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29564 if (!VT.isVector())
29565 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29566 // We need to create a build vector
29567 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29568 return DAG.getSplat(VT, DL,
29569 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29570 VT.getScalarType()));
29571 SmallVector<SDValue> Log2Ops;
29572 for (const APInt &Pow2 : Pow2Constants)
29573 Log2Ops.emplace_back(
29574 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29575 return DAG.getBuildVector(VT, DL, Log2Ops);
29576 }
29577
29578 if (Depth >= DAG.MaxRecursionDepth)
29579 return SDValue();
29580
29581 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29582 // Peek through zero extend. We can't peek through truncates since this
29583 // function is called on a shift amount. We must ensure that all of the bits
29584 // above the original shift amount are zeroed by this function.
29585 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29586 ToCast = ToCast.getOperand(0);
29587 EVT CurVT = ToCast.getValueType();
29588 if (NewVT == CurVT)
29589 return ToCast;
29590
29591 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29592 return DAG.getBitcast(NewVT, ToCast);
29593
29594 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29595 };
29596
29597 // log2(X << Y) -> log2(X) + Y
29598 if (Op.getOpcode() == ISD::SHL) {
29599 // 1 << Y and X nuw/nsw << Y are all non-zero.
29600 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29601 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29602 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29603 Depth + 1, AssumeNonZero))
29604 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29605 CastToVT(VT, Op.getOperand(1)));
29606 }
29607
29608 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29609 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
29610 Op.hasOneUse()) {
29611 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
29612 Depth + 1, AssumeNonZero))
29613 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
29614 Depth + 1, AssumeNonZero))
29615 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
29616 }
29617
29618 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29619 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29620 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29621 Op.hasOneUse()) {
29622 // Pass AssumeNonZero as false here. Otherwise we can hit a case where
29623 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
29624 if (SDValue LogX =
29625 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29626 /*AssumeNonZero*/ false))
29627 if (SDValue LogY =
29628 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29629 /*AssumeNonZero*/ false))
29630 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29631 }
29632
29633 return SDValue();
29634}
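// Some concrete instances of the folds above (illustrative only):
//   log2(splat 8)         -> splat 3
//   log2(1 << Y)          -> Y                (1 << Y is non-zero per the SHL fold)
//   log2(select C, 4, 16) -> select C, 2, 4
//   log2(umin(X, Y))      -> umin(log2(X), log2(Y))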
29635
29636/// Determines the LogBase2 value for a non-zero input value using the
29637/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
29638SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
29639 bool KnownNonZero, bool InexpensiveOnly,
29640 std::optional<EVT> OutVT) {
29641 EVT VT = OutVT ? *OutVT : V.getValueType();
29642 SDValue InexpensiveLogBase2 =
29643 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
29644 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
29645 return InexpensiveLogBase2;
29646
29647 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
29648 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
29649 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
29650 return LogBase2;
29651}
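// For a known power of two the ctlz form above is exact, e.g. for V = 16 in
// i32: ctlz(16) = 27 and (32 - 1) - 27 = 4 = log2(16).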
29652
29653/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
29654/// For the reciprocal, we need to find the zero of the function:
29655/// F(X) = 1/X - A [which has a zero at X = 1/A]
29656/// =>
29657/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
29658/// does not require additional intermediate precision]
29659/// For the last iteration, put numerator N into it to gain more precision:
29660/// Result = N X_i + X_i (N - N A X_i)
29661SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
29662 SDNodeFlags Flags) {
29663 if (LegalDAG)
29664 return SDValue();
29665
29666 // TODO: Handle extended types?
29667 EVT VT = Op.getValueType();
29668 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29669 VT.getScalarType() != MVT::f64)
29670 return SDValue();
29671
29672 // If estimates are explicitly disabled for this function, we're done.
29673 MachineFunction &MF = DAG.getMachineFunction();
29674 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
29675 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29676 return SDValue();
29677
29678 // Estimates may be explicitly enabled for this type with a custom number of
29679 // refinement steps.
29680 int Iterations = TLI.getDivRefinementSteps(VT, MF);
29681 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
29682 AddToWorklist(Est.getNode());
29683
29684 SDLoc DL(Op);
29685 if (Iterations) {
29686 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
29687
29688 // Newton iterations: Est = Est + Est (N - Arg * Est)
29689 // If this is the last iteration, also multiply by the numerator.
29690 for (int i = 0; i < Iterations; ++i) {
29691 SDValue MulEst = Est;
29692
29693 if (i == Iterations - 1) {
29694 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
29695 AddToWorklist(MulEst.getNode());
29696 }
29697
29698 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
29699 AddToWorklist(NewEst.getNode());
29700
29701 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
29702 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
29703 AddToWorklist(NewEst.getNode());
29704
29705 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29706 AddToWorklist(NewEst.getNode());
29707
29708 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
29709 AddToWorklist(Est.getNode());
29710 }
29711 } else {
29712 // If no iterations are available, multiply by N.
29713 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
29714 AddToWorklist(Est.getNode());
29715 }
29716
29717 return Est;
29718 }
29719
29720 return SDValue();
29721}
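// Convergence sketch for the iteration above: if the current estimate carries
// relative error e, i.e. X_i = (1 - e) / A, then
//   X_{i+1} = X_i * (2 - A * X_i) = (1 - e) * (1 + e) / A = (1 - e^2) / A,
// so each refinement step roughly squares the error. That is why a small,
// fixed number of steps after the hardware estimate is sufficient.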
29722
29723/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
29724/// For the reciprocal sqrt, we need to find the zero of the function:
29725/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29726/// =>
29727/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
29728/// As a result, we precompute A/2 prior to the iteration loop.
29729SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
29730 unsigned Iterations,
29731 SDNodeFlags Flags, bool Reciprocal) {
29732 EVT VT = Arg.getValueType();
29733 SDLoc DL(Arg);
29734 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
29735
29736 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
29737 // this entire sequence requires only one FP constant.
29738 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
29739 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
29740
29741 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
29742 for (unsigned i = 0; i < Iterations; ++i) {
29743 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
29744 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
29745 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
29746 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29747 }
29748
29749 // If non-reciprocal square root is requested, multiply the result by Arg.
29750 if (!Reciprocal)
29751 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
29752
29753 return Est;
29754}
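// Derivation sketch for the update above: with F(X) = 1/X^2 - A we have
// F'(X) = -2/X^3, so
//   X_{i+1} = X_i - F(X_i)/F'(X_i) = X_i + (1/X_i^2 - A) * X_i^3 / 2
//           = X_i * (1.5 - 0.5 * A * X_i^2),
// which is exactly Est * (1.5 - HalfArg * Est * Est) with HalfArg = 0.5 * Arg.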
29755
29756/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
29757/// For the reciprocal sqrt, we need to find the zero of the function:
29758/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29759/// =>
29760/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
29761SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
29762 unsigned Iterations,
29763 SDNodeFlags Flags, bool Reciprocal) {
29764 EVT VT = Arg.getValueType();
29765 SDLoc DL(Arg);
29766 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
29767 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
29768
29769 // This routine must enter the loop below to work correctly
29770 // when (Reciprocal == false).
29771 assert(Iterations > 0);
29772
29773 // Newton iterations for reciprocal square root:
29774 // E = (E * -0.5) * ((A * E) * E + -3.0)
29775 for (unsigned i = 0; i < Iterations; ++i) {
29776 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
29777 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
29778 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
29779
29780 // When calculating a square root at the last iteration build:
29781 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
29782 // (notice a common subexpression)
29783 SDValue LHS;
29784 if (Reciprocal || (i + 1) < Iterations) {
29785 // RSQRT: LHS = (E * -0.5)
29786 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
29787 } else {
29788 // SQRT: LHS = (A * E) * -0.5
29789 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
29790 }
29791
29792 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
29793 }
29794
29795 return Est;
29796}
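// Note: the two-constant form is algebraically the same update as the
// one-constant form, since
//   (-0.5 * X) * (A * X^2 - 3.0) = X * (1.5 - 0.5 * A * X^2);
// it trades the explicit computation of A/2 for a second FP constant.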
29797
29798/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
29799/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
29800/// Op can be zero.
29801SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
29802 bool Reciprocal) {
29803 if (LegalDAG)
29804 return SDValue();
29805
29806 // TODO: Handle extended types?
29807 EVT VT = Op.getValueType();
29808 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29809 VT.getScalarType() != MVT::f64)
29810 return SDValue();
29811
29812 // If estimates are explicitly disabled for this function, we're done.
29813 MachineFunction &MF = DAG.getMachineFunction();
29814 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
29815 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29816 return SDValue();
29817
29818 // Estimates may be explicitly enabled for this type with a custom number of
29819 // refinement steps.
29820 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
29821
29822 bool UseOneConstNR = false;
29823 if (SDValue Est =
29824 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
29825 Reciprocal)) {
29826 AddToWorklist(Est.getNode());
29827
29828 if (Iterations > 0)
29829 Est = UseOneConstNR
29830 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
29831 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
29832 if (!Reciprocal) {
29833 SDLoc DL(Op);
29834 // Try the target specific test first.
29835 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
29836
29837 // The estimate is now completely wrong if the input was exactly 0.0 or
29838 // possibly a denormal. Force the answer to 0.0 or the value provided by
29839 // the target for those cases.
29840 Est = DAG.getSelect(DL, VT, Test,
29841 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
29842 }
29843 return Est;
29844 }
29845
29846 return SDValue();
29847}
29848
29849SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29850 return buildSqrtEstimateImpl(Op, Flags, true);
29851}
29852
29853SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29854 return buildSqrtEstimateImpl(Op, Flags, false);
29855}
29856
29857/// Return true if there is any possibility that the two addresses overlap.
29858bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
29859
29860 struct MemUseCharacteristics {
29861 bool IsVolatile;
29862 bool IsAtomic;
29863 SDValue BasePtr;
29864 int64_t Offset;
29865 LocationSize NumBytes;
29866 MachineMemOperand *MMO;
29867 };
29868
29869 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
29870 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
29871 int64_t Offset = 0;
29872 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
29873 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
29874 : (LSN->getAddressingMode() == ISD::PRE_DEC)
29875 ? -1 * C->getSExtValue()
29876 : 0;
29877 TypeSize Size = LSN->getMemoryVT().getStoreSize();
29878 return {LSN->isVolatile(), LSN->isAtomic(),
29879 LSN->getBasePtr(), Offset /*base offset*/,
29880 LocationSize::precise(Size), LSN->getMemOperand()};
29881 }
29882 if (const auto *LN = cast<LifetimeSDNode>(N)) {
29883 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
29884 return {false /*isVolatile*/,
29885 /*isAtomic*/ false,
29886 LN->getOperand(1),
29887 0,
29888 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
29889 (MachineMemOperand *)nullptr};
29890 }
29891 // Default.
29892 return {false /*isvolatile*/,
29893 /*isAtomic*/ false,
29894 SDValue(),
29895 (int64_t)0 /*offset*/,
29896 LocationSize::beforeOrAfterPointer(),
29897 (MachineMemOperand *)nullptr};
29898 };
29899
29900 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
29901 MUC1 = getCharacteristics(Op1);
29902
29903 // If they are to the same address, then they must be aliases.
29904 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
29905 MUC0.Offset == MUC1.Offset)
29906 return true;
29907
29908 // If they are both volatile then they cannot be reordered.
29909 if (MUC0.IsVolatile && MUC1.IsVolatile)
29910 return true;
29911
29912 // Be conservative about atomics for the moment
29913 // TODO: This is way overconservative for unordered atomics (see D66309)
29914 if (MUC0.IsAtomic && MUC1.IsAtomic)
29915 return true;
29916
29917 if (MUC0.MMO && MUC1.MMO) {
29918 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29919 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29920 return false;
29921 }
29922
29923 // If NumBytes is scalable and offset is not 0, conservatively return may
29924 // alias
29925 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
29926 MUC0.Offset != 0) ||
29927 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
29928 MUC1.Offset != 0))
29929 return true;
29930 // Try to prove that there is aliasing, or that there is no aliasing. Either
29931 // way, we can return now. If nothing can be proved, proceed with more tests.
29932 bool IsAlias;
29933 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
29934 DAG, IsAlias))
29935 return IsAlias;
29936
29937 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
29938 // either are not known.
29939 if (!MUC0.MMO || !MUC1.MMO)
29940 return true;
29941
29942 // If one operation reads from invariant memory, and the other may store, they
29943 // cannot alias. These should really be checking the equivalent of mayWrite,
29944 // but it only matters for memory nodes other than load/store.
29945 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29946 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29947 return false;
29948
29949 // If we know the required SrcValue1 and SrcValue2 have relatively large
29950 // alignment compared to the size and offset of the access, we may be able
29951 // to prove they do not alias. This check is conservative for now to catch
29952 // cases created by splitting vector types; it only works when the offsets
29953 // are multiples of the size of the data.
29954 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
29955 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
29956 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
29957 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
29958 LocationSize Size0 = MUC0.NumBytes;
29959 LocationSize Size1 = MUC1.NumBytes;
29960
29961 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
29962 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
29963 !Size1.isScalable() && Size0 == Size1 &&
29964 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
29965 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
29966 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
29967 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
29968 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
29969
29970 // There is no overlap between these relatively aligned accesses of
29971 // similar size. Return no alias.
29972 if ((OffAlign0 + static_cast<int64_t>(
29973 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
29974 (OffAlign1 + static_cast<int64_t>(
29975 Size1.getValue().getKnownMinValue())) <= OffAlign0)
29976 return false;
29977 }
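  // Worked example of the check above (illustrative): two 4-byte accesses with
  // base alignment 16 at SrcValOffset0 = 0 and SrcValOffset1 = 8 give
  // OffAlign0 = 0 and OffAlign1 = 8; since 0 + 4 <= 8 the accesses cannot
  // overlap, so we return false (no alias).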
29978
29979 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
29980 ? CombinerGlobalAA
29981 : DAG.getSubtarget().useAA();
29982#ifndef NDEBUG
29983 if (CombinerAAOnlyFunc.getNumOccurrences() &&
29984 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
29985 UseAA = false;
29986#endif
29987
29988 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
29989 Size0.hasValue() && Size1.hasValue() &&
29990 // Can't represent a scalable size + fixed offset in LocationSize
29991 (!Size0.isScalable() || SrcValOffset0 == 0) &&
29992 (!Size1.isScalable() || SrcValOffset1 == 0)) {
29993 // Use alias analysis information.
29994 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
29995 int64_t Overlap0 =
29996 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
29997 int64_t Overlap1 =
29998 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
29999 LocationSize Loc0 =
30000 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
30001 LocationSize Loc1 =
30002 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
30003 if (BatchAA->isNoAlias(
30004 MemoryLocation(MUC0.MMO->getValue(), Loc0,
30005 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
30006 MemoryLocation(MUC1.MMO->getValue(), Loc1,
30007 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
30008 return false;
30009 }
30010
30011 // Otherwise we have to assume they alias.
30012 return true;
30013}
30014
30015/// Walk up chain skipping non-aliasing memory nodes,
30016/// looking for aliasing nodes and adding them to the Aliases vector.
30017void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
30018 SmallVectorImpl<SDValue> &Aliases) {
30019 SmallVector<SDValue, 8> Chains; // List of chains to visit.
30020 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
30021
30022 // Get alias information for node.
30023 // TODO: relax aliasing for unordered atomics (see D66309)
30024 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
30025
30026 // Starting off.
30027 Chains.push_back(OriginalChain);
30028 unsigned Depth = 0;
30029
30030 // Attempt to improve chain by a single step
30031 auto ImproveChain = [&](SDValue &C) -> bool {
30032 switch (C.getOpcode()) {
30033 case ISD::EntryToken:
30034 // No need to mark EntryToken.
30035 C = SDValue();
30036 return true;
30037 case ISD::LOAD:
30038 case ISD::STORE: {
30039 // Get alias information for C.
30040 // TODO: Relax aliasing for unordered atomics (see D66309)
30041 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
30042 cast<LSBaseSDNode>(C.getNode())->isSimple();
30043 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
30044 // Look further up the chain.
30045 C = C.getOperand(0);
30046 return true;
30047 }
30048 // Alias, so stop here.
30049 return false;
30050 }
30051
30052 case ISD::CopyFromReg:
30053 // Always forward past CopyFromReg.
30054 C = C.getOperand(0);
30055 return true;
30056
30057 case ISD::LIFETIME_START:
30058 case ISD::LIFETIME_END: {
30059 // We can forward past any lifetime start/end that can be proven not to
30060 // alias the memory access.
30061 if (!mayAlias(N, C.getNode())) {
30062 // Look further up the chain.
30063 C = C.getOperand(0);
30064 return true;
30065 }
30066 return false;
30067 }
30068 default:
30069 return false;
30070 }
30071 };
30072
30073 // Look at each chain and determine if it is an alias. If so, add it to the
30074 // aliases list. If not, then continue up the chain looking for the next
30075 // candidate.
30076 while (!Chains.empty()) {
30077 SDValue Chain = Chains.pop_back_val();
30078
30079 // Don't bother if we've seen Chain before.
30080 if (!Visited.insert(Chain.getNode()).second)
30081 continue;
30082
30083 // For TokenFactor nodes, look at each operand and only continue up the
30084 // chain until we reach the depth limit.
30085 //
30086 // FIXME: The depth check could be made to return the last non-aliasing
30087 // chain we found before we hit a tokenfactor rather than the original
30088 // chain.
30089 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
30090 Aliases.clear();
30091 Aliases.push_back(OriginalChain);
30092 return;
30093 }
30094
30095 if (Chain.getOpcode() == ISD::TokenFactor) {
30096 // We have to check each of the operands of the token factor for "small"
30097 // token factors, so we queue them up. Adding the operands to the queue
30098 // (stack) in reverse order maintains the original order and increases the
30099 // likelihood that getNode will find a matching token factor (CSE).
30100 if (Chain.getNumOperands() > 16) {
30101 Aliases.push_back(Chain);
30102 continue;
30103 }
30104 for (unsigned n = Chain.getNumOperands(); n;)
30105 Chains.push_back(Chain.getOperand(--n));
30106 ++Depth;
30107 continue;
30108 }
30109 // Everything else
30110 if (ImproveChain(Chain)) {
30111 // Updated Chain Found, Consider new chain if one exists.
30112 if (Chain.getNode())
30113 Chains.push_back(Chain);
30114 ++Depth;
30115 continue;
30116 }
30117 // No Improved Chain Possible, treat as Alias.
30118 Aliases.push_back(Chain);
30119 }
30120}
30121
30122/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
30123/// (aliasing node.)
30124SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
30125 if (OptLevel == CodeGenOptLevel::None)
30126 return OldChain;
30127
30128 // Ops for replacing token factor.
30129 SmallVector<SDValue, 8> Aliases;
30130
30131 // Accumulate all the aliases to this node.
30132 GatherAllAliases(N, OldChain, Aliases);
30133
30134 // If no operands then chain to entry token.
30135 if (Aliases.empty())
30136 return DAG.getEntryNode();
30137
30138 // If a single operand then chain to it. We don't need to revisit it.
30139 if (Aliases.size() == 1)
30140 return Aliases[0];
30141
30142 // Construct a custom tailored token factor.
30143 return DAG.getTokenFactor(SDLoc(N), Aliases);
30144}
30145
30146// This function tries to collect a bunch of potentially interesting
30147// nodes to improve the chains of, all at once. This might seem
30148// redundant, as this function gets called when visiting every store
30149// node, so why not let the work be done on each store as it's visited?
30150//
30151// I believe this is mainly important because mergeConsecutiveStores
30152// is unable to deal with merging stores of different sizes, so unless
30153// we improve the chains of all the potential candidates up-front
30154// before running mergeConsecutiveStores, it might only see some of
30155// the nodes that will eventually be candidates, and then not be able
30156// to go from a partially-merged state to the desired final
30157// fully-merged state.
30158
30159bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
30160 SmallVector<StoreSDNode *, 8> ChainedStores;
30161 StoreSDNode *STChain = St;
30162 // Intervals records which offsets from BaseIndex have been covered. In the
30163 // common case, every store writes to the address range immediately preceding
30164 // the one already covered and is thus merged with the previous interval at insertion time.
30165
30166 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30167 IntervalMapHalfOpenInfo<int64_t>>;
30168 IMap::Allocator A;
30169 IMap Intervals(A);
30170
30171 // This holds the base pointer, index, and the offset in bytes from the base
30172 // pointer.
30173 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30174
30175 // We must have a base and an offset.
30176 if (!BasePtr.getBase().getNode())
30177 return false;
30178
30179 // Do not handle stores to undef base pointers.
30180 if (BasePtr.getBase().isUndef())
30181 return false;
30182
30183 // Do not handle stores to opaque types
30184 if (St->getMemoryVT().isZeroSized())
30185 return false;
30186
30187 // BaseIndexOffset assumes that offsets are fixed-size, which
30188 // is not valid for scalable vectors where the offsets are
30189 // scaled by `vscale`, so bail out early.
30190 if (St->getMemoryVT().isScalableVT())
30191 return false;
30192
30193 // Add ST's interval.
30194 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30195 std::monostate{});
30196
30197 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30198 if (Chain->getMemoryVT().isScalableVector())
30199 return false;
30200
30201 // If the chain has more than one use, then we can't reorder the mem ops.
30202 if (!SDValue(Chain, 0)->hasOneUse())
30203 break;
30204 // TODO: Relax for unordered atomics (see D66309)
30205 if (!Chain->isSimple() || Chain->isIndexed())
30206 break;
30207
30208 // Find the base pointer and offset for this memory node.
30209 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30210 // Check that the base pointer is the same as the original one.
30211 int64_t Offset;
30212 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30213 break;
30214 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30215 // Make sure we don't overlap with other intervals by checking the ones to
30216 // the left or right before inserting.
30217 auto I = Intervals.find(Offset);
30218 // If there's a next interval, we should end before it.
30219 if (I != Intervals.end() && I.start() < (Offset + Length))
30220 break;
30221 // If there's a previous interval, we should start after it.
30222 if (I != Intervals.begin() && (--I).stop() <= Offset)
30223 break;
30224 Intervals.insert(Offset, Offset + Length, std::monostate{});
30225
30226 ChainedStores.push_back(Chain);
30227 STChain = Chain;
30228 }
30229
30230 // If we didn't find a chained store, exit.
30231 if (ChainedStores.empty())
30232 return false;
30233
30234 // Improve all chained stores (St and ChainedStores members) starting from
30235 // where the store chain ended and return single TokenFactor.
30236 SDValue NewChain = STChain->getChain();
30237 SmallVector<SDValue, 8> TFOps;
30238 for (unsigned I = ChainedStores.size(); I;) {
30239 StoreSDNode *S = ChainedStores[--I];
30240 SDValue BetterChain = FindBetterChain(S, NewChain);
30241 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30242 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30243 TFOps.push_back(SDValue(S, 0));
30244 ChainedStores[I] = S;
30245 }
30246
30247 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30248 SDValue BetterChain = FindBetterChain(St, NewChain);
30249 SDValue NewST;
30250 if (St->isTruncatingStore())
30251 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30252 St->getBasePtr(), St->getMemoryVT(),
30253 St->getMemOperand());
30254 else
30255 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30256 St->getBasePtr(), St->getMemOperand());
30257
30258 TFOps.push_back(NewST);
30259
30260 // If we improved every element of TFOps, then we've lost the dependence on
30261 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30262 // the beginning to keep relative order consistent with FindBetterChains.
30263 auto hasImprovedChain = [&](SDValue ST) -> bool {
30264 return ST->getOperand(0) != NewChain;
30265 };
30266 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30267 if (AddNewChain)
30268 TFOps.insert(TFOps.begin(), NewChain);
30269
30270 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30271 CombineTo(St, TF);
30272
30273 // Add TF and its operands to the worklist.
30274 AddToWorklist(TF.getNode());
30275 for (const SDValue &Op : TF->ops())
30276 AddToWorklist(Op.getNode());
30277 AddToWorklist(STChain);
30278 return true;
30279}
30280
30281bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30282 if (OptLevel == CodeGenOptLevel::None)
30283 return false;
30284
30285 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30286
30287 // We must have a base and an offset.
30288 if (!BasePtr.getBase().getNode())
30289 return false;
30290
30291 // Do not handle stores to undef base pointers.
30292 if (BasePtr.getBase().isUndef())
30293 return false;
30294
30295 // Directly improve a chain of disjoint stores starting at St.
30296 if (parallelizeChainedStores(St))
30297 return true;
30298
30299 // Improve St's chain.
30300 SDValue BetterChain = FindBetterChain(St, St->getChain());
30301 if (St->getChain() != BetterChain) {
30302 replaceStoreChain(St, BetterChain);
30303 return true;
30304 }
30305 return false;
30306}
30307
30308/// This is the entry point for the file.
30309void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30310 CodeGenOptLevel OptLevel) {
30311 /// This is the main entry point to this class.
30312 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30313}
return SDValue()
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, const SDLoc &Dl)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static cl::opt< bool > DisableCombines("combiner-disabled", cl::Hidden, cl::init(false), cl::desc("Disable the DAG combiner"))
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL)
Fold "masked merge" expressions like (m & x) | (~m & y) and its DeMorgan variant (~m | x) & (m | y) i...
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, SDValue FVal, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate, bool FromAdd)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue eliminateFPCastPair(SDNode *N)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
ByteProvider< SDNode * > SDByteProvider
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
static MaybeAlign getAlign(Value *Ptr)
iv Induction Variable Users
Definition IVUsers.cpp:48
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr int Concat[]
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1120
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1208
bool isNegative() const
Definition APFloat.h:1449
bool isNormal() const
Definition APFloat.h:1453
bool isDenormal() const
Definition APFloat.h:1450
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
const fltSemantics & getSemantics() const
Definition APFloat.h:1457
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
Definition APFloat.h:1316
bool isNaN() const
Definition APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1088
bool isSignaling() const
Definition APFloat.h:1451
APInt bitcastToAPInt() const
Definition APFloat.h:1353
bool isLargest() const
Definition APFloat.h:1465
bool isInfinity() const
Definition APFloat.h:1446
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition APInt.cpp:644
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition APInt.h:466
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
int32_t exactLogBase2() const
Definition APInt.h:1783
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1452
unsigned logBase2() const
Definition APInt.h:1761
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:471
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
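As a worked sketch of the mask-construction factories listed above (the 32-bit width and the resulting hex values are chosen purely for illustration):

#include "llvm/ADT/APInt.h"
using namespace llvm;

static void maskExamples() {
  APInt Low8  = APInt::getLowBitsSet(32, 8);    // 0x000000FF
  APInt High8 = APInt::getHighBitsSet(32, 8);   // 0xFF000000
  APInt Mid   = APInt::getBitsSet(32, 8, 16);   // 0x0000FF00, bits [8,16)
  APInt Bit5  = APInt::getOneBitSet(32, 5);     // 0x00000020
  APInt Zero  = APInt::getZero(32);             // 0x00000000
  (void)Low8; (void)High8; (void)Mid; (void)Bit5; (void)Zero;
}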
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:200
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static LLVM_ABI bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
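A hedged sketch of how BaseIndexOffset::computeAliasing is typically consulted when deciding whether two memory nodes may overlap; the helper name and the conservative fallback are illustrative, not part of the API:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
using namespace llvm;

// Illustrative helper: report "may alias" unless the decomposed
// base/index/offset analysis proves the two accesses are disjoint.
static bool mayAlias(const MemSDNode *A, const MemSDNode *B,
                     const SelectionDAG &DAG) {
  bool IsAlias = false;
  if (BaseIndexOffset::computeAliasing(
          A, LocationSize::precise(A->getMemoryVT().getStoreSize()),
          B, LocationSize::precise(B->getMemoryVT().getStoreSize()),
          DAG, IsAlias))
    return IsAlias;   // the analysis reached a definite answer
  return true;        // unknown: assume the worst
}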
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
LLVM_ABI bool isConstant() const
Represents known origin of an individual byte in combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< SDNode * > Val, int64_t ByteOffset, int64_t VectorOffset)
Combiner implementation.
Definition Combiner.h:34
ISD::CondCode get() const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange truncate(uint32_t BitWidth, unsigned NoWrapKind=0) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getUpper() const
Return the upper value for this range.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
bool isBigEndian() const
Definition DataLayout.h:208
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static bool shouldExecute(unsigned CounterName)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
iterator end()
Definition DenseMap.h:81
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:321
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const_iterator find(KeyT x) const
find - Return an iterator pointing to the first interval ending at or after x, or end().
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
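A small sketch using the LoadSDNode accessors above; the predicate name is an assumption, and "plain" here means the unindexed, non-extending, non-atomic, non-volatile case that a combine usually wants:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative predicate: is this a plain load whose result a combine can
// safely reuse?
static bool isPlainLoad(const LoadSDNode *LD) {
  return LD->isUnindexed() &&
         LD->getExtensionType() == ISD::NON_EXTLOAD &&
         LD->isSimple();   // neither atomic nor volatile
}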
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns the base alignment of the memory access.
const MDNode * getRanges() const
Returns the Ranges metadata that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:424
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:417
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
Get the index which selects a specific result in the SDNode.
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
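As a hedged illustration of the SDValue accessors above, a typical single-use pattern match as it might be written inside a DAG combine; the matcher name and the specific opcodes are examples only:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative matcher: does N compute (add (mul a, b), c) where the multiply
// has no other users, so it could be folded into an FMA-like node?
static bool isSingleUseMulAdd(SDValue N) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue Mul = N.getOperand(0);
  return Mul.getOpcode() == ISD::MUL && Mul.hasOneUse();
}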
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
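A minimal sketch of building a comparison with getSetCC, assuming DL, X and DAG come from the surrounding combine; the helper name is hypothetical:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative sketch: emit (setcc X, 0, seteq) using the target's preferred
// result type for comparisons of X's type.
static SDValue buildIsZero(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), X.getValueType());
  SDValue Zero = DAG.getConstant(0, DL, X.getValueType());
  return DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETEQ);
}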
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
bool isGuaranteedNotToBePoison(SDValue Op, unsigned Depth=0) const
Return true if this function can prove that Op is never poison.
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
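For illustration, a minimal sketch of creating a replacement node with getNode; the function name is an assumption, and the caller is assumed to have already checked that the rewrite is legal and profitable for the target:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative sketch: rewrite (x * 2) as (x << 1).
static SDValue buildShiftForMulByTwo(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue X) {
  EVT VT = X.getValueType();
  SDValue One = DAG.getShiftAmountConstant(1, VT, DL);
  return DAG.getNode(ISD::SHL, DL, VT, X, One);
}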
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
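A hedged sketch of the known-bits queries above; the helper name is hypothetical and it assumes an even scalar bit width. The MaskedValueIsZero call and the explicit computeKnownBits check answer the same question, shown side by side for illustration:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Illustrative sketch: is the top half of Op known to be zero?
static bool topHalfKnownZero(SelectionDAG &DAG, SDValue Op) {
  unsigned BW = Op.getScalarValueSizeInBits();
  APInt HighHalf = APInt::getHighBitsSet(BW, BW / 2);
  if (DAG.MaskedValueIsZero(Op, HighHalf))
    return true;
  KnownBits Known = DAG.computeKnownBits(Op);
  return HighHalf.isSubsetOf(Known.Zero);   // every high bit is known zero
}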
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
value_type pop_back_val()
Definition SetVector.h:278
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
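A small sketch of commuteMask; the helper name is an assumption. Mask elements below the source element count select from the first input, so commuting toggles which half of the index space each element refers to:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative helper: copy a shuffle mask and adjust it as if the two
// vector operands had swapped positions.
static SmallVector<int, 16> commutedMask(const ShuffleVectorSDNode *SVN) {
  SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
  ShuffleVectorSDNode::commuteMask(Mask);
  return Mask;
}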
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N) const
Return true if it is profitable to fold a pair of shifts into a mask.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool canTransformPtrArithOutOfBounds(const Function &F, EVT PtrVT) const
True if the target allows transformations of in-bounds pointer arithmetic that cause out-of-bounds in...
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
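For illustration, a minimal sketch of the legality guard that typically precedes forming a new node in a combine; the helper name and the choice of ISD::ABS are assumptions made here:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Illustrative guard: only form an ISD::ABS node if the target can handle it,
// either natively or via custom lowering, for this value type.
static bool canUseABS(const SelectionDAG &DAG, EVT VT) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.isOperationLegalOrCustom(ISD::ABS, VT);
}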
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are function calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
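A hedged sketch of driving SimplifyDemandedBits from outside the combiner; the helper name, the fixed 8-bit demand, and the way the result is committed are assumptions made for illustration (DAGCombiner commits through its own TargetLoweringOpt helper):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Illustrative sketch: ask the target to simplify Op given that only its low
// 8 bits are demanded, then replace the old value if a simplification exists.
static bool simplifyLowByte(SelectionDAG &DAG, SDValue Op, bool LegalTypes,
                            bool LegalOps) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOps);
  APInt Demanded = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), 8);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);  // commit the rewrite
  return true;
}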
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:231
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
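As a hedged illustration of the TypeSize/FixedOrScalableQuantity helpers listed above (the 128-bit quantity is made up for the example):

#include "llvm/Support/TypeSize.h"
using namespace llvm;

static bool halvesFit() {
  // A fixed 128-bit quantity, halved with divideCoefficientBy.
  TypeSize Full = TypeSize::getFixed(128);
  TypeSize Half = Full.divideCoefficientBy(2);      // fixed 64 bits
  return TypeSize::isKnownLE(Half, Full) &&         // 64 <= 128
         !Half.isScalable() &&
         Half.getFixedValue() == 64 &&
         Full.isKnownMultipleOf(8);                 // byte-sized
}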
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
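A small sketch, with made-up 8-bit values, of why the signed and unsigned variants above can pick different operands:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static void signedVsUnsigned() {
  APInt A(8, 0x80), B(8, 0x7f);              // 0x80 is -128 signed, 128 unsigned
  const APInt &SMin = APIntOps::smin(A, B);  // picks A (smaller when signed)
  const APInt &UMax = APIntOps::umax(A, B);  // also picks A (larger when unsigned)
  (void)SMin; (void)UMax;
}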
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:231
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:855
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:809
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ TargetConstantFP
Definition ISDOpcodes.h:175
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ TargetFrameIndex
Definition ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:690
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a constant splat vector, setting SplatValue to the splatted constant.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
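A hedged sketch of how ISD::matchUnaryPredicate is commonly used in a combine: the lambda runs on the scalar constant, or on every element of a constant BUILD_VECTOR/SPLAT_VECTOR (the helper name is invented for illustration).

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Returns true if Op is a constant (or per-element constant vector) whose
// every element is a non-zero power of two.
static bool isPow2Constant(SDValue Op) {
  return ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
    return C && C->getAPIntValue().isPowerOf2();
  });
}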
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
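The m_* entries above come from the IR-level llvm::PatternMatch namespace (plus a couple of GlobalISel MIPatternMatch helpers); they operate on llvm::Value, not on SDNodes. A minimal, hedged sketch of their use:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise "X << 1" (or a vector splat of it) and bind X on success.
static bool isShlByOne(Value *V, Value *&X) {
  return match(V, m_Shl(m_Value(X), m_One()));
}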
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
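The sd_match/m_* entries above belong to llvm::SDPatternMatch, which this combiner pulls in with a using-directive. A hedged sketch of a typical match (the helper name is invented for illustration):

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// Match (setcc LHS, RHS, seteq), binding the operands when it succeeds.
static bool matchEqualityCompare(SDNode *N, const SelectionDAG *DAG,
                                 SDValue &LHS, SDValue &RHS) {
  return sd_match(N, DAG,
                  m_SetCC(m_Value(LHS), m_Value(RHS),
                          m_SpecificCondCode(ISD::SETEQ)));
}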
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
constexpr double e
Definition MathExtras.h:47
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:355
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1607
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1534
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1545
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
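A small, hedged example of narrowShuffleMaskElts: each wide-element index expands to Scale consecutive indices of the narrower element type.

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoNarrowMask() {
  int Wide[] = {1, 0};                        // swap two wide elements
  SmallVector<int, 8> Narrow;
  narrowShuffleMaskElts(/*Scale=*/2, Wide, Narrow);
  // Narrow == {2, 3, 0, 1}: each wide lane became two narrow lanes.
}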
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition STLExtras.h:2108
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
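PowerOf2Ceil and NextPowerOf2 differ only on exact powers of two; a tiny hedged sketch:

#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;

static void powerOfTwoHelpers() {
  // PowerOf2Ceil rounds up to the nearest power of two (>= the input),
  // NextPowerOf2 returns the strictly greater one.
  uint64_t A = PowerOf2Ceil(8);   // 8
  uint64_t B = NextPowerOf2(8);   // 16
  bool C = isPowerOf2_64(A) && Log2_64(B) == 4;
  (void)C;
}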
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:332
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:328
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:365
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:338
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:279
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:187
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:256
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:268
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
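A hedged sketch of the EVT helpers above: building a v4i32 and asking the questions a combine usually asks before rewriting a node (Ctx stands in for whatever LLVMContext is available).

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

static bool looksLikeV4I32(LLVMContext &Ctx) {
  EVT EltVT = EVT::getIntegerVT(Ctx, 32);
  EVT VecVT = EVT::getVectorVT(Ctx, EltVT, 4);
  return VecVT.isVector() && VecVT.isInteger() &&
         VecVT.getVectorNumElements() == 4 &&
         VecVT.getScalarSizeInBits() == 32 &&
         VecVT.getSizeInBits() == 128;
}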
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
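A hedged sketch of how these KnownBits queries are typically driven from a combine, using the DAG's own value-tracking entry point; the property being checked is illustrative only.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static bool isNonNegativeMultipleOf4(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  // Sign bit known zero, and the two low bits known zero.
  return Known.isNonNegative() && Known.countMinTrailingZeros() >= 2;
}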
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
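A hedged sketch of the SDNodeFlags setters above being used to propagate fast-math-style flags onto a rebuilt node; DL, VT, A and B are placeholders for whatever debug location, type and operands a real combine would have in scope.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildContractableFAdd(SelectionDAG &DAG, const SDLoc &DL,
                                     EVT VT, SDValue A, SDValue B) {
  SDNodeFlags Flags;
  Flags.setAllowContract(true);       // may later be fused into an FMA
  Flags.setAllowReassociation(true);  // may be reassociated with neighbours
  return DAG.getNode(ISD::FADD, DL, VT, A, B, Flags);
}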
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...