DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
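// For example (illustrative, not an exhaustive list), combines of this kind
// rewrite (add x, 0) -> x, fold (zext (load i8)) into a single zero-extending
// load when the target supports it, and merge adjacent narrow stores into one
// wider store.
//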
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
163 CombineLevel Level = BeforeLegalizeTypes;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
180 SmallVector<SDNode *, 64> Worklist;
181
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. As we do not add duplicate nodes to
184 /// the worklist, this is different from the tail of the worklist.
185 SmallSetVector<SDNode *, 32> PruningList;
186
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as store merging
192 /// candidate again.
193 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
194
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// store candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the worklist because they might now be simplified further.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its users to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed).
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
356 ? APInt::getAllOnes(VT.getVectorNumElements())
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
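 // For example (an illustrative sketch, not one of the routines below): a
 // visit routine that folds (xor x, 0) -> x returns N->getOperand(0); one that
 // rewrites N in place through CombineTo() returns SDValue(N, 0); and one that
 // finds nothing to do returns SDValue().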
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
550 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
551 SDValue visitVECTOR_SHUFFLE(SDNode *N);
552 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
553 SDValue visitINSERT_SUBVECTOR(SDNode *N);
554 SDValue visitVECTOR_COMPRESS(SDNode *N);
555 SDValue visitMLOAD(SDNode *N);
556 SDValue visitMSTORE(SDNode *N);
557 SDValue visitMGATHER(SDNode *N);
558 SDValue visitMSCATTER(SDNode *N);
559 SDValue visitMHISTOGRAM(SDNode *N);
560 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
561 SDValue visitVPGATHER(SDNode *N);
562 SDValue visitVPSCATTER(SDNode *N);
563 SDValue visitVP_STRIDED_LOAD(SDNode *N);
564 SDValue visitVP_STRIDED_STORE(SDNode *N);
565 SDValue visitFP_TO_FP16(SDNode *N);
566 SDValue visitFP16_TO_FP(SDNode *N);
567 SDValue visitFP_TO_BF16(SDNode *N);
568 SDValue visitBF16_TO_FP(SDNode *N);
569 SDValue visitVECREDUCE(SDNode *N);
570 SDValue visitVPOp(SDNode *N);
571 SDValue visitGET_FPENV_MEM(SDNode *N);
572 SDValue visitSET_FPENV_MEM(SDNode *N);
573
574 template <class MatchContextClass>
575 SDValue visitFADDForFMACombine(SDNode *N);
576 template <class MatchContextClass>
577 SDValue visitFSUBForFMACombine(SDNode *N);
578 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
579
580 SDValue XformToShuffleWithZero(SDNode *N);
581 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
582 const SDLoc &DL,
583 SDNode *N,
584 SDValue N0,
585 SDValue N1);
586 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
591 EVT VT, SDValue N0, SDValue N1,
592 SDNodeFlags Flags = SDNodeFlags());
593
594 SDValue visitShiftByConstant(SDNode *N);
595
596 SDValue foldSelectOfConstants(SDNode *N);
597 SDValue foldVSelectOfConstants(SDNode *N);
598 SDValue foldBinOpIntoSelect(SDNode *BO);
599 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
600 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
601 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
602 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
603 SDValue N2, SDValue N3, ISD::CondCode CC,
604 bool NotExtCompare = false);
605 SDValue convertSelectOfFPConstantsToLoadOffset(
606 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
607 ISD::CondCode CC);
608 SDValue foldSignChangeInBitcast(SDNode *N);
609 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
610 SDValue N2, SDValue N3, ISD::CondCode CC);
611 SDValue foldSelectOfBinops(SDNode *N);
612 SDValue foldSextSetcc(SDNode *N);
613 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
614 const SDLoc &DL);
615 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
616 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
617 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
618 SDValue False, ISD::CondCode CC, const SDLoc &DL);
619 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue unfoldMaskedMerge(SDNode *N);
622 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
623 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
624 const SDLoc &DL, bool foldBooleans);
625 SDValue rebuildSetCC(SDValue N);
626
627 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
628 SDValue &CC, bool MatchStrict = false) const;
629 bool isOneUseSetCC(SDValue N) const;
630
631 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
632 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
633
634 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
635 unsigned HiOp);
636 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
637 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
638 const TargetLowering &TLI);
639 SDValue foldPartialReduceMLAMulOp(SDNode *N);
640 SDValue foldPartialReduceAdd(SDNode *N);
641
642 SDValue CombineExtLoad(SDNode *N);
643 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
644 SDValue combineRepeatedFPDivisors(SDNode *N);
645 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
646 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
647 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
648 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
650 SDValue BuildSDIV(SDNode *N);
651 SDValue BuildSDIVPow2(SDNode *N);
652 SDValue BuildUDIV(SDNode *N);
653 SDValue BuildSREMPow2(SDNode *N);
654 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
655 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
656 bool KnownNeverZero = false,
657 bool InexpensiveOnly = false,
658 std::optional<EVT> OutVT = std::nullopt);
659 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
660 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
661 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
662 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
663 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
664 SDNodeFlags Flags, bool Reciprocal);
665 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
666 SDNodeFlags Flags, bool Reciprocal);
667 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
668 bool DemandHighBits = true);
669 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
670 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
671 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
672 bool HasPos, unsigned PosOpcode,
673 unsigned NegOpcode, const SDLoc &DL);
674 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
675 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
676 bool HasPos, unsigned PosOpcode,
677 unsigned NegOpcode, const SDLoc &DL);
678 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
679 bool FromAdd);
680 SDValue MatchLoadCombine(SDNode *N);
681 SDValue mergeTruncStores(StoreSDNode *N);
682 SDValue reduceLoadWidth(SDNode *N);
683 SDValue ReduceLoadOpStoreWidth(SDNode *N);
684 SDValue splitMergedValStore(StoreSDNode *ST);
685 SDValue TransformFPLoadStorePair(SDNode *N);
686 SDValue convertBuildVecZextToZext(SDNode *N);
687 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
688 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
689 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
690 SDValue reduceBuildVecToShuffle(SDNode *N);
691 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
692 ArrayRef<int> VectorMask, SDValue VecIn1,
693 SDValue VecIn2, unsigned LeftIdx,
694 bool DidSplitVec);
695 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
696
697 /// Walk up chain skipping non-aliasing memory nodes,
698 /// looking for aliasing nodes and adding them to the Aliases vector.
699 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
700 SmallVectorImpl<SDValue> &Aliases);
701
702 /// Return true if there is any possibility that the two addresses overlap.
703 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
704
705 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
706 /// chain (aliasing node.)
707 SDValue FindBetterChain(SDNode *N, SDValue Chain);
708
709 /// Try to replace a store and any possibly adjacent stores on
710 /// consecutive chains with better chains. Return true only if St is
711 /// replaced.
712 ///
713 /// Notice that other chains may still be replaced even if the function
714 /// returns false.
715 bool findBetterNeighborChains(StoreSDNode *St);
716
717 // Helper for findBetterNeighborChains. Walk up the store chain and add
718 // additional chained stores that do not overlap and can be parallelized.
719 bool parallelizeChainedStores(StoreSDNode *St);
720
721 /// Holds a pointer to an LSBaseSDNode as well as information on where it
722 /// is located in a sequence of memory operations connected by a chain.
723 struct MemOpLink {
724 // Ptr to the mem node.
725 LSBaseSDNode *MemNode;
726
727 // Offset from the base ptr.
728 int64_t OffsetFromBase;
729
730 MemOpLink(LSBaseSDNode *N, int64_t Offset)
731 : MemNode(N), OffsetFromBase(Offset) {}
732 };
733
734 // Classify the origin of a stored value.
735 enum class StoreSource { Unknown, Constant, Extract, Load };
736 StoreSource getStoreSource(SDValue StoreVal) {
737 switch (StoreVal.getOpcode()) {
738 case ISD::Constant:
739 case ISD::ConstantFP:
740 return StoreSource::Constant;
741 case ISD::BUILD_VECTOR:
742 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
743 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
744 return StoreSource::Constant;
745 return StoreSource::Unknown;
746 case ISD::EXTRACT_VECTOR_ELT:
747 case ISD::EXTRACT_SUBVECTOR:
748 return StoreSource::Extract;
749 case ISD::LOAD:
750 return StoreSource::Load;
751 default:
752 return StoreSource::Unknown;
753 }
754 }
755
756 /// This is a helper function for visitMUL to check the profitability
757 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
758 /// MulNode is the original multiply, AddNode is (add x, c1),
759 /// and ConstNode is c2.
760 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
761 SDValue ConstNode);
762
763 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
764 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
765 /// the type of the loaded value to be extended.
766 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
767 EVT LoadResultTy, EVT &ExtVT);
768
769 /// Helper function to calculate whether the given Load/Store can have its
770 /// width reduced to ExtVT.
771 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
772 EVT &MemVT, unsigned ShAmt = 0);
773
774 /// Used by BackwardsPropagateMask to find suitable loads.
775 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
776 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
777 ConstantSDNode *Mask, SDNode *&NodeToMask);
778 /// Attempt to propagate a given AND node back to load leaves so that they
779 /// can be combined into narrow loads.
780 bool BackwardsPropagateMask(SDNode *N);
781
782 /// Helper function for mergeConsecutiveStores which merges the component
783 /// store chains.
784 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
785 unsigned NumStores);
786
787 /// Helper function for mergeConsecutiveStores which checks if all the store
788 /// nodes have the same underlying object. We can still reuse the first
789 /// store's pointer info if all the stores are from the same object.
790 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
791
792 /// This is a helper function for mergeConsecutiveStores. When the source
793 /// elements of the consecutive stores are all constants or all extracted
794 /// vector elements, try to merge them into one larger store introducing
795 /// bitcasts if necessary. \return True if a merged store was created.
796 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
797 EVT MemVT, unsigned NumStores,
798 bool IsConstantSrc, bool UseVector,
799 bool UseTrunc);
800
801 /// This is a helper function for mergeConsecutiveStores. Stores that
802 /// potentially may be merged with St are placed in StoreNodes. On success,
803 /// returns a chain predecessor to all store candidates.
804 SDNode *getStoreMergeCandidates(StoreSDNode *St,
805 SmallVectorImpl<MemOpLink> &StoreNodes);
806
807 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
808 /// have indirect dependency through their operands. RootNode is the
809 /// predecessor to all stores calculated by getStoreMergeCandidates and is
810 /// used to prune the dependency check. \return True if safe to merge.
811 bool checkMergeStoreCandidatesForDependencies(
812 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
813 SDNode *RootNode);
814
815 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
816 /// chain has a call in it. \return True if a call is found.
817 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
818
819 /// This is a helper function for mergeConsecutiveStores. Given a list of
820 /// store candidates, find the first N that are consecutive in memory.
821 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
822 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
823 int64_t ElementSizeBytes) const;
824
825 /// This is a helper function for mergeConsecutiveStores. It is used for
826 /// store chains that are composed entirely of constant values.
827 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
828 unsigned NumConsecutiveStores,
829 EVT MemVT, SDNode *Root, bool AllowVectors);
830
831 /// This is a helper function for mergeConsecutiveStores. It is used for
832 /// store chains that are composed entirely of extracted vector elements.
833 /// When extracting multiple vector elements, try to store them in one
834 /// vector store rather than a sequence of scalar stores.
835 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
836 unsigned NumConsecutiveStores, EVT MemVT,
837 SDNode *Root);
838
839 /// This is a helper function for mergeConsecutiveStores. It is used for
840 /// store chains that are composed entirely of loaded values.
841 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
842 unsigned NumConsecutiveStores, EVT MemVT,
843 SDNode *Root, bool AllowVectors,
844 bool IsNonTemporalStore, bool IsNonTemporalLoad);
845
846 /// Merge consecutive store operations into a wide store.
847 /// This optimization uses wide integers or vectors when possible.
848 /// \return true if stores were merged.
849 bool mergeConsecutiveStores(StoreSDNode *St);
850
851 /// Try to transform a truncation where C is a constant:
852 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
853 ///
854 /// \p N needs to be a truncation and its first operand an AND. Other
855 /// requirements are checked by the function (e.g. that trunc is
856 /// single-use); if they are not met, an empty SDValue is returned.
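 /// For example (illustrative): truncating (and X:i32, 0x00FF00FF) to i16 can
 /// become (and (trunc X):i16, 0x00FF), so the mask and the AND are carried
 /// out in the narrower type.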
857 SDValue distributeTruncateThroughAnd(SDNode *N);
858
859 /// Helper function to determine whether the target supports operation
860 /// given by \p Opcode for type \p VT, that is, whether the operation
861 /// is legal or custom before legalizing operations, and whether it is
862 /// legal (but not custom) after legalization.
863 bool hasOperation(unsigned Opcode, EVT VT) {
864 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
865 }
866
867 bool hasUMin(EVT VT) const {
868 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
869 return (LK.first == TargetLoweringBase::TypeLegal ||
870 LK.first == TargetLoweringBase::TypePromoteInteger) &&
871 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
872 }
873
874 public:
875 /// Runs the dag combiner on all nodes in the work list
876 void Run(CombineLevel AtLevel);
877
878 SelectionDAG &getDAG() const { return DAG; }
879
880 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
881 EVT getShiftAmountTy(EVT LHSTy) {
882 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
883 }
884
885 /// This method returns true if we are running before type legalization or
886 /// if the specified VT is legal.
887 bool isTypeLegal(const EVT &VT) {
888 if (!LegalTypes) return true;
889 return TLI.isTypeLegal(VT);
890 }
891
892 /// Convenience wrapper around TargetLowering::getSetCCResultType
893 EVT getSetCCResultType(EVT VT) const {
894 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
895 }
896
897 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
898 SDValue OrigLoad, SDValue ExtLoad,
899 ISD::NodeType ExtType);
900 };
901
902/// This class is a DAGUpdateListener that removes any deleted
903/// nodes from the worklist.
904class WorklistRemover : public SelectionDAG::DAGUpdateListener {
905 DAGCombiner &DC;
906
907public:
908 explicit WorklistRemover(DAGCombiner &dc)
909 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
910
911 void NodeDeleted(SDNode *N, SDNode *E) override {
912 DC.removeFromWorklist(N);
913 }
914};
915
916class WorklistInserter : public SelectionDAG::DAGUpdateListener {
917 DAGCombiner &DC;
918
919public:
920 explicit WorklistInserter(DAGCombiner &dc)
921 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
922
923 // FIXME: Ideally we could add N to the worklist, but this causes exponential
924 // compile time costs in large DAGs, e.g. Halide.
925 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
926};
927
928} // end anonymous namespace
929
930//===----------------------------------------------------------------------===//
931// TargetLowering::DAGCombinerInfo implementation
932//===----------------------------------------------------------------------===//
933
934void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
935 ((DAGCombiner*)DC)->AddToWorklist(N);
936}
937
938SDValue TargetLowering::DAGCombinerInfo::
939 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
940 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
941}
942
943SDValue TargetLowering::DAGCombinerInfo::
944 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
945 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
946}
947
948SDValue TargetLowering::DAGCombinerInfo::
949 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
950 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
951}
952
953bool TargetLowering::DAGCombinerInfo::
954recursivelyDeleteUnusedNodes(SDNode *N) {
955 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
956}
957
958void TargetLowering::DAGCombinerInfo::
959CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
960 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
961}
962
963//===----------------------------------------------------------------------===//
964// Helper Functions
965//===----------------------------------------------------------------------===//
966
967void DAGCombiner::deleteAndRecombine(SDNode *N) {
968 removeFromWorklist(N);
969
970 // If the operands of this node are only used by the node, they will now be
971 // dead. Make sure to re-visit them and recursively delete dead nodes.
972 for (const SDValue &Op : N->ops())
973 // For an operand generating multiple values, one of the values may
974 // become dead allowing further simplification (e.g. split index
975 // arithmetic from an indexed load).
976 if (Op->hasOneUse() || Op->getNumValues() > 1)
977 AddToWorklist(Op.getNode());
978
979 DAG.DeleteNode(N);
980}
981
982// APInts must be the same size for most operations; this helper
983// function zero extends the shorter of the pair so that they match.
984// We provide an Offset so that we can create bitwidths that won't overflow.
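// Usage sketch (illustrative): with APInt LHS(8, 0x80) and APInt RHS(16,
// 0x1234), zeroExtendToMatch(LHS, RHS) widens both to 16 bits; passing
// Offset = 1 widens both to 17 bits so that, e.g., a following one-bit left
// shift cannot overflow.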
985static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
986 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
987 LHS = LHS.zext(Bits);
988 RHS = RHS.zext(Bits);
989}
990
991// Return true if this node is a setcc, or is a select_cc
992// that selects between the target values used for true and false, making it
993// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
994// the appropriate nodes based on the type of node we are checking. This
995// simplifies life a bit for the callers.
996bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
997 SDValue &CC, bool MatchStrict) const {
998 if (N.getOpcode() == ISD::SETCC) {
999 LHS = N.getOperand(0);
1000 RHS = N.getOperand(1);
1001 CC = N.getOperand(2);
1002 return true;
1003 }
1004
1005 if (MatchStrict &&
1006 (N.getOpcode() == ISD::STRICT_FSETCC ||
1007 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1008 LHS = N.getOperand(1);
1009 RHS = N.getOperand(2);
1010 CC = N.getOperand(3);
1011 return true;
1012 }
1013
1014 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1015 !TLI.isConstFalseVal(N.getOperand(3)))
1016 return false;
1017
1018 if (TLI.getBooleanContents(N.getValueType()) ==
1019 TargetLowering::UndefinedBooleanContent)
1020 return false;
1021
1022 LHS = N.getOperand(0);
1023 RHS = N.getOperand(1);
1024 CC = N.getOperand(4);
1025 return true;
1026}
1027
1028/// Return true if this is a SetCC-equivalent operation with only one use.
1029/// If this is true, it allows the users to invert the operation for free when
1030/// it is profitable to do so.
1031bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1032 SDValue N0, N1, N2;
1033 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1034 return true;
1035 return false;
1036}
1037
1038bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1039 if (!ScalarTy.isSimple())
1040 return false;
1041
1042 uint64_t MaskForTy = 0ULL;
1043 switch (ScalarTy.getSimpleVT().SimpleTy) {
1044 case MVT::i8:
1045 MaskForTy = 0xFFULL;
1046 break;
1047 case MVT::i16:
1048 MaskForTy = 0xFFFFULL;
1049 break;
1050 case MVT::i32:
1051 MaskForTy = 0xFFFFFFFFULL;
1052 break;
1053 default:
1054 return false;
1055 break;
1056 }
1057
1058 APInt Val;
1059 if (ISD::isConstantSplatVector(N, Val))
1060 return Val.getLimitedValue() == MaskForTy;
1061
1062 return false;
1063}
1064
1065// Determines if it is a constant integer or a splat/build vector of constant
1066// integers (and undefs).
1067// Do not permit build vector implicit truncation.
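// For example (illustrative), a v4i32 BUILD_VECTOR whose operands are i32
// constants (with undef lanes allowed) qualifies, while one whose operands
// would be implicitly truncated to the i32 element type is rejected because
// each operand's bit width must match the element bit width.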
1068static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1070 return !(Const->isOpaque() && NoOpaques);
1071 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1072 return false;
1073 unsigned BitWidth = N.getScalarValueSizeInBits();
1074 for (const SDValue &Op : N->op_values()) {
1075 if (Op.isUndef())
1076 continue;
1077 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1078 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1079 (Const->isOpaque() && NoOpaques))
1080 return false;
1081 }
1082 return true;
1083}
1084
1085// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1086// with undefs.
1087static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1088 if (V.getOpcode() != ISD::BUILD_VECTOR)
1089 return false;
1090 return isConstantOrConstantVector(V, NoOpaques) ||
1091 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1092}
1093
1094// Determine if this is an indexed load with an opaque target constant index.
1095static bool canSplitIdx(LoadSDNode *LD) {
1096 return MaySplitLoadIndex &&
1097 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1098 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1099}
1100
1101bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1102 const SDLoc &DL,
1103 SDNode *N,
1104 SDValue N0,
1105 SDValue N1) {
1106 // Currently this only tries to ensure we don't undo the GEP splits done by
1107 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1108 // we check if the following transformation would be problematic:
1109 // (load/store (add, (add, x, offset1), offset2)) ->
1110 // (load/store (add, x, offset1+offset2)).
1111
1112 // (load/store (add, (add, x, y), offset2)) ->
1113 // (load/store (add, (add, x, offset2), y)).
1114
1115 if (!N0.isAnyAdd())
1116 return false;
1117
1118 // Check for vscale addressing modes.
1119 // (load/store (add/sub (add x, y), vscale))
1120 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1121 // (load/store (add/sub (add x, y), (mul vscale, C)))
1122 if ((N1.getOpcode() == ISD::VSCALE ||
1123 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1124 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1125 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1126 N1.getValueType().getFixedSizeInBits() <= 64) {
1127 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1128 ? N1.getConstantOperandVal(0)
1129 : (N1.getOperand(0).getConstantOperandVal(0) *
1130 (N1.getOpcode() == ISD::SHL
1131 ? (1LL << N1.getConstantOperandVal(1))
1132 : N1.getConstantOperandVal(1)));
1133 if (Opc == ISD::SUB)
1134 ScalableOffset = -ScalableOffset;
1135 if (all_of(N->users(), [&](SDNode *Node) {
1136 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1137 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.ScalableOffset = ScalableOffset;
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1145 AS);
1146 }
1147 return false;
1148 }))
1149 return true;
1150 }
1151
1152 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1153 return false;
1154
1155 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1156 if (!C2)
1157 return false;
1158
1159 const APInt &C2APIntVal = C2->getAPIntValue();
1160 if (C2APIntVal.getSignificantBits() > 64)
1161 return false;
1162
1163 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1164 if (N0.hasOneUse())
1165 return false;
1166
1167 const APInt &C1APIntVal = C1->getAPIntValue();
1168 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1169 if (CombinedValueIntVal.getSignificantBits() > 64)
1170 return false;
1171 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1172
1173 for (SDNode *Node : N->users()) {
1174 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1175 // Is x[offset2] already not a legal addressing mode? If so then
1176 // reassociating the constants breaks nothing (we test offset2 because
1177 // that's the one we hope to fold into the load or store).
1178 TargetLoweringBase::AddrMode AM;
1179 AM.HasBaseReg = true;
1180 AM.BaseOffs = C2APIntVal.getSExtValue();
1181 EVT VT = LoadStore->getMemoryVT();
1182 unsigned AS = LoadStore->getAddressSpace();
1183 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1184 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1185 continue;
1186
1187 // Would x[offset1+offset2] still be a legal addressing mode?
1188 AM.BaseOffs = CombinedValue;
1189 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1190 return true;
1191 }
1192 }
1193 } else {
1194 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1195 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1196 return false;
1197
1198 for (SDNode *Node : N->users()) {
1199 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1200 if (!LoadStore)
1201 return false;
1202
1203 // Is x[offset2] a legal addressing mode? If so then
1204 // reassociating the constants breaks address pattern
1205 TargetLoweringBase::AddrMode AM;
1206 AM.HasBaseReg = true;
1207 AM.BaseOffs = C2APIntVal.getSExtValue();
1208 EVT VT = LoadStore->getMemoryVT();
1209 unsigned AS = LoadStore->getAddressSpace();
1210 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1211 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1212 return false;
1213 }
1214 return true;
1215 }
1216
1217 return false;
1218}
1219
1220/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1221/// \p N0 is the same kind of operation as \p Opc.
1222SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1223 SDValue N0, SDValue N1,
1224 SDNodeFlags Flags) {
1225 EVT VT = N0.getValueType();
1226
1227 if (N0.getOpcode() != Opc)
1228 return SDValue();
1229
1230 SDValue N00 = N0.getOperand(0);
1231 SDValue N01 = N0.getOperand(1);
1232
1233 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1234 SDNodeFlags NewFlags;
1235 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1236 Flags.hasNoUnsignedWrap())
1237 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1238
1239 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1240 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1241 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1242 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1243 N0->getFlags().hasDisjoint());
1244 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1245 }
1246 return SDValue();
1247 }
1248 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1249 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1250 // iff (op x, c1) has one use
1251 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1252 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1253 }
1254 }
1255
1256 // Check for repeated operand logic simplifications.
1257 if (Opc == ISD::AND || Opc == ISD::OR) {
1258 // (N00 & N01) & N00 --> N00 & N01
1259 // (N00 & N01) & N01 --> N00 & N01
1260 // (N00 | N01) | N00 --> N00 | N01
1261 // (N00 | N01) | N01 --> N00 | N01
1262 if (N1 == N00 || N1 == N01)
1263 return N0;
1264 }
1265 if (Opc == ISD::XOR) {
1266 // (N00 ^ N01) ^ N00 --> N01
1267 if (N1 == N00)
1268 return N01;
1269 // (N00 ^ N01) ^ N01 --> N00
1270 if (N1 == N01)
1271 return N00;
1272 }
1273
1274 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1275 if (N1 != N01) {
1276 // Reassociate if (op N00, N1) already exists.
1277 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1278 // If (op (op N00, N1), N01) already exists, we need to stop
1279 // reassociating to avoid an infinite loop.
1280 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1281 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1282 }
1283 }
1284
1285 if (N1 != N00) {
1286 // Reassociate if (op N01, N1) already exists.
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1288 // If (op (op N01, N1), N00) already exists, we need to stop
1289 // reassociating to avoid an infinite loop.
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1292 }
1293 }
1294
1295 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1296 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1297 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1298 // comparisons with the same predicate. This enables optimizations as the
1299 // following one:
1300 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1301 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1302 if (Opc == ISD::AND || Opc == ISD::OR) {
1303 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1304 N01->getOpcode() == ISD::SETCC) {
1305 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1306 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1307 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1308 if (CC1 == CC00 && CC1 != CC01) {
1309 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1310 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1311 }
1312 if (CC1 == CC01 && CC1 != CC00) {
1313 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1314 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1315 }
1316 }
1317 }
1318 }
1319
1320 return SDValue();
1321}
1322
1323/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1324/// same kind of operation as \p Opc.
1325SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1326 SDValue N1, SDNodeFlags Flags) {
1327 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1328
1329 // Floating-point reassociation is not allowed without loose FP math.
1330 if (N0.getValueType().isFloatingPoint() ||
1331 N1.getValueType().isFloatingPoint())
1332 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1333 return SDValue();
1334
1335 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1336 return Combined;
1337 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1338 return Combined;
1339 return SDValue();
1340}
1341
1342// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1343// Note that we only expect Flags to be passed from FP operations. For integer
1344// operations they need to be dropped.
1345SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1346 const SDLoc &DL, EVT VT, SDValue N0,
1347 SDValue N1, SDNodeFlags Flags) {
1348 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1349 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1350 N0->hasOneUse() && N1->hasOneUse() &&
1351 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1352 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1353 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1354 return DAG.getNode(RedOpc, DL, VT,
1355 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1356 N0.getOperand(0), N1.getOperand(0)));
1357 }
1358
1359 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1360 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1361 // single node.
1362 SDValue A, B, C, D, RedA, RedB;
1363 if (sd_match(N0, m_OneUse(m_c_BinOp(
1364 Opc,
1365 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1366 m_Value(RedA)),
1367 m_Value(B)))) &&
1368 sd_match(N1, m_OneUse(m_c_BinOp(
1369 Opc,
1370 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1371 m_Value(RedB)),
1372 m_Value(D)))) &&
1373 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1374 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1375 A.getValueType() == C.getValueType() &&
1376 hasOperation(Opc, A.getValueType()) &&
1377 TLI.shouldReassociateReduction(RedOpc, VT)) {
1378 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1379 (!N0->getFlags().hasAllowReassociation() ||
1380 !N1->getFlags().hasAllowReassociation() ||
1381 !RedA->getFlags().hasAllowReassociation() ||
1382 !RedB->getFlags().hasAllowReassociation()))
1383 return SDValue();
1384 SelectionDAG::FlagInserter FlagsInserter(
1385 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1386 RedB->getFlags());
1387 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1388 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1389 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1390 return DAG.getNode(Opc, DL, VT, Red, Op2);
1391 }
1392 return SDValue();
1393}
1394
1395SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1396 bool AddTo) {
1397 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1398 ++NodesCombined;
1399 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1400 To[0].dump(&DAG);
1401 dbgs() << " and " << NumTo - 1 << " other values\n");
1402 for (unsigned i = 0, e = NumTo; i != e; ++i)
1403 assert((!To[i].getNode() ||
1404 N->getValueType(i) == To[i].getValueType()) &&
1405 "Cannot combine value to value of different type!");
1406
1407 WorklistRemover DeadNodes(*this);
1408 DAG.ReplaceAllUsesWith(N, To);
1409 if (AddTo) {
1410 // Push the new nodes and any users onto the worklist
1411 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1412 if (To[i].getNode())
1413 AddToWorklistWithUsers(To[i].getNode());
1414 }
1415 }
1416
1417 // Finally, if the node is now dead, remove it from the graph. The node
1418 // may not be dead if the replacement process recursively simplified to
1419 // something else needing this node.
1420 if (N->use_empty())
1421 deleteAndRecombine(N);
1422 return SDValue(N, 0);
1423}
1424
1425void DAGCombiner::
1426CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1427 // Replace the old value with the new one.
1428 ++NodesCombined;
1429 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1430 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1431
1432 // Replace all uses.
1433 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1434
1435 // Push the new node and any (possibly new) users onto the worklist.
1436 AddToWorklistWithUsers(TLO.New.getNode());
1437
1438 // Finally, if the node is now dead, remove it from the graph.
1439 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1440}
1441
1442/// Check the specified integer node value to see if it can be simplified or if
1443/// things it uses can be simplified by bit propagation. If so, return true.
1444bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1445 const APInt &DemandedElts,
1446 bool AssumeSingleUse) {
1447 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1448 KnownBits Known;
1449 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1450 AssumeSingleUse))
1451 return false;
1452
1453 // Revisit the node.
1454 AddToWorklist(Op.getNode());
1455
1456 CommitTargetLoweringOpt(TLO);
1457 return true;
1458}
1459
1460/// Check the specified vector node value to see if it can be simplified or
1461/// if things it uses can be simplified as it only uses some of the elements.
1462/// If so, return true.
1463bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1464 const APInt &DemandedElts,
1465 bool AssumeSingleUse) {
1466 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1467 APInt KnownUndef, KnownZero;
1468 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1469 TLO, 0, AssumeSingleUse))
1470 return false;
1471
1472 // Revisit the node.
1473 AddToWorklist(Op.getNode());
1474
1475 CommitTargetLoweringOpt(TLO);
1476 return true;
1477}
1478
1479void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1480 SDLoc DL(Load);
1481 EVT VT = Load->getValueType(0);
1482 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1483
1484 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1485 Trunc.dump(&DAG); dbgs() << '\n');
1486
1487 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1489
1490 AddToWorklist(Trunc.getNode());
1491 recursivelyDeleteUnusedNodes(Load);
1492}
1493
1494SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1495 Replace = false;
1496 SDLoc DL(Op);
1497 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1498 LoadSDNode *LD = cast<LoadSDNode>(Op);
1499 EVT MemVT = LD->getMemoryVT();
1500 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1501 : LD->getExtensionType();
1502 Replace = true;
1503 return DAG.getExtLoad(ExtType, DL, PVT,
1504 LD->getChain(), LD->getBasePtr(),
1505 MemVT, LD->getMemOperand());
1506 }
1507
1508 unsigned Opc = Op.getOpcode();
1509 switch (Opc) {
1510 default: break;
1511 case ISD::AssertSext:
1512 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1513 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1514 break;
1515 case ISD::AssertZext:
1516 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1517 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1518 break;
1519 case ISD::Constant: {
1520 unsigned ExtOpc =
1521 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1522 return DAG.getNode(ExtOpc, DL, PVT, Op);
1523 }
1524 }
1525
1526 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1527 return SDValue();
1528 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1529}
1530
1531SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1532 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1533 return SDValue();
1534 EVT OldVT = Op.getValueType();
1535 SDLoc DL(Op);
1536 bool Replace = false;
1537 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1538 if (!NewOp.getNode())
1539 return SDValue();
1540 AddToWorklist(NewOp.getNode());
1541
1542 if (Replace)
1543 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1544 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1545 DAG.getValueType(OldVT));
1546}
1547
1548SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1549 EVT OldVT = Op.getValueType();
1550 SDLoc DL(Op);
1551 bool Replace = false;
1552 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1553 if (!NewOp.getNode())
1554 return SDValue();
1555 AddToWorklist(NewOp.getNode());
1556
1557 if (Replace)
1558 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1559 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1560}
1561
1562/// Promote the specified integer binary operation if the target indicates it is
1563/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1564/// i32 since i16 instructions are longer.
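/// As a rough illustration (assuming an x86-like target where i16 is
/// undesirable and i32 is the promoted type), the intended rewrite is:
///   (i16 add x, y) -> (i16 truncate (i32 add (any-extended x), (any-extended y)))
/// How each operand is actually widened is decided by PromoteOperand below.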
1565SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1566 if (!LegalOperations)
1567 return SDValue();
1568
1569 EVT VT = Op.getValueType();
1570 if (VT.isVector() || !VT.isInteger())
1571 return SDValue();
1572
1573 // If operation type is 'undesirable', e.g. i16 on x86, consider
1574 // promoting it.
1575 unsigned Opc = Op.getOpcode();
1576 if (TLI.isTypeDesirableForOp(Opc, VT))
1577 return SDValue();
1578
1579 EVT PVT = VT;
1580 // Consult target whether it is a good idea to promote this operation and
1581 // what's the right type to promote it to.
1582 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1583 assert(PVT != VT && "Don't know what type to promote to!");
1584
1585 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1586
1587 bool Replace0 = false;
1588 SDValue N0 = Op.getOperand(0);
1589 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1590
1591 bool Replace1 = false;
1592 SDValue N1 = Op.getOperand(1);
1593 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1594 SDLoc DL(Op);
1595
1596 SDValue RV =
1597 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1598
1599 // We are always replacing N0/N1's use in N and only need additional
1600 // replacements if there are additional uses.
1601 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1602 // (SDValue) here because the node may reference multiple values
1603 // (for example, the chain value of a load node).
1604 Replace0 &= !N0->hasOneUse();
1605 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1606
1607 // Combine Op here so it is preserved past replacements.
1608 CombineTo(Op.getNode(), RV);
1609
1610 // If operands have a use ordering, make sure we deal with
1611 // predecessor first.
1612 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1613 std::swap(N0, N1);
1614 std::swap(NN0, NN1);
1615 }
1616
1617 if (Replace0) {
1618 AddToWorklist(NN0.getNode());
1619 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1620 }
1621 if (Replace1) {
1622 AddToWorklist(NN1.getNode());
1623 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1624 }
1625 return Op;
1626 }
1627 return SDValue();
1628}
1629
1630/// Promote the specified integer shift operation if the target indicates it is
1631/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1632/// i32 since i16 instructions are longer.
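/// As a rough illustration (same x86-like assumption as above): an i16 SRL is
/// rewritten as
///   (i16 srl x, c) -> (i16 truncate (i32 srl (zero-extended x), c))
/// while an i16 SRA uses a sign-extended operand instead, so the bits shifted
/// in from the widened upper half remain correct.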
1633SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1634 if (!LegalOperations)
1635 return SDValue();
1636
1637 EVT VT = Op.getValueType();
1638 if (VT.isVector() || !VT.isInteger())
1639 return SDValue();
1640
1641 // If operation type is 'undesirable', e.g. i16 on x86, consider
1642 // promoting it.
1643 unsigned Opc = Op.getOpcode();
1644 if (TLI.isTypeDesirableForOp(Opc, VT))
1645 return SDValue();
1646
1647 EVT PVT = VT;
1648 // Consult target whether it is a good idea to promote this operation and
1649 // what's the right type to promote it to.
1650 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1651 assert(PVT != VT && "Don't know what type to promote to!");
1652
1653 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1654
1655 bool Replace = false;
1656 SDValue N0 = Op.getOperand(0);
1657 if (Opc == ISD::SRA)
1658 N0 = SExtPromoteOperand(N0, PVT);
1659 else if (Opc == ISD::SRL)
1660 N0 = ZExtPromoteOperand(N0, PVT);
1661 else
1662 N0 = PromoteOperand(N0, PVT, Replace);
1663
1664 if (!N0.getNode())
1665 return SDValue();
1666
1667 SDLoc DL(Op);
1668 SDValue N1 = Op.getOperand(1);
1669 SDValue RV =
1670 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1671
1672 if (Replace)
1673 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1674
1675 // Deal with Op being deleted.
1676 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1677 return RV;
1678 }
1679 return SDValue();
1680}
1681
1682SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1683 if (!LegalOperations)
1684 return SDValue();
1685
1686 EVT VT = Op.getValueType();
1687 if (VT.isVector() || !VT.isInteger())
1688 return SDValue();
1689
1690 // If operation type is 'undesirable', e.g. i16 on x86, consider
1691 // promoting it.
1692 unsigned Opc = Op.getOpcode();
1693 if (TLI.isTypeDesirableForOp(Opc, VT))
1694 return SDValue();
1695
1696 EVT PVT = VT;
1697 // Consult target whether it is a good idea to promote this operation and
1698 // what's the right type to promote it to.
1699 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1700 assert(PVT != VT && "Don't know what type to promote to!");
1701 // fold (aext (aext x)) -> (aext x)
1702 // fold (aext (zext x)) -> (zext x)
1703 // fold (aext (sext x)) -> (sext x)
1704 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1705 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1706 }
1707 return SDValue();
1708}
1709
1710bool DAGCombiner::PromoteLoad(SDValue Op) {
1711 if (!LegalOperations)
1712 return false;
1713
1714 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1715 return false;
1716
1717 EVT VT = Op.getValueType();
1718 if (VT.isVector() || !VT.isInteger())
1719 return false;
1720
1721 // If operation type is 'undesirable', e.g. i16 on x86, consider
1722 // promoting it.
1723 unsigned Opc = Op.getOpcode();
1724 if (TLI.isTypeDesirableForOp(Opc, VT))
1725 return false;
1726
1727 EVT PVT = VT;
1728 // Consult target whether it is a good idea to promote this operation and
1729 // what's the right type to promote it to.
1730 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1731 assert(PVT != VT && "Don't know what type to promote to!");
1732
1733 SDLoc DL(Op);
1734 SDNode *N = Op.getNode();
1735 LoadSDNode *LD = cast<LoadSDNode>(N);
1736 EVT MemVT = LD->getMemoryVT();
1737 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1738 : LD->getExtensionType();
1739 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1740 LD->getChain(), LD->getBasePtr(),
1741 MemVT, LD->getMemOperand());
1742 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1743
1744 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1745 Result.dump(&DAG); dbgs() << '\n');
1746
1747 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1748 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1749
1750 AddToWorklist(Result.getNode());
1751 recursivelyDeleteUnusedNodes(N);
1752 return true;
1753 }
1754
1755 return false;
1756}
1757
1758/// Recursively delete a node which has no uses and any operands for
1759/// which it is the only use.
1760///
1761/// Note that this both deletes the nodes and removes them from the worklist.
1762 /// It also adds any nodes that have had a user deleted to the worklist, as they
1763 /// may now have only one use and be subject to other combines.
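/// For example (illustrative): deleting a dead (add (load p), c) removes the
/// add first; if that add was the load's only user, the load is deleted next
/// and its still-used operands (chain, pointer) are added back to the worklist.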
1764bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1765 if (!N->use_empty())
1766 return false;
1767
1768 SmallSetVector<SDNode *, 16> Nodes;
1769 Nodes.insert(N);
1770 do {
1771 N = Nodes.pop_back_val();
1772 if (!N)
1773 continue;
1774
1775 if (N->use_empty()) {
1776 for (const SDValue &ChildN : N->op_values())
1777 Nodes.insert(ChildN.getNode());
1778
1779 removeFromWorklist(N);
1780 DAG.DeleteNode(N);
1781 } else {
1782 AddToWorklist(N);
1783 }
1784 } while (!Nodes.empty());
1785 return true;
1786}
1787
1788//===----------------------------------------------------------------------===//
1789// Main DAG Combiner implementation
1790//===----------------------------------------------------------------------===//
1791
1792void DAGCombiner::Run(CombineLevel AtLevel) {
1793 // set the instance variables, so that the various visit routines may use it.
1794 Level = AtLevel;
1795 LegalDAG = Level >= AfterLegalizeDAG;
1796 LegalOperations = Level >= AfterLegalizeVectorOps;
1797 LegalTypes = Level >= AfterLegalizeTypes;
1798
1799 WorklistInserter AddNodes(*this);
1800
1801 // Add all the dag nodes to the worklist.
1802 //
1803 // Note: Not all nodes are added to PruningList here. This is because the only
1804 // nodes which can be deleted are those which have no uses, and all other nodes
1805 // which would otherwise be added to the worklist by the first call to
1806 // getNextWorklistEntry are already present in it.
1807 for (SDNode &Node : DAG.allnodes())
1808 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1809
1810 // Create a dummy node (which is not added to allnodes), that adds a reference
1811 // to the root node, preventing it from being deleted, and tracking any
1812 // changes of the root.
1813 HandleSDNode Dummy(DAG.getRoot());
1814
1815 // While we have a valid worklist entry node, try to combine it.
1816 while (SDNode *N = getNextWorklistEntry()) {
1817 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1818 // N is deleted from the DAG, since they too may now be dead or may have a
1819 // reduced number of uses, allowing other xforms.
1820 if (recursivelyDeleteUnusedNodes(N))
1821 continue;
1822
1823 WorklistRemover DeadNodes(*this);
1824
1825 // If this combine is running after legalizing the DAG, re-legalize any
1826 // nodes pulled off the worklist.
1827 if (LegalDAG) {
1828 SmallSetVector<SDNode *, 16> UpdatedNodes;
1829 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1830
1831 for (SDNode *LN : UpdatedNodes)
1832 AddToWorklistWithUsers(LN);
1833
1834 if (!NIsValid)
1835 continue;
1836 }
1837
1838 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1839
1840 // Add any operands of the new node which have not yet been combined to the
1841 // worklist as well. getNextWorklistEntry flags nodes that have been
1842 // combined before. Because the worklist uniques things already, this won't
1843 // repeatedly process the same operand.
1844 for (const SDValue &ChildN : N->op_values())
1845 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1846 /*SkipIfCombinedBefore=*/true);
1847
1848 SDValue RV = combine(N);
1849
1850 if (!RV.getNode())
1851 continue;
1852
1853 ++NodesCombined;
1854
1855 // Invalidate cached info.
1856 ChainsWithoutMergeableStores.clear();
1857
1858 // If we get back the same node we passed in, rather than a new node or
1859 // zero, we know that the node must have defined multiple values and
1860 // CombineTo was used. Since CombineTo takes care of the worklist
1861 // mechanics for us, we have no work to do in this case.
1862 if (RV.getNode() == N)
1863 continue;
1864
1865 assert(N->getOpcode() != ISD::DELETED_NODE &&
1866 RV.getOpcode() != ISD::DELETED_NODE &&
1867 "Node was deleted but visit returned new node!");
1868
1869 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1870
1871 if (N->getNumValues() == RV->getNumValues())
1872 DAG.ReplaceAllUsesWith(N, RV.getNode());
1873 else {
1874 assert(N->getValueType(0) == RV.getValueType() &&
1875 N->getNumValues() == 1 && "Type mismatch");
1876 DAG.ReplaceAllUsesWith(N, &RV);
1877 }
1878
1879 // Push the new node and any users onto the worklist. Omit this if the
1880 // new node is the EntryToken (e.g. if a store managed to get optimized
1881 // out), because re-visiting the EntryToken and its users will not uncover
1882 // any additional opportunities, but there may be a large number of such
1883 // users, potentially causing compile time explosion.
1884 if (RV.getOpcode() != ISD::EntryToken)
1885 AddToWorklistWithUsers(RV.getNode());
1886
1887 // Finally, if the node is now dead, remove it from the graph. The node
1888 // may not be dead if the replacement process recursively simplified to
1889 // something else needing this node. This will also take care of adding any
1890 // operands which have lost a user to the worklist.
1891 recursivelyDeleteUnusedNodes(N);
1892 }
1893
1894 // If the root changed (e.g. it was a dead load), update the root.
1895 DAG.setRoot(Dummy.getValue());
1896 DAG.RemoveDeadNodes();
1897}
1898
1899SDValue DAGCombiner::visit(SDNode *N) {
1900 // clang-format off
1901 switch (N->getOpcode()) {
1902 default: break;
1903 case ISD::TokenFactor: return visitTokenFactor(N);
1904 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1905 case ISD::ADD: return visitADD(N);
1906 case ISD::PTRADD: return visitPTRADD(N);
1907 case ISD::SUB: return visitSUB(N);
1908 case ISD::SADDSAT:
1909 case ISD::UADDSAT: return visitADDSAT(N);
1910 case ISD::SSUBSAT:
1911 case ISD::USUBSAT: return visitSUBSAT(N);
1912 case ISD::ADDC: return visitADDC(N);
1913 case ISD::SADDO:
1914 case ISD::UADDO: return visitADDO(N);
1915 case ISD::SUBC: return visitSUBC(N);
1916 case ISD::SSUBO:
1917 case ISD::USUBO: return visitSUBO(N);
1918 case ISD::ADDE: return visitADDE(N);
1919 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1920 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1921 case ISD::SUBE: return visitSUBE(N);
1922 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1923 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1924 case ISD::SMULFIX:
1925 case ISD::SMULFIXSAT:
1926 case ISD::UMULFIX:
1927 case ISD::UMULFIXSAT: return visitMULFIX(N);
1928 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1929 case ISD::SDIV: return visitSDIV(N);
1930 case ISD::UDIV: return visitUDIV(N);
1931 case ISD::SREM:
1932 case ISD::UREM: return visitREM(N);
1933 case ISD::MULHU: return visitMULHU(N);
1934 case ISD::MULHS: return visitMULHS(N);
1935 case ISD::AVGFLOORS:
1936 case ISD::AVGFLOORU:
1937 case ISD::AVGCEILS:
1938 case ISD::AVGCEILU: return visitAVG(N);
1939 case ISD::ABDS:
1940 case ISD::ABDU: return visitABD(N);
1941 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1942 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1943 case ISD::SMULO:
1944 case ISD::UMULO: return visitMULO(N);
1945 case ISD::SMIN:
1946 case ISD::SMAX:
1947 case ISD::UMIN:
1948 case ISD::UMAX: return visitIMINMAX(N);
1949 case ISD::AND: return visitAND(N);
1950 case ISD::OR: return visitOR(N);
1951 case ISD::XOR: return visitXOR(N);
1952 case ISD::SHL: return visitSHL(N);
1953 case ISD::SRA: return visitSRA(N);
1954 case ISD::SRL: return visitSRL(N);
1955 case ISD::ROTR:
1956 case ISD::ROTL: return visitRotate(N);
1957 case ISD::FSHL:
1958 case ISD::FSHR: return visitFunnelShift(N);
1959 case ISD::SSHLSAT:
1960 case ISD::USHLSAT: return visitSHLSAT(N);
1961 case ISD::ABS: return visitABS(N);
1962 case ISD::BSWAP: return visitBSWAP(N);
1963 case ISD::BITREVERSE: return visitBITREVERSE(N);
1964 case ISD::CTLZ: return visitCTLZ(N);
1965 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1966 case ISD::CTTZ: return visitCTTZ(N);
1967 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1968 case ISD::CTPOP: return visitCTPOP(N);
1969 case ISD::SELECT: return visitSELECT(N);
1970 case ISD::VSELECT: return visitVSELECT(N);
1971 case ISD::SELECT_CC: return visitSELECT_CC(N);
1972 case ISD::SETCC: return visitSETCC(N);
1973 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1974 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1975 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1976 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1977 case ISD::AssertSext:
1978 case ISD::AssertZext: return visitAssertExt(N);
1979 case ISD::AssertAlign: return visitAssertAlign(N);
1980 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1981 case ISD::SIGN_EXTEND_VECTOR_INREG:
1982 case ISD::ZERO_EXTEND_VECTOR_INREG:
1983 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1984 case ISD::TRUNCATE: return visitTRUNCATE(N);
1985 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1986 case ISD::BITCAST: return visitBITCAST(N);
1987 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1988 case ISD::FADD: return visitFADD(N);
1989 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1990 case ISD::FSUB: return visitFSUB(N);
1991 case ISD::FMUL: return visitFMUL(N);
1992 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1993 case ISD::FMAD: return visitFMAD(N);
1994 case ISD::FDIV: return visitFDIV(N);
1995 case ISD::FREM: return visitFREM(N);
1996 case ISD::FSQRT: return visitFSQRT(N);
1997 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1998 case ISD::FPOW: return visitFPOW(N);
1999 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2000 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2001 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2002 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2003 case ISD::LROUND:
2004 case ISD::LLROUND:
2005 case ISD::LRINT:
2006 case ISD::LLRINT: return visitXROUND(N);
2007 case ISD::FP_ROUND: return visitFP_ROUND(N);
2008 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2009 case ISD::FNEG: return visitFNEG(N);
2010 case ISD::FABS: return visitFABS(N);
2011 case ISD::FFLOOR: return visitFFLOOR(N);
2012 case ISD::FMINNUM:
2013 case ISD::FMAXNUM:
2014 case ISD::FMINIMUM:
2015 case ISD::FMAXIMUM:
2016 case ISD::FMINIMUMNUM:
2017 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2018 case ISD::FCEIL: return visitFCEIL(N);
2019 case ISD::FTRUNC: return visitFTRUNC(N);
2020 case ISD::FFREXP: return visitFFREXP(N);
2021 case ISD::BRCOND: return visitBRCOND(N);
2022 case ISD::BR_CC: return visitBR_CC(N);
2023 case ISD::LOAD: return visitLOAD(N);
2024 case ISD::STORE: return visitSTORE(N);
2025 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2026 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2027 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2028 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2029 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2030 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2031 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2032 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2033 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2034 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2035 case ISD::MGATHER: return visitMGATHER(N);
2036 case ISD::MLOAD: return visitMLOAD(N);
2037 case ISD::MSCATTER: return visitMSCATTER(N);
2038 case ISD::MSTORE: return visitMSTORE(N);
2039 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2040 case ISD::PARTIAL_REDUCE_SMLA:
2041 case ISD::PARTIAL_REDUCE_UMLA:
2042 case ISD::PARTIAL_REDUCE_SUMLA:
2043 return visitPARTIAL_REDUCE_MLA(N);
2044 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2045 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2046 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2047 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2048 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2049 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2050 case ISD::FREEZE: return visitFREEZE(N);
2051 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2052 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2053 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2054 case ISD::VECREDUCE_FADD:
2055 case ISD::VECREDUCE_FMUL:
2056 case ISD::VECREDUCE_ADD:
2057 case ISD::VECREDUCE_MUL:
2058 case ISD::VECREDUCE_AND:
2059 case ISD::VECREDUCE_OR:
2060 case ISD::VECREDUCE_XOR:
2061 case ISD::VECREDUCE_SMAX:
2062 case ISD::VECREDUCE_SMIN:
2063 case ISD::VECREDUCE_UMAX:
2064 case ISD::VECREDUCE_UMIN:
2065 case ISD::VECREDUCE_FMAX:
2066 case ISD::VECREDUCE_FMIN:
2067 case ISD::VECREDUCE_FMAXIMUM:
2068 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2069#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2070#include "llvm/IR/VPIntrinsics.def"
2071 return visitVPOp(N);
2072 }
2073 // clang-format on
2074 return SDValue();
2075}
2076
2077SDValue DAGCombiner::combine(SDNode *N) {
2078 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2079 return SDValue();
2080
2081 SDValue RV;
2082 if (!DisableGenericCombines)
2083 RV = visit(N);
2084
2085 // If nothing happened, try a target-specific DAG combine.
2086 if (!RV.getNode()) {
2087 assert(N->getOpcode() != ISD::DELETED_NODE &&
2088 "Node was deleted but visit returned NULL!");
2089
2090 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2091 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2092
2093 // Expose the DAG combiner to the target combiner impls.
2094 TargetLowering::DAGCombinerInfo
2095 DagCombineInfo(DAG, Level, false, this);
2096
2097 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2098 }
2099 }
2100
2101 // If nothing happened still, try promoting the operation.
2102 if (!RV.getNode()) {
2103 switch (N->getOpcode()) {
2104 default: break;
2105 case ISD::ADD:
2106 case ISD::SUB:
2107 case ISD::MUL:
2108 case ISD::AND:
2109 case ISD::OR:
2110 case ISD::XOR:
2111 RV = PromoteIntBinOp(SDValue(N, 0));
2112 break;
2113 case ISD::SHL:
2114 case ISD::SRA:
2115 case ISD::SRL:
2116 RV = PromoteIntShiftOp(SDValue(N, 0));
2117 break;
2118 case ISD::SIGN_EXTEND:
2119 case ISD::ZERO_EXTEND:
2120 case ISD::ANY_EXTEND:
2121 RV = PromoteExtend(SDValue(N, 0));
2122 break;
2123 case ISD::LOAD:
2124 if (PromoteLoad(SDValue(N, 0)))
2125 RV = SDValue(N, 0);
2126 break;
2127 }
2128 }
2129
2130 // If N is a commutative binary node, try to eliminate it if the commuted
2131 // version is already present in the DAG.
2132 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2133 SDValue N0 = N->getOperand(0);
2134 SDValue N1 = N->getOperand(1);
2135
2136 // Constant operands are canonicalized to RHS.
2137 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2138 SDValue Ops[] = {N1, N0};
2139 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2140 N->getFlags());
2141 if (CSENode)
2142 return SDValue(CSENode, 0);
2143 }
2144 }
2145
2146 return RV;
2147}
2148
2149/// Given a node, return its input chain if it has one, otherwise return a null
2150/// sd operand.
2151 static SDValue getInputChainForNode(SDNode *N) {
2152 if (unsigned NumOps = N->getNumOperands()) {
2153 if (N->getOperand(0).getValueType() == MVT::Other)
2154 return N->getOperand(0);
2155 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2156 return N->getOperand(NumOps-1);
2157 for (unsigned i = 1; i < NumOps-1; ++i)
2158 if (N->getOperand(i).getValueType() == MVT::Other)
2159 return N->getOperand(i);
2160 }
2161 return SDValue();
2162}
2163
2164SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2165 SDValue Operand = N->getOperand(0);
2166 EVT VT = Operand.getValueType();
2167 SDLoc dl(N);
2168
2169 // Canonicalize undef to quiet NaN.
2170 if (Operand.isUndef()) {
2171 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2172 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2173 }
2174 return SDValue();
2175}
2176
2177SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2178 // If N has two operands, where one has an input chain equal to the other,
2179 // the 'other' chain is redundant.
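// For example (illustrative): in TokenFactor(Ch0, Ch1) where the node
// producing Ch0 is itself chained to Ch1 (say, a load whose input chain is
// Ch1), Ch0 already orders after Ch1, so the token factor reduces to Ch0.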
2180 if (N->getNumOperands() == 2) {
2181 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2182 return N->getOperand(0);
2183 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2184 return N->getOperand(1);
2185 }
2186
2187 // Don't simplify token factors if optnone.
2188 if (OptLevel == CodeGenOptLevel::None)
2189 return SDValue();
2190
2191 // Don't simplify the token factor if the node itself has too many operands.
2192 if (N->getNumOperands() > TokenFactorInlineLimit)
2193 return SDValue();
2194
2195 // If the sole user is a token factor, we should make sure we have a
2196 // chance to merge them together. This prevents TF chains from inhibiting
2197 // optimizations.
2198 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2199 AddToWorklist(*(N->user_begin()));
2200
2201 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2202 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2203 SmallPtrSet<SDNode*, 16> SeenOps;
2204 bool Changed = false; // If we should replace this token factor.
2205
2206 // Start out with this token factor.
2207 TFs.push_back(N);
2208
2209 // Iterate through token factors. The list of TFs grows when new token
2210 // factors are encountered.
2211 for (unsigned i = 0; i < TFs.size(); ++i) {
2212 // Limit number of nodes to inline, to avoid quadratic compile times.
2213 // We have to add the outstanding Token Factors to Ops, otherwise we might
2214 // drop Ops from the resulting Token Factors.
2215 if (Ops.size() > TokenFactorInlineLimit) {
2216 for (unsigned j = i; j < TFs.size(); j++)
2217 Ops.emplace_back(TFs[j], 0);
2218 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2219 // combiner worklist later.
2220 TFs.resize(i);
2221 break;
2222 }
2223
2224 SDNode *TF = TFs[i];
2225 // Check each of the operands.
2226 for (const SDValue &Op : TF->op_values()) {
2227 switch (Op.getOpcode()) {
2228 case ISD::EntryToken:
2229 // Entry tokens don't need to be added to the list. They are
2230 // redundant.
2231 Changed = true;
2232 break;
2233
2234 case ISD::TokenFactor:
2235 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2236 // Queue up for processing.
2237 TFs.push_back(Op.getNode());
2238 Changed = true;
2239 break;
2240 }
2241 [[fallthrough]];
2242
2243 default:
2244 // Only add if it isn't already in the list.
2245 if (SeenOps.insert(Op.getNode()).second)
2246 Ops.push_back(Op);
2247 else
2248 Changed = true;
2249 break;
2250 }
2251 }
2252 }
2253
2254 // Re-visit inlined Token Factors, to clean them up in case they have been
2255 // removed. Skip the first Token Factor, as this is the current node.
2256 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2257 AddToWorklist(TFs[i]);
2258
2259 // Remove nodes that are chained to another node in the list. Do so
2260 // by walking up chains breadth-first, stopping when we've seen
2261 // another operand. In general we must climb to the EntryNode, but we can exit
2262 // early if we find all remaining work is associated with just one operand as
2263 // no further pruning is possible.
2264
2265 // List of nodes to search through and original Ops from which they originate.
2266 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2267 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2268 SmallPtrSet<SDNode *, 16> SeenChains;
2269 bool DidPruneOps = false;
2270
2271 unsigned NumLeftToConsider = 0;
2272 for (const SDValue &Op : Ops) {
2273 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2274 OpWorkCount.push_back(1);
2275 }
2276
2277 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2278 // If this is an Op, we can remove the op from the list. Re-mark any
2279 // search associated with it as coming from the current OpNumber.
2280 if (SeenOps.contains(Op)) {
2281 Changed = true;
2282 DidPruneOps = true;
2283 unsigned OrigOpNumber = 0;
2284 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2285 OrigOpNumber++;
2286 assert((OrigOpNumber != Ops.size()) &&
2287 "expected to find TokenFactor Operand");
2288 // Re-mark worklist from OrigOpNumber to OpNumber
2289 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2290 if (Worklist[i].second == OrigOpNumber) {
2291 Worklist[i].second = OpNumber;
2292 }
2293 }
2294 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2295 OpWorkCount[OrigOpNumber] = 0;
2296 NumLeftToConsider--;
2297 }
2298 // Add if it's a new chain
2299 if (SeenChains.insert(Op).second) {
2300 OpWorkCount[OpNumber]++;
2301 Worklist.push_back(std::make_pair(Op, OpNumber));
2302 }
2303 };
2304
2305 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2306 // We need to consider at least 2 Ops to prune.
2307 if (NumLeftToConsider <= 1)
2308 break;
2309 auto CurNode = Worklist[i].first;
2310 auto CurOpNumber = Worklist[i].second;
2311 assert((OpWorkCount[CurOpNumber] > 0) &&
2312 "Node should not appear in worklist");
2313 switch (CurNode->getOpcode()) {
2314 case ISD::EntryToken:
2315 // Hitting EntryToken is the only way for the search to terminate without
2316 // hitting
2317 // another operand's search. Prevent us from marking this operand
2318 // considered.
2319 NumLeftToConsider++;
2320 break;
2321 case ISD::TokenFactor:
2322 for (const SDValue &Op : CurNode->op_values())
2323 AddToWorklist(i, Op.getNode(), CurOpNumber);
2324 break;
2325 case ISD::LIFETIME_START:
2326 case ISD::LIFETIME_END:
2327 case ISD::CopyFromReg:
2328 case ISD::CopyToReg:
2329 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2330 break;
2331 default:
2332 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2333 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2334 break;
2335 }
2336 OpWorkCount[CurOpNumber]--;
2337 if (OpWorkCount[CurOpNumber] == 0)
2338 NumLeftToConsider--;
2339 }
2340
2341 // If we've changed things around then replace token factor.
2342 if (Changed) {
2343 SDValue Result;
2344 if (Ops.empty()) {
2345 // The entry token is the only possible outcome.
2346 Result = DAG.getEntryNode();
2347 } else {
2348 if (DidPruneOps) {
2349 SmallVector<SDValue, 8> PrunedOps;
2350 //
2351 for (const SDValue &Op : Ops) {
2352 if (SeenChains.count(Op.getNode()) == 0)
2353 PrunedOps.push_back(Op);
2354 }
2355 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2356 } else {
2357 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2358 }
2359 }
2360 return Result;
2361 }
2362 return SDValue();
2363}
2364
2365/// MERGE_VALUES can always be eliminated.
2366SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2367 WorklistRemover DeadNodes(*this);
2368 // Replacing results may cause a different MERGE_VALUES to suddenly
2369 // be CSE'd with N, and carry its uses with it. Iterate until no
2370 // uses remain, to ensure that the node can be safely deleted.
2371 // First add the users of this node to the work list so that they
2372 // can be tried again once they have new operands.
2373 AddUsersToWorklist(N);
2374 do {
2375 // Do as a single replacement to avoid rewalking use lists.
2376 SmallVector<SDValue, 8> Ops(N->ops());
2377 DAG.ReplaceAllUsesWith(N, Ops.data());
2378 } while (!N->use_empty());
2379 deleteAndRecombine(N);
2380 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2381}
2382
2383/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2384/// ConstantSDNode pointer else nullptr.
2385 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2386 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2387 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2388}
2389
2390// isTruncateOf - If N is a truncate of some other value, return true, record
2391// the value being truncated in Op and which of Op's bits are zero/one in Known.
2392// This function computes KnownBits to avoid a duplicated call to
2393// computeKnownBits in the caller.
2394 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2395 KnownBits &Known) {
2396 if (N->getOpcode() == ISD::TRUNCATE) {
2397 Op = N->getOperand(0);
2398 Known = DAG.computeKnownBits(Op);
2399 if (N->getFlags().hasNoUnsignedWrap())
2400 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2401 return true;
2402 }
2403
2404 if (N.getValueType().getScalarType() != MVT::i1 ||
2405 !sd_match(
2406 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2407 return false;
2408
2409 Known = DAG.computeKnownBits(Op);
2410 return (Known.Zero | 1).isAllOnes();
2411}
2412
2413/// Return true if 'Use' is a load or a store that uses N as its base pointer
2414/// and that N may be folded in the load / store addressing mode.
2415 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2416 const TargetLowering &TLI) {
2417 EVT VT;
2418 unsigned AS;
2419
2420 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2421 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2422 return false;
2423 VT = LD->getMemoryVT();
2424 AS = LD->getAddressSpace();
2425 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2426 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2427 return false;
2428 VT = ST->getMemoryVT();
2429 AS = ST->getAddressSpace();
2430 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2431 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2432 return false;
2433 VT = LD->getMemoryVT();
2434 AS = LD->getAddressSpace();
2435 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2436 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2437 return false;
2438 VT = ST->getMemoryVT();
2439 AS = ST->getAddressSpace();
2440 } else {
2441 return false;
2442 }
2443
2444 TargetLowering::AddrMode AM;
2445 if (N->isAnyAdd()) {
2446 AM.HasBaseReg = true;
2447 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2448 if (Offset)
2449 // [reg +/- imm]
2450 AM.BaseOffs = Offset->getSExtValue();
2451 else
2452 // [reg +/- reg]
2453 AM.Scale = 1;
2454 } else if (N->getOpcode() == ISD::SUB) {
2455 AM.HasBaseReg = true;
2456 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2457 if (Offset)
2458 // [reg +/- imm]
2459 AM.BaseOffs = -Offset->getSExtValue();
2460 else
2461 // [reg +/- reg]
2462 AM.Scale = 1;
2463 } else {
2464 return false;
2465 }
2466
2467 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2468 VT.getTypeForEVT(*DAG.getContext()), AS);
2469}
2470
2471/// This inverts a canonicalization in IR that replaces a variable select arm
2472/// with an identity constant. Codegen improves if we re-use the variable
2473/// operand rather than load a constant. This can also be converted into a
2474/// masked vector operation if the target supports it.
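/// A concrete (illustrative) instance using the additive identity constant:
///   add X, (vselect Cond, 0, Y) --> vselect Cond, freeze(X), (add freeze(X), Y)
/// so codegen re-uses the variable operand X instead of materializing a
/// constant, and the select may later become a masked operation.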
2475 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2476 bool ShouldCommuteOperands) {
2477 // Match a select as operand 1. The identity constant that we are looking for
2478 // is only valid as operand 1 of a non-commutative binop.
2479 SDValue N0 = N->getOperand(0);
2480 SDValue N1 = N->getOperand(1);
2481 if (ShouldCommuteOperands)
2482 std::swap(N0, N1);
2483
2484 unsigned SelOpcode = N1.getOpcode();
2485 if ((SelOpcode != ISD::VSELECT && SelOpcode != ISD::SELECT) ||
2486 !N1.hasOneUse())
2487 return SDValue();
2488
2489 // We can't hoist all instructions because of immediate UB (not speculatable).
2490 // For example div/rem by zero.
2491 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2492 return SDValue();
2493
2494 unsigned Opcode = N->getOpcode();
2495 EVT VT = N->getValueType(0);
2496 SDValue Cond = N1.getOperand(0);
2497 SDValue TVal = N1.getOperand(1);
2498 SDValue FVal = N1.getOperand(2);
2499 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2500
2501 // This transform increases uses of N0, so freeze it to be safe.
2502 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2503 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2504 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2505 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2506 FVal)) {
2507 SDValue F0 = DAG.getFreeze(N0);
2508 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510 }
2511 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2513 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2514 TVal)) {
2515 SDValue F0 = DAG.getFreeze(N0);
2516 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2517 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2518 }
2519
2520 return SDValue();
2521}
2522
2523SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2525 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2526 "Unexpected binary operator");
2527
2528 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2529 return Sel;
2530
2531 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2532 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2533 return Sel;
2534
2535 // Don't do this unless the old select is going away. We want to eliminate the
2536 // binary operator, not replace a binop with a select.
2537 // TODO: Handle ISD::SELECT_CC.
2538 unsigned SelOpNo = 0;
2539 SDValue Sel = BO->getOperand(0);
2540 auto BinOpcode = BO->getOpcode();
2541 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2542 SelOpNo = 1;
2543 Sel = BO->getOperand(1);
2544
2545 // Peek through trunc to shift amount type.
2546 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2547 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2548 // This is valid when the truncated bits of x are already zero.
2549 SDValue Op;
2550 KnownBits Known;
2551 if (isTruncateOf(DAG, Sel, Op, Known) &&
2552 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2553 Sel = Op;
2554 }
2555 }
2556
2557 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2558 return SDValue();
2559
2560 SDValue CT = Sel.getOperand(1);
2561 if (!isConstantOrConstantVector(CT, true) &&
2562 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2563 return SDValue();
2564
2565 SDValue CF = Sel.getOperand(2);
2566 if (!isConstantOrConstantVector(CF, true) &&
2567 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2568 return SDValue();
2569
2570 // Bail out if any constants are opaque because we can't constant fold those.
2571 // The exception is "and" and "or" with either 0 or -1 in which case we can
2572 // propagate non constant operands into select. I.e.:
2573 // and (select Cond, 0, -1), X --> select Cond, 0, X
2574 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2575 bool CanFoldNonConst =
2576 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2577 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2578 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2579
2580 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2581 if (!CanFoldNonConst &&
2582 !isConstantOrConstantVector(CBO, true) &&
2583 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2584 return SDValue();
2585
2586 SDLoc DL(Sel);
2587 SDValue NewCT, NewCF;
2588 EVT VT = BO->getValueType(0);
2589
2590 if (CanFoldNonConst) {
2591 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594 NewCT = CT;
2595 else
2596 NewCT = CBO;
2597
2598 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600 NewCF = CF;
2601 else
2602 NewCF = CBO;
2603 } else {
2604 // We have a select-of-constants followed by a binary operator with a
2605 // constant. Eliminate the binop by pulling the constant math into the
2606 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2607 // CBO, CF + CBO
2608 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610 if (!NewCT)
2611 return SDValue();
2612
2613 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615 if (!NewCF)
2616 return SDValue();
2617 }
2618
2619 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2620}
2621
2622 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2623 SelectionDAG &DAG) {
2624 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 "Expecting add or sub");
2626
2627 // Match a constant operand and a zext operand for the math instruction:
2628 // add Z, C
2629 // sub C, Z
2630 bool IsAdd = N->getOpcode() == ISD::ADD;
2631 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2632 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2633 auto *CN = dyn_cast<ConstantSDNode>(C);
2634 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2635 return SDValue();
2636
2637 // Match the zext operand as a setcc of a boolean.
2638 if (Z.getOperand(0).getValueType() != MVT::i1)
2639 return SDValue();
2640
2641 // Match the compare as: setcc (X & 1), 0, eq.
2642 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2643 m_SpecificCondCode(ISD::SETEQ))))
2644 return SDValue();
2645
2646 // We are adding/subtracting a constant and an inverted low bit. Turn that
2647 // into a subtract/add of the low bit with incremented/decremented constant:
2648 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2649 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
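// Quick check of the first identity (illustrative): zext i1 (seteq (X & 1), 0)
// is 1 - (X & 1), so adding C yields (C + 1) - (X & 1), i.e. sub C+1, (zext (X & 1)).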
2650 EVT VT = C.getValueType();
2651 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2652 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2653 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2654 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2655}
2656
2657// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
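// This relies on A + B == (A | B) + (A & B) and A ^ B == (A | B) - (A & B),
// which give ceil((A + B) / 2) == (A | B) - ((A ^ B) >> 1) without computing
// the potentially overflowing sum A + B.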
2658SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2659 SDValue N0 = N->getOperand(0);
2660 EVT VT = N0.getValueType();
2661 SDValue A, B;
2662
2663 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2664 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2665 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2666 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2667 }
2668 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2669 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2670 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2671 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2672 }
2673 return SDValue();
2674}
2675
2676/// Try to fold a pointer arithmetic node.
2677/// This needs to be done separately from normal addition, because pointer
2678/// addition is not commutative.
2679SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2680 SDValue N0 = N->getOperand(0);
2681 SDValue N1 = N->getOperand(1);
2682 EVT PtrVT = N0.getValueType();
2683 EVT IntVT = N1.getValueType();
2684 SDLoc DL(N);
2685
2686 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2687 // combines here depend on this assumption.
2688 assert(PtrVT == IntVT &&
2689 "PTRADD with different operand types is not supported");
2690
2691 // fold (ptradd x, 0) -> x
2692 if (isNullConstant(N1))
2693 return N0;
2694
2695 // fold (ptradd 0, x) -> x
2696 if (PtrVT == IntVT && isNullConstant(N0))
2697 return N1;
2698
2699 if (N0.getOpcode() != ISD::PTRADD ||
2700 reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2701 return SDValue();
2702
2703 SDValue X = N0.getOperand(0);
2704 SDValue Y = N0.getOperand(1);
2705 SDValue Z = N1;
2706 bool N0OneUse = N0.hasOneUse();
2707 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2708 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2709
2710 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2711 // * y is a constant and (ptradd x, y) has one use; or
2712 // * y and z are both constants.
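// For example (illustrative): (ptradd (ptradd p, 8), 16) -> (ptradd p, 24),
// and with a one-use inner ptradd, (ptradd (ptradd p, 8), z) -> (ptradd p, (add 8, z)).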
2713 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2714 // If both additions in the original were NUW, the new ones are as well.
2715 SDNodeFlags Flags =
2716 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2717 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2718 AddToWorklist(Add.getNode());
2719 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2720 }
2721
2722 // TODO: There is another possible fold here that was proven useful.
2723 // It would be this:
2724 //
2725 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2726 // * (ptradd x, y) has one use; and
2727 // * y is a constant; and
2728 // * z is not a constant.
2729 //
2730 // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2731 // opportunity to select more complex instructions such as SUBPT and
2732 // MSUBPT. However, a hypothetical corner case has been found that we could
2733 // not avoid. Consider this (pseudo-POSIX C):
2734 //
2735 // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2736 // char *p = mmap(LARGE_CONSTANT);
2737 // char *q = foo(p, -LARGE_CONSTANT);
2738 //
2739 // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2740 // further + z takes it back to the start of the mapping, so valid,
2741 // regardless of the address mmap gave back. However, if mmap gives you an
2742 // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2743 // borrow from the high bits (with the subsequent + z carrying back into
2744 // the high bits to give you a well-defined pointer) and thus trip
2745 // FEAT_CPA's pointer corruption checks.
2746 //
2747 // We leave this fold as an opportunity for future work, addressing the
2748 // corner case for FEAT_CPA, as well as reconciling the solution with the
2749 // more general application of pointer arithmetic in other future targets.
2750 // For now each architecture that wants this fold must implement it in the
2751 // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2752
2753 return SDValue();
2754}
2755
2756/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2757/// a shift and add with a different constant.
2758 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2759 SelectionDAG &DAG) {
2760 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2761 "Expecting add or sub");
2762
2763 // We need a constant operand for the add/sub, and the other operand is a
2764 // logical shift right: add (srl), C or sub C, (srl).
2765 bool IsAdd = N->getOpcode() == ISD::ADD;
2766 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2767 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2768 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2769 ShiftOp.getOpcode() != ISD::SRL)
2770 return SDValue();
2771
2772 // The shift must be of a 'not' value.
2773 SDValue Not = ShiftOp.getOperand(0);
2774 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2775 return SDValue();
2776
2777 // The shift must be moving the sign bit to the least-significant-bit.
2778 EVT VT = ShiftOp.getValueType();
2779 SDValue ShAmt = ShiftOp.getOperand(1);
2780 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2781 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2782 return SDValue();
2783
2784 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2785 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2786 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
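// Illustrative check on the two possible sign bits: if X is negative,
// srl (not X), 31 == 0 and sra X, 31 == -1; if X is non-negative,
// srl (not X), 31 == 1 and sra X, 31 == 0. Either way
// srl (not X), 31 == (sra X, 31) + 1, so the adjustment folds into the constant.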
2787 if (SDValue NewC = DAG.FoldConstantArithmetic(
2788 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2789 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2790 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2791 Not.getOperand(0), ShAmt);
2792 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2793 }
2794
2795 return SDValue();
2796}
2797
2798static bool
2799 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2800 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2801 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2802}
2803
2804/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2805/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2806/// are no common bits set in the operands).
2807SDValue DAGCombiner::visitADDLike(SDNode *N) {
2808 SDValue N0 = N->getOperand(0);
2809 SDValue N1 = N->getOperand(1);
2810 EVT VT = N0.getValueType();
2811 SDLoc DL(N);
2812
2813 // fold (add x, undef) -> undef
2814 if (N0.isUndef())
2815 return N0;
2816 if (N1.isUndef())
2817 return N1;
2818
2819 // fold (add c1, c2) -> c1+c2
2820 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2821 return C;
2822
2823 // canonicalize constant to RHS
2824 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2825 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2826 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2827
2828 if (areBitwiseNotOfEachother(N0, N1))
2829 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2830
2831 // fold vector ops
2832 if (VT.isVector()) {
2833 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2834 return FoldedVOp;
2835
2836 // fold (add x, 0) -> x, vector edition
2837 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2838 return N0;
2839 }
2840
2841 // fold (add x, 0) -> x
2842 if (isNullConstant(N1))
2843 return N0;
2844
2845 if (N0.getOpcode() == ISD::SUB) {
2846 SDValue N00 = N0.getOperand(0);
2847 SDValue N01 = N0.getOperand(1);
2848
2849 // fold ((A-c1)+c2) -> (A+(c2-c1))
2850 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2851 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2852
2853 // fold ((c1-A)+c2) -> (c1+c2)-A
2854 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2855 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2856 }
2857
2858 // add (sext i1 X), 1 -> zext (not i1 X)
2859 // We don't transform this pattern:
2860 // add (zext i1 X), -1 -> sext (not i1 X)
2861 // because most (?) targets generate better code for the zext form.
2862 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2863 isOneOrOneSplat(N1)) {
2864 SDValue X = N0.getOperand(0);
2865 if ((!LegalOperations ||
2866 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2867 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2868 X.getScalarValueSizeInBits() == 1) {
2869 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2870 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2871 }
2872 }
2873
2874 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2875 // iff (or x, c0) is equivalent to (add x, c0).
2876 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2877 // iff (xor x, c0) is equivalent to (add x, c0).
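// Here "equivalent to (add x, c0)" is what DAG.isADDLike checks, e.g. an OR
// whose operands have no set bits in common (a disjoint OR), in which case
// (or x, c0) computes exactly x + c0 and the two constants can be merged.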
2878 if (DAG.isADDLike(N0)) {
2879 SDValue N01 = N0.getOperand(1);
2880 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2881 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2882 }
2883
2884 if (SDValue NewSel = foldBinOpIntoSelect(N))
2885 return NewSel;
2886
2887 // reassociate add
2888 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2889 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2890 return RADD;
2891
2892 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2893 // equivalent to (add x, c).
2894 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2895 // equivalent to (add x, c).
2896 // Do this optimization only when adding c does not introduce instructions
2897 // for adding carries.
2898 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2899 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2900 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2901 // If N0's type does not split or is a sign mask, it does not introduce
2902 // add carry.
2903 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2904 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2905 TyActn == TargetLoweringBase::TypePromoteInteger ||
2906 isMinSignedConstant(N0.getOperand(1));
2907 if (NoAddCarry)
2908 return DAG.getNode(
2909 ISD::ADD, DL, VT,
2910 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2911 N0.getOperand(1));
2912 }
2913 return SDValue();
2914 };
2915 if (SDValue Add = ReassociateAddOr(N0, N1))
2916 return Add;
2917 if (SDValue Add = ReassociateAddOr(N1, N0))
2918 return Add;
2919
2920 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2921 if (SDValue SD =
2922 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2923 return SD;
2924 }
2925
2926 SDValue A, B, C, D;
2927
2928 // fold ((0-A) + B) -> B-A
2929 if (sd_match(N0, m_Neg(m_Value(A))))
2930 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2931
2932 // fold (A + (0-B)) -> A-B
2933 if (sd_match(N1, m_Neg(m_Value(B))))
2934 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2935
2936 // fold (A+(B-A)) -> B
2937 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2938 return B;
2939
2940 // fold ((B-A)+A) -> B
2941 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2942 return B;
2943
2944 // fold ((A-B)+(C-A)) -> (C-B)
2945 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2946 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2947 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2948
2949 // fold ((A-B)+(B-C)) -> (A-C)
2950 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2951 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2952 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2953
2954 // fold (A+(B-(A+C))) to (B-C)
2955 // fold (A+(B-(C+A))) to (B-C)
2956 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2957 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2958
2959 // fold (A+((B-A)+or-C)) to (B+or-C)
2960 if (sd_match(N1,
2961 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2962 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2963 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2964
2965 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2966 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2967 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2968 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2969 return DAG.getNode(ISD::SUB, DL, VT,
2970 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2971 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2972
2973 // fold (add (umax X, C), -C) --> (usubsat X, C)
2974 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2975 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2976 return (!Max && !Op) ||
2977 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2978 };
2979 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2980 /*AllowUndefs*/ true))
2981 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2982 N0.getOperand(1));
2983 }
2984
2985 if (SimplifyDemandedBits(SDValue(N, 0)))
2986 return SDValue(N, 0);
2987
2988 if (isOneOrOneSplat(N1)) {
2989 // fold (add (xor a, -1), 1) -> (sub 0, a)
2990 if (isBitwiseNot(N0))
2991 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2992 N0.getOperand(0));
2993
2994 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2995 if (N0.getOpcode() == ISD::ADD) {
2996 SDValue A, Xor;
2997
2998 if (isBitwiseNot(N0.getOperand(0))) {
2999 A = N0.getOperand(1);
3000 Xor = N0.getOperand(0);
3001 } else if (isBitwiseNot(N0.getOperand(1))) {
3002 A = N0.getOperand(0);
3003 Xor = N0.getOperand(1);
3004 }
3005
3006 if (Xor)
3007 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3008 }
3009
3010 // Look for:
3011 // add (add x, y), 1
3012 // And if the target does not like this form then turn into:
3013 // sub y, (xor x, -1)
3014 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3015 N0.hasOneUse() &&
3016 // Limit this to after legalization if the add has wrap flags
3017 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3018 !N->getFlags().hasNoSignedWrap()))) {
3019 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3020 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3021 }
3022 }
3023
3024 // (x - y) + -1 -> add (xor y, -1), x
3025 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3026 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3027 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3028 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3029 }
3030
3031 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3032 // This can help if the inner add has multiple uses.
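// The rewrite is plain distribution: (A + CA) * CM + CB == A * CM + (CA * CM + CB),
// with the new constant CA * CM + CB folded at compile time (guarded by the
// isLegalAddImmediate check below).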
3033 APInt CM, CA;
3034 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3035 if (VT.getScalarSizeInBits() <= 64) {
3036 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3037 m_ConstInt(CM)))) &&
3038 TLI.isLegalAddImmediate(
3039 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3040 SDNodeFlags Flags;
3041 // If all the inputs are nuw, the outputs can be nuw. If all the input
3042 // are _also_ nsw the outputs can be too.
3043 if (N->getFlags().hasNoUnsignedWrap() &&
3044 N0->getFlags().hasNoUnsignedWrap() &&
3045 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3046 Flags.setNoUnsignedWrap(true);
3047 if (N->getFlags().hasNoSignedWrap() &&
3048 N0->getFlags().hasNoSignedWrap() &&
3049 N0.getOperand(0)->getFlags().hasNoSignedWrap())
3050 Flags.setNoSignedWrap(true);
3051 }
3052 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3053 DAG.getConstant(CM, DL, VT), Flags);
3054 return DAG.getNode(
3055 ISD::ADD, DL, VT, Mul,
3056 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3057 }
3058 // Also look in case there is an intermediate add.
3059 if (sd_match(N0, m_OneUse(m_Add(
3060 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3061 m_ConstInt(CM))),
3062 m_Value(B)))) &&
3063 TLI.isLegalAddImmediate(
3064 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3065 SDNodeFlags Flags;
3066 // If all the inputs are nuw, the outputs can be nuw. If all the input
3067 // are _also_ nsw the outputs can be too.
3068 SDValue OMul =
3069 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3070 if (N->getFlags().hasNoUnsignedWrap() &&
3071 N0->getFlags().hasNoUnsignedWrap() &&
3072 OMul->getFlags().hasNoUnsignedWrap() &&
3073 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3074 Flags.setNoUnsignedWrap(true);
3075 if (N->getFlags().hasNoSignedWrap() &&
3076 N0->getFlags().hasNoSignedWrap() &&
3077 OMul->getFlags().hasNoSignedWrap() &&
3078 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3079 Flags.setNoSignedWrap(true);
3080 }
3081 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3082 DAG.getConstant(CM, DL, VT), Flags);
3083 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3084 return DAG.getNode(
3085 ISD::ADD, DL, VT, Add,
3086 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3087 }
3088 }
3089 }
3090
3091 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3092 return Combined;
3093
3094 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3095 return Combined;
3096
3097 return SDValue();
3098}
3099
3100// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
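// This relies on A + B == 2*(A & B) + (A ^ B): the carried bits plus the
// non-carried bits. Halving each term gives (A & B) + ((A ^ B) >> 1), which is
// (A + B) >> 1 computed without the intermediate overflow.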
3101SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3102 SDValue N0 = N->getOperand(0);
3103 EVT VT = N0.getValueType();
3104 SDValue A, B;
3105
3106 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3107 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3108 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3109 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3110 }
3111 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3112 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3113 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3114 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3115 }
3116
3117 return SDValue();
3118}
3119
3120SDValue DAGCombiner::visitADD(SDNode *N) {
3121 SDValue N0 = N->getOperand(0);
3122 SDValue N1 = N->getOperand(1);
3123 EVT VT = N0.getValueType();
3124 SDLoc DL(N);
3125
3126 if (SDValue Combined = visitADDLike(N))
3127 return Combined;
3128
3129 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3130 return V;
3131
3132 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3133 return V;
3134
3135 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3136 return V;
3137
3138 // Try to match AVGFLOOR fixedwidth pattern
3139 if (SDValue V = foldAddToAvg(N, DL))
3140 return V;
3141
3142 // fold (a+b) -> (a|b) iff a and b share no bits.
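// With no common set bits there can be no carries, so each result bit is just
// the OR of the corresponding operand bits.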
3143 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3144 DAG.haveNoCommonBitsSet(N0, N1))
3145 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3146
3147 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3148 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3149 const APInt &C0 = N0->getConstantOperandAPInt(0);
3150 const APInt &C1 = N1->getConstantOperandAPInt(0);
3151 return DAG.getVScale(DL, VT, C0 + C1);
3152 }
3153
3154 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3155 if (N0.getOpcode() == ISD::ADD &&
3156 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3157 N1.getOpcode() == ISD::VSCALE) {
3158 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3159 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3160 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3161 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3162 }
3163
3164 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3165 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3166 N1.getOpcode() == ISD::STEP_VECTOR) {
3167 const APInt &C0 = N0->getConstantOperandAPInt(0);
3168 const APInt &C1 = N1->getConstantOperandAPInt(0);
3169 APInt NewStep = C0 + C1;
3170 return DAG.getStepVector(DL, VT, NewStep);
3171 }
3172
3173 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3174 if (N0.getOpcode() == ISD::ADD &&
3175 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3176 N1.getOpcode() == ISD::STEP_VECTOR) {
3177 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3178 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3179 APInt NewStep = SV0 + SV1;
3180 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3181 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3182 }
3183
3184 return SDValue();
3185}
3186
3187SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3188 unsigned Opcode = N->getOpcode();
3189 SDValue N0 = N->getOperand(0);
3190 SDValue N1 = N->getOperand(1);
3191 EVT VT = N0.getValueType();
3192 bool IsSigned = Opcode == ISD::SADDSAT;
3193 SDLoc DL(N);
3194
3195 // fold (add_sat x, undef) -> -1
3196 if (N0.isUndef() || N1.isUndef())
3197 return DAG.getAllOnesConstant(DL, VT);
3198
3199 // fold (add_sat c1, c2) -> c3
3200 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3201 return C;
3202
3203 // canonicalize constant to RHS
3206 return DAG.getNode(Opcode, DL, VT, N1, N0);
3207
3208 // fold vector ops
3209 if (VT.isVector()) {
3210 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3211 return FoldedVOp;
3212
3213 // fold (add_sat x, 0) -> x, vector edition
3214 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3215 return N0;
3216 }
3217
3218 // fold (add_sat x, 0) -> x
3219 if (isNullConstant(N1))
3220 return N0;
3221
3222 // If it cannot overflow, transform into an add.
3223 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3224 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3225
3226 return SDValue();
3227}
3228
3229 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3230 bool ForceCarryReconstruction = false) {
3231 bool Masked = false;
3232
3233 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3234 while (true) {
3235 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3236 V = V.getOperand(0);
3237 continue;
3238 }
3239
3240 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3241 if (ForceCarryReconstruction)
3242 return V;
3243
3244 Masked = true;
3245 V = V.getOperand(0);
3246 continue;
3247 }
3248
3249 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3250 return V;
3251
3252 break;
3253 }
3254
3255 // If this is not a carry, return.
3256 if (V.getResNo() != 1)
3257 return SDValue();
3258
3259 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3260 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3261 return SDValue();
3262
3263 EVT VT = V->getValueType(0);
3264 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3265 return SDValue();
3266
3267 // If the result is masked, then no matter what kind of bool it is we can
3268 // return. If it isn't, then we need to make sure the bool type is either 0 or
3269 // 1 and not other values.
3270 if (Masked ||
3271 TLI.getBooleanContents(V.getValueType()) ==
3272 TargetLowering::ZeroOrOneBooleanContent)
3273 return V;
3274
3275 return SDValue();
3276}
3277
3278/// Given the operands of an add/sub operation, see if the 2nd operand is a
3279/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3280/// the opcode and bypass the mask operation.
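/// For example, if X is known to be 0 or -1 (all sign bits), then (X & 1) is
/// 0 or 1, i.e. -X, so N0 + (X & 1) == N0 - X and N0 - (X & 1) == N0 + X.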
3281static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3282 SelectionDAG &DAG, const SDLoc &DL) {
3283 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3284 N1 = N1.getOperand(0);
3285
3286 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3287 return SDValue();
3288
3289 EVT VT = N0.getValueType();
3290 SDValue N10 = N1.getOperand(0);
3291 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3292 N10 = N10.getOperand(0);
3293
3294 if (N10.getValueType() != VT)
3295 return SDValue();
3296
3297 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3298 return SDValue();
3299
3300 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3301 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3302 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3303}
3304
3305/// Helper for doing combines based on N0 and N1 being added to each other.
3306SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3307 SDNode *LocReference) {
3308 EVT VT = N0.getValueType();
3309 SDLoc DL(LocReference);
3310
3311 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3312 SDValue Y, N;
3313 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3314 return DAG.getNode(ISD::SUB, DL, VT, N0,
3315 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3316
3317 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3318 return V;
3319
3320 // Look for:
3321 // add (add x, 1), y
3322 // And if the target does not like this form then turn into:
3323 // sub y, (xor x, -1)
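// Rationale: (x + 1) + y == y - (-x - 1) == y - (xor x, -1).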
3324 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3325 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3326 // Limit this to after legalization if the add has wrap flags
3327 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3328 !N0->getFlags().hasNoSignedWrap()))) {
3329 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3330 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3331 }
3332
3333 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3334 // Hoist one-use subtraction by non-opaque constant:
3335 // (x - C) + y -> (x + y) - C
3336 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3337 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3338 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3339 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3340 }
3341 // Hoist one-use subtraction from non-opaque constant:
3342 // (C - x) + y -> (y - x) + C
3343 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3344 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3345 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3346 }
3347 }
3348
3349 // add (mul x, C), x -> mul x, C+1
3350 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3351 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3352 N0.hasOneUse()) {
3353 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3354 DAG.getConstant(1, DL, VT));
3355 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3356 }
3357
3358 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3359 // rather than 'add 0/-1' (the zext should get folded).
3360 // add (sext i1 Y), X --> sub X, (zext i1 Y)
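// For an i1 Y, (sext Y) is 0 or -1, i.e. -(zext Y), so
// X + (sext Y) == X - (zext Y).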
3361 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3362 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3363 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3364 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3365 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3366 }
3367
3368 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3369 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3370 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3371 if (TN->getVT() == MVT::i1) {
3372 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3373 DAG.getConstant(1, DL, VT));
3374 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3375 }
3376 }
3377
3378 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3379 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3380 N1.getResNo() == 0)
3381 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3382 N0, N1.getOperand(0), N1.getOperand(2));
3383
3384 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3385 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3386 if (SDValue Carry = getAsCarry(TLI, N1))
3387 return DAG.getNode(ISD::UADDO_CARRY, DL,
3388 DAG.getVTList(VT, Carry.getValueType()), N0,
3389 DAG.getConstant(0, DL, VT), Carry);
3390
3391 return SDValue();
3392}
3393
3394SDValue DAGCombiner::visitADDC(SDNode *N) {
3395 SDValue N0 = N->getOperand(0);
3396 SDValue N1 = N->getOperand(1);
3397 EVT VT = N0.getValueType();
3398 SDLoc DL(N);
3399
3400 // If the flag result is dead, turn this into an ADD.
3401 if (!N->hasAnyUseOfValue(1))
3402 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3403 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3404
3405 // canonicalize constant to RHS.
3406 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408 if (N0C && !N1C)
3409 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3410
3411 // fold (addc x, 0) -> x + no carry out
3412 if (isNullConstant(N1))
3413 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3414 DL, MVT::Glue));
3415
3416 // If it cannot overflow, transform into an add.
3417 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3418 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3419 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3420
3421 return SDValue();
3422}
3423
3424/**
3425 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3426 * then the flip also occurs if computing the inverse is the same cost.
3427 * This function returns an empty SDValue in case it cannot flip the boolean
3428 * without increasing the cost of the computation. If you want to flip a boolean
3429 * no matter what, use DAG.getLogicalNOT.
3430 */
3431 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3432 const TargetLowering &TLI,
3433 bool Force) {
3434 if (Force && isa<ConstantSDNode>(V))
3435 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3436
3437 if (V.getOpcode() != ISD::XOR)
3438 return SDValue();
3439
3440 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3441 return V.getOperand(0);
3442 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3443 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3444 return SDValue();
3445}
3446
3447SDValue DAGCombiner::visitADDO(SDNode *N) {
3448 SDValue N0 = N->getOperand(0);
3449 SDValue N1 = N->getOperand(1);
3450 EVT VT = N0.getValueType();
3451 bool IsSigned = (ISD::SADDO == N->getOpcode());
3452
3453 EVT CarryVT = N->getValueType(1);
3454 SDLoc DL(N);
3455
3456 // If the flag result is dead, turn this into an ADD.
3457 if (!N->hasAnyUseOfValue(1))
3458 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3459 DAG.getUNDEF(CarryVT));
3460
3461 // canonicalize constant to RHS.
3462 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3463 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3464 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3465
3466 // fold (addo x, 0) -> x + no carry out
3467 if (isNullOrNullSplat(N1))
3468 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3469
3470 // If it cannot overflow, transform into an add.
3471 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3472 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3473 DAG.getConstant(0, DL, CarryVT));
3474
3475 if (IsSigned) {
3476 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3477 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3478 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3479 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3480 } else {
3481 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
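// (xor a, -1) + 1 wraps only when a == 0, while 0 - a borrows exactly when
// a != 0, so the new borrow flag is the logical inverse of the old carry.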
3482 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3483 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3484 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3485 return CombineTo(
3486 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3487 }
3488
3489 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3490 return Combined;
3491
3492 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3493 return Combined;
3494 }
3495
3496 return SDValue();
3497}
3498
3499SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3500 EVT VT = N0.getValueType();
3501 if (VT.isVector())
3502 return SDValue();
3503
3504 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3505 // If Y + 1 cannot overflow.
3506 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3507 SDValue Y = N1.getOperand(0);
3508 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3509 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3510 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3511 N1.getOperand(2));
3512 }
3513
3514 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3515 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3516 if (SDValue Carry = getAsCarry(TLI, N1))
3517 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3518 DAG.getConstant(0, SDLoc(N), VT), Carry);
3519
3520 return SDValue();
3521}
3522
3523SDValue DAGCombiner::visitADDE(SDNode *N) {
3524 SDValue N0 = N->getOperand(0);
3525 SDValue N1 = N->getOperand(1);
3526 SDValue CarryIn = N->getOperand(2);
3527
3528 // canonicalize constant to RHS
3529 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3530 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3531 if (N0C && !N1C)
3532 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3533 N1, N0, CarryIn);
3534
3535 // fold (adde x, y, false) -> (addc x, y)
3536 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3537 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3538
3539 return SDValue();
3540}
3541
3542SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3543 SDValue N0 = N->getOperand(0);
3544 SDValue N1 = N->getOperand(1);
3545 SDValue CarryIn = N->getOperand(2);
3546 SDLoc DL(N);
3547
3548 // canonicalize constant to RHS
3549 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3550 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3551 if (N0C && !N1C)
3552 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3553
3554 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3555 if (isNullConstant(CarryIn)) {
3556 if (!LegalOperations ||
3557 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3558 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3559 }
3560
3561 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3562 if (isNullConstant(N0) && isNullConstant(N1)) {
3563 EVT VT = N0.getValueType();
3564 EVT CarryVT = CarryIn.getValueType();
3565 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3566 AddToWorklist(CarryExt.getNode());
3567 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3568 DAG.getConstant(1, DL, VT)),
3569 DAG.getConstant(0, DL, CarryVT));
3570 }
3571
3572 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3573 return Combined;
3574
3575 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3576 return Combined;
3577
3578 // We want to avoid useless duplication.
3579 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3580 // not a binary operation, it is not really possible to leverage this
3581 // existing mechanism for it. However, if more operations require the same
3582 // deduplication logic, then it may be worth generalizing.
3583 SDValue Ops[] = {N1, N0, CarryIn};
3584 SDNode *CSENode =
3585 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3586 if (CSENode)
3587 return SDValue(CSENode, 0);
3588
3589 return SDValue();
3590}
3591
3592/**
3593 * If we are facing some sort of diamond carry propagation pattern try to
3594 * break it up to generate something like:
3595 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3596 *
3597 * The end result is usually an increase in the number of operations required, but
3598 * because the carry is now linearized, other transforms can kick in and optimize the DAG.
3599 *
3600 * Patterns typically look something like
3601 * (uaddo A, B)
3602 * / \
3603 * Carry Sum
3604 * | \
3605 * | (uaddo_carry *, 0, Z)
3606 * | /
3607 * \ Carry
3608 * | /
3609 * (uaddo_carry X, *, *)
3610 *
3611 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3612 * produce a combine with a single path for carry propagation.
3613 */
3614 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3615 SelectionDAG &DAG, SDValue X,
3616 SDValue Carry0, SDValue Carry1,
3617 SDNode *N) {
3618 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3619 return SDValue();
3620 if (Carry1.getOpcode() != ISD::UADDO)
3621 return SDValue();
3622
3623 SDValue Z;
3624
3625 /**
3626 * First look for a suitable Z. It will present itself in the form of
3627 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3628 */
3629 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3630 isNullConstant(Carry0.getOperand(1))) {
3631 Z = Carry0.getOperand(2);
3632 } else if (Carry0.getOpcode() == ISD::UADDO &&
3633 isOneConstant(Carry0.getOperand(1))) {
3634 EVT VT = Carry0->getValueType(1);
3635 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3636 } else {
3637 // We couldn't find a suitable Z.
3638 return SDValue();
3639 }
3640
3641
3642 auto cancelDiamond = [&](SDValue A,SDValue B) {
3643 SDLoc DL(N);
3644 SDValue NewY =
3645 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3646 Combiner.AddToWorklist(NewY.getNode());
3647 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3648 DAG.getConstant(0, DL, X.getValueType()),
3649 NewY.getValue(1));
3650 };
3651
3652 /**
3653 * (uaddo A, B)
3654 * |
3655 * Sum
3656 * |
3657 * (uaddo_carry *, 0, Z)
3658 */
3659 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3660 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3661 }
3662
3663 /**
3664 * (uaddo_carry A, 0, Z)
3665 * |
3666 * Sum
3667 * |
3668 * (uaddo *, B)
3669 */
3670 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3671 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3672 }
3673
3674 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3675 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3676 }
3677
3678 return SDValue();
3679}
3680
3681// If we are facing some sort of diamond carry/borrow in/out pattern try to
3682// match patterns like:
3683//
3684// (uaddo A, B) CarryIn
3685// | \ |
3686// | \ |
3687// PartialSum PartialCarryOutX /
3688// | | /
3689// | ____|____________/
3690// | / |
3691// (uaddo *, *) \________
3692// | \ \
3693// | \ |
3694// | PartialCarryOutY |
3695// | \ |
3696// | \ /
3697// AddCarrySum | ______/
3698// | /
3699// CarryOut = (or *, *)
3700//
3701// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3702//
3703// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3704//
3705// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3706// with a single path for carry/borrow out propagation.
3707 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3708 SDValue N0, SDValue N1, SDNode *N) {
3709 SDValue Carry0 = getAsCarry(TLI, N0);
3710 if (!Carry0)
3711 return SDValue();
3712 SDValue Carry1 = getAsCarry(TLI, N1);
3713 if (!Carry1)
3714 return SDValue();
3715
3716 unsigned Opcode = Carry0.getOpcode();
3717 if (Opcode != Carry1.getOpcode())
3718 return SDValue();
3719 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3720 return SDValue();
3721 // Guarantee identical type of CarryOut
3722 EVT CarryOutType = N->getValueType(0);
3723 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3724 CarryOutType != Carry1.getValue(1).getValueType())
3725 return SDValue();
3726
3727 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3728 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3729 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3730 std::swap(Carry0, Carry1);
3731
3732 // Check if nodes are connected in expected way.
3733 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3734 Carry1.getOperand(1) != Carry0.getValue(0))
3735 return SDValue();
3736
3737 // The carry in value must be on the righthand side for subtraction.
3738 unsigned CarryInOperandNum =
3739 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3740 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3741 return SDValue();
3742 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3743
3744 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3745 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3746 return SDValue();
3747
3748 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3749 CarryIn = getAsCarry(TLI, CarryIn, true);
3750 if (!CarryIn)
3751 return SDValue();
3752
3753 SDLoc DL(N);
3754 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3755 Carry1->getValueType(0));
3756 SDValue Merged =
3757 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3758 Carry0.getOperand(1), CarryIn);
3759
3760 // Please note that because we have proven that the result of the UADDO/USUBO
3761 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3762 // therefore prove that if the first UADDO/USUBO overflows, the second
3763 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3764 // maximum value.
3765 //
3766 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3767 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3768 //
3769 // This is important because it means that OR and XOR can be used to merge
3770 // carry flags; and that AND can return a constant zero.
3771 //
3772 // TODO: match other operations that can merge flags (ADD, etc)
3773 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3774 if (N->getOpcode() == ISD::AND)
3775 return DAG.getConstant(0, DL, CarryOutType);
3776 return Merged.getValue(1);
3777}
3778
3779SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3780 SDValue CarryIn, SDNode *N) {
3781 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3782 // carry.
3783 if (isBitwiseNot(N0))
3784 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3785 SDLoc DL(N);
3786 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3787 N0.getOperand(0), NotC);
3788 return CombineTo(
3789 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3790 }
3791
3792 // Iff the flag result is dead:
3793 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3794 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3795 // or the dependency between the instructions.
3796 if ((N0.getOpcode() == ISD::ADD ||
3797 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3798 N0.getValue(1) != CarryIn)) &&
3799 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3800 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3801 N0.getOperand(0), N0.getOperand(1), CarryIn);
3802
3803 /**
3804 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3805 * a diamond carry propagation. In that case we try to transform the DAG
3806 * to ensure linear carry propagation if that is possible.
3807 */
3808 if (auto Y = getAsCarry(TLI, N1)) {
3809 // Because both are carries, Y and Z can be swapped.
3810 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3811 return R;
3812 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3813 return R;
3814 }
3815
3816 return SDValue();
3817}
3818
3819SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3820 SDValue CarryIn, SDNode *N) {
3821 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3822 if (isBitwiseNot(N0)) {
3823 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3824 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3825 N0.getOperand(0), NotC);
3826 }
3827
3828 return SDValue();
3829}
3830
3831SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3832 SDValue N0 = N->getOperand(0);
3833 SDValue N1 = N->getOperand(1);
3834 SDValue CarryIn = N->getOperand(2);
3835 SDLoc DL(N);
3836
3837 // canonicalize constant to RHS
3838 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3839 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3840 if (N0C && !N1C)
3841 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3842
3843 // fold (saddo_carry x, y, false) -> (saddo x, y)
3844 if (isNullConstant(CarryIn)) {
3845 if (!LegalOperations ||
3846 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3847 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3848 }
3849
3850 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3851 return Combined;
3852
3853 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3854 return Combined;
3855
3856 return SDValue();
3857}
3858
3859// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3860// clamp/truncation if necessary.
3861 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3862 SDValue RHS, SelectionDAG &DAG,
3863 const SDLoc &DL) {
3864 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3865 "Illegal truncation");
3866
3867 if (DstVT == SrcVT)
3868 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3869
3870 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3871 // clamping RHS.
3872 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3873 DstVT.getScalarSizeInBits());
3874 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3875 return SDValue();
3876
3877 SDValue SatLimit =
3878 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3879 DstVT.getScalarSizeInBits()),
3880 DL, SrcVT);
3881 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3882 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3883 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3884 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3885}
3886
3887// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3888// usubsat(a,b), optionally as a truncated type.
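// For example, umax(a,b) - b is (a >= b ? a - b : 0), which is usubsat(a,b);
// the a - umin(a,b) form computes the same value.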
3889SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3890 if (N->getOpcode() != ISD::SUB ||
3891 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3892 return SDValue();
3893
3894 EVT SubVT = N->getValueType(0);
3895 SDValue Op0 = N->getOperand(0);
3896 SDValue Op1 = N->getOperand(1);
3897
3898 // Try to find umax(a,b) - b or a - umin(a,b) patterns, as
3899 // they may be converted to usubsat(a,b).
3900 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3901 SDValue MaxLHS = Op0.getOperand(0);
3902 SDValue MaxRHS = Op0.getOperand(1);
3903 if (MaxLHS == Op1)
3904 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3905 if (MaxRHS == Op1)
3906 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3907 }
3908
3909 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3910 SDValue MinLHS = Op1.getOperand(0);
3911 SDValue MinRHS = Op1.getOperand(1);
3912 if (MinLHS == Op0)
3913 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3914 if (MinRHS == Op0)
3915 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3916 }
3917
3918 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3919 if (Op1.getOpcode() == ISD::TRUNCATE &&
3920 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3921 Op1.getOperand(0).hasOneUse()) {
3922 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3923 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3924 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3925 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3926 DAG, DL);
3927 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3928 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3929 DAG, DL);
3930 }
3931
3932 return SDValue();
3933}
3934
3935// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3936 // counting leading ones. Broadly, it replaces the subtraction with a left
3937// shift.
3938//
3939// * DAG Legalisation Pattern:
3940//
3941// (sub (ctlz (zeroextend (not Src)))
3942// BitWidthDiff)
3943//
3944// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3945// -->
3946//
3947// (ctlz_zero_undef (not (shl (anyextend Src)
3948// BitWidthDiff)))
3949//
3950// * Type Legalisation Pattern:
3951//
3952// (sub (ctlz (and (xor Src XorMask)
3953// AndMask))
3954// BitWidthDiff)
3955//
3956// if AndMask has only trailing ones
3957// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3958// and XorMask has more trailing ones than AndMask
3959// -->
3960//
3961// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
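// For example, with an i8 Src promoted to i32, ctlz(zext(not Src)) counts the
// 24 zero bits introduced by the extension plus the leading ones of Src, so
// subtracting BitWidthDiff == 24 leaves just the leading-one count. Shifting
// Src into the top bits first and inverting gives the same count directly, and
// the low BitWidthDiff bits become ones after the NOT, so the CTLZ input can
// never be zero and CTLZ_ZERO_UNDEF is safe.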
3962template <class MatchContextClass>
3963 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3964 const SDLoc DL(N);
3965 SDValue N0 = N->getOperand(0);
3966 EVT VT = N0.getValueType();
3967 unsigned BitWidth = VT.getScalarSizeInBits();
3968
3969 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3970
3971 APInt AndMask;
3972 APInt XorMask;
3973 APInt BitWidthDiff;
3974
3975 SDValue CtlzOp;
3976 SDValue Src;
3977
3978 if (!sd_context_match(
3979 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3980 return SDValue();
3981
3982 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3983 // DAG Legalisation Pattern:
3984 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3985 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3986 return SDValue();
3987
3988 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3989 } else if (sd_context_match(CtlzOp, Matcher,
3990 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3991 m_ConstInt(AndMask)))) {
3992 // Type Legalisation Pattern:
3993 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3994 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3995 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3996 return SDValue();
3997 } else
3998 return SDValue();
3999
4000 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4001 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4002 SDValue Not =
4003 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4004
4005 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4006}
4007
4008// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
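// For example, with x = 17 and y = 5: 17 - (17 udiv 5) * 5 == 17 - 15 == 2,
// which is 17 urem 5, i.e. result 1 of the existing divrem node.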
4009 static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4010 const SDLoc &DL) {
4011 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4012 SDValue Sub0 = N->getOperand(0);
4013 SDValue Sub1 = N->getOperand(1);
4014
4015 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4016 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4017 DivRem.getOpcode() == ISD::UDIVREM) &&
4018 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4019 DivRem.getOperand(1) == MaybeY) {
4020 return SDValue(DivRem.getNode(), 1);
4021 }
4022 return SDValue();
4023 };
4024
4025 if (Sub1.getOpcode() == ISD::MUL) {
4026 // (sub x, (mul divrem(x,y)[0], y))
4027 SDValue Mul0 = Sub1.getOperand(0);
4028 SDValue Mul1 = Sub1.getOperand(1);
4029
4030 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4031 return Res;
4032
4033 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4034 return Res;
4035
4036 } else if (Sub1.getOpcode() == ISD::SHL) {
4037 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4038 SDValue Shl0 = Sub1.getOperand(0);
4039 SDValue Shl1 = Sub1.getOperand(1);
4040 // Check if Shl0 is divrem(x, Y)[0]
4041 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4042 Shl0.getOpcode() == ISD::UDIVREM) &&
4043 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4044
4045 SDValue Divisor = Shl0.getOperand(1);
4046
4047 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4048 ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4049 if (!DivC || !ShC)
4050 return SDValue();
4051
4052 if (DivC->getAPIntValue().isPowerOf2() &&
4053 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4054 return SDValue(Shl0.getNode(), 1);
4055 }
4056 }
4057 return SDValue();
4058}
4059
4060 // Since it may not be valid to emit a fold to zero for vector initializers,
4061 // check whether we can before folding.
4062static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4063 SelectionDAG &DAG, bool LegalOperations) {
4064 if (!VT.isVector())
4065 return DAG.getConstant(0, DL, VT);
4066 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4067 return DAG.getConstant(0, DL, VT);
4068 return SDValue();
4069}
4070
4071SDValue DAGCombiner::visitSUB(SDNode *N) {
4072 SDValue N0 = N->getOperand(0);
4073 SDValue N1 = N->getOperand(1);
4074 EVT VT = N0.getValueType();
4075 unsigned BitWidth = VT.getScalarSizeInBits();
4076 SDLoc DL(N);
4077
4078 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
4079 return V;
4080
4081 // fold (sub x, x) -> 0
4082 if (N0 == N1)
4083 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4084
4085 // fold (sub c1, c2) -> c3
4086 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4087 return C;
4088
4089 // fold vector ops
4090 if (VT.isVector()) {
4091 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4092 return FoldedVOp;
4093
4094 // fold (sub x, 0) -> x, vector edition
4095 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4096 return N0;
4097 }
4098
4099 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4100 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4101 if (N1.hasOneUse() && hasUMin(VT)) {
4102 SDValue Y;
4103 auto MS0 = m_Specific(N0);
4104 auto MVY = m_Value(Y);
4105 auto MZ = m_Zero();
4106 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4107 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4108
4109 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4110 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4111 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4112 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4113
4114 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4115 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4116 }
4117
4118 if (SDValue NewSel = foldBinOpIntoSelect(N))
4119 return NewSel;
4120
4121 // fold (sub x, c) -> (add x, -c)
4122 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4123 return DAG.getNode(ISD::ADD, DL, VT, N0,
4124 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4125
4126 if (isNullOrNullSplat(N0)) {
4127 // Right-shifting everything out but the sign bit followed by negation is
4128 // the same as flipping arithmetic/logical shift type without the negation:
4129 // -(X >>u 31) -> (X >>s 31)
4130 // -(X >>s 31) -> (X >>u 31)
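// Both shifts isolate the sign bit: X >>u (BW-1) is 0 or 1 and X >>s (BW-1)
// is 0 or -1, so negating one form yields the other.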
4131 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4132 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4133 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4134 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4135 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4136 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4137 }
4138 }
4139
4140 // 0 - X --> 0 if the sub is NUW.
4141 if (N->getFlags().hasNoUnsignedWrap())
4142 return N0;
4143
4144 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
4145 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4146 // N1 must be 0 because negating the minimum signed value is undefined.
4147 if (N->getFlags().hasNoSignedWrap())
4148 return N0;
4149
4150 // 0 - X --> X if X is 0 or the minimum signed value.
4151 return N1;
4152 }
4153
4154 // Convert 0 - abs(x).
4155 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4156 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
4157 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4158 return Result;
4159
4160 // Similar to the previous rule, but this time targeting an expanded abs.
4161 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4162 // as well as
4163 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4164 // Note that these two are applicable to both signed and unsigned min/max.
4165 SDValue X;
4166 SDValue S0;
4167 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4168 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4169 m_UMax(m_Value(X), NegPat),
4170 m_SMin(m_Value(X), NegPat),
4171 m_UMin(m_Value(X), NegPat))))) {
4172 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4173 if (hasOperation(NewOpc, VT))
4174 return DAG.getNode(NewOpc, DL, VT, X, S0);
4175 }
4176
4177 // Fold neg(splat(neg(x))) -> splat(x)
4178 if (VT.isVector()) {
4179 SDValue N1S = DAG.getSplatValue(N1, true);
4180 if (N1S && N1S.getOpcode() == ISD::SUB &&
4181 isNullConstant(N1S.getOperand(0)))
4182 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4183 }
4184
4185 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4186 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4187 isOneOrOneSplat(N1->getOperand(1))) {
4188 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4189 if (VT.isVector())
4190 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4191 VT.getVectorElementCount());
4192 if (TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
4193 TargetLowering::Legal) {
4194 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4195 DAG.getValueType(ExtVT));
4196 }
4197 }
4198 }
4199
4200 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4201 if (isAllOnesOrAllOnesSplat(N0))
4202 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4203
4204 // fold (A - (0-B)) -> A+B
4205 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4206 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4207
4208 // fold A-(A-B) -> B
4209 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4210 return N1.getOperand(1);
4211
4212 // fold (A+B)-A -> B
4213 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4214 return N0.getOperand(1);
4215
4216 // fold (A+B)-B -> A
4217 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4218 return N0.getOperand(0);
4219
4220 // fold (A+C1)-C2 -> A+(C1-C2)
4221 if (N0.getOpcode() == ISD::ADD) {
4222 SDValue N01 = N0.getOperand(1);
4223 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4224 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4225 }
4226
4227 // fold C2-(A+C1) -> (C2-C1)-A
4228 if (N1.getOpcode() == ISD::ADD) {
4229 SDValue N11 = N1.getOperand(1);
4230 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4231 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4232 }
4233
4234 // fold (A-C1)-C2 -> A-(C1+C2)
4235 if (N0.getOpcode() == ISD::SUB) {
4236 SDValue N01 = N0.getOperand(1);
4237 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4238 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4239 }
4240
4241 // fold (c1-A)-c2 -> (c1-c2)-A
4242 if (N0.getOpcode() == ISD::SUB) {
4243 SDValue N00 = N0.getOperand(0);
4244 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4245 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4246 }
4247
4248 SDValue A, B, C;
4249
4250 // fold ((A+(B+C))-B) -> A+C
4251 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4252 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4253
4254 // fold ((A+(B-C))-B) -> A-C
4255 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4256 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4257
4258 // fold ((A-(B-C))-C) -> A-B
4259 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4260 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4261
4262 // fold (A-(B-C)) -> A+(C-B)
4263 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4264 return DAG.getNode(ISD::ADD, DL, VT, N0,
4265 DAG.getNode(ISD::SUB, DL, VT, C, B));
4266
4267 // A - (A & B) -> A & (~B)
4268 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4269 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4270 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4271
4272 // fold (A - (-B * C)) -> (A + (B * C))
4273 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4274 return DAG.getNode(ISD::ADD, DL, VT, N0,
4275 DAG.getNode(ISD::MUL, DL, VT, B, C));
4276
4277 // If either operand of a sub is undef, the result is undef
4278 if (N0.isUndef())
4279 return N0;
4280 if (N1.isUndef())
4281 return N1;
4282
4283 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4284 return V;
4285
4286 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4287 return V;
4288
4289 // Try to match AVGCEIL fixedwidth pattern
4290 if (SDValue V = foldSubToAvg(N, DL))
4291 return V;
4292
4293 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4294 return V;
4295
4296 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4297 return V;
4298
4299 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4300 return V;
4301
4302 // (A - B) - 1 -> add (xor B, -1), A
4303 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4304 m_One(/*AllowUndefs=*/true))))
4305 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4306
4307 // Look for:
4308 // sub y, (xor x, -1)
4309 // And if the target does not like this form then turn into:
4310 // add (add x, y), 1
4311 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4312 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4313 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4314 }
4315
4316 // Hoist one-use addition by non-opaque constant:
4317 // (x + C) - y -> (x - y) + C
4318 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4319 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4320 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4321 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4322 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4323 }
4324 // y - (x + C) -> (y - x) - C
4325 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4326 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4327 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4328 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4329 }
4330 // (x - C) - y -> (x - y) - C
4331 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4332 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4333 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4334 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4335 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4336 }
4337 // (C - x) - y -> C - (x + y)
4338 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4339 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4340 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4341 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4342 }
4343
4344 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4345 // rather than 'sub 0/1' (the sext should get folded).
4346 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4347 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4348 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4349 TLI.getBooleanContents(VT) ==
4350 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4351 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4352 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4353 }
4354
4355 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
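// B is 0 when A is non-negative and -1 when A is negative, so (A ^ B) - B is
// either A - 0 == A or (~A) - (-1) == ~A + 1 == -A, i.e. abs(A).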
4356 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4357 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4358 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4359 return DAG.getNode(ISD::ABS, DL, VT, A);
4360
4361 // If the relocation model supports it, consider symbol offsets.
4362 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4363 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4364 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4365 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4366 if (GA->getGlobal() == GB->getGlobal())
4367 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4368 DL, VT);
4369 }
4370
4371 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4372 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4373 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4374 if (TN->getVT() == MVT::i1) {
4375 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4376 DAG.getConstant(1, DL, VT));
4377 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4378 }
4379 }
4380
4381 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4382 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4383 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4384 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4385 }
4386
4387 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4388 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4389 APInt NewStep = -N1.getConstantOperandAPInt(0);
4390 return DAG.getNode(ISD::ADD, DL, VT, N0,
4391 DAG.getStepVector(DL, VT, NewStep));
4392 }
4393
4394 // Prefer an add for more folding potential and possibly better codegen:
4395 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4396 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4397 SDValue ShAmt = N1.getOperand(1);
4398 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4399 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4400 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4401 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4402 }
4403 }
4404
4405 // As with the previous fold, prefer add for more folding potential.
4406 // Subtracting SMIN/0 is the same as adding SMIN/0:
4407 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4408 if (N1.getOpcode() == ISD::SHL) {
4409 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4410 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4411 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4412 }
4413
4414 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4415 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4416 N0.getResNo() == 0 && N0.hasOneUse())
4417 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4418 N0.getOperand(0), N1, N0.getOperand(2));
4419
4420 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4421 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4422 if (SDValue Carry = getAsCarry(TLI, N0)) {
4423 SDValue X = N1;
4424 SDValue Zero = DAG.getConstant(0, DL, VT);
4425 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4426 return DAG.getNode(ISD::UADDO_CARRY, DL,
4427 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4428 Carry);
4429 }
4430 }
4431
4432 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4433 // sub C0, X --> xor X, C0
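// If every bit that may be set in X is also set in C0, no bit position can
// borrow, so the subtraction acts bitwise: C0 - X == C0 ^ X. The check below
// verifies this for the worst-case value of X.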
4434 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4435 if (!C0->isOpaque()) {
4436 const APInt &C0Val = C0->getAPIntValue();
4437 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4438 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4439 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4440 }
4441 }
4442
4443 // smax(a,b) - smin(a,b) --> abds(a,b)
4444 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4445 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4446 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4447 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4448
4449 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4450 if (hasOperation(ISD::ABDS, VT) &&
4451 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4452 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4453 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4454
4455 // umax(a,b) - umin(a,b) --> abdu(a,b)
4456 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4457 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4458 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4459 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4460
4461 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4462 if (hasOperation(ISD::ABDU, VT) &&
4463 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4464 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4465 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4466
4467 return SDValue();
4468}
4469
4470SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4471 unsigned Opcode = N->getOpcode();
4472 SDValue N0 = N->getOperand(0);
4473 SDValue N1 = N->getOperand(1);
4474 EVT VT = N0.getValueType();
4475 bool IsSigned = Opcode == ISD::SSUBSAT;
4476 SDLoc DL(N);
4477
4478 // fold (sub_sat x, undef) -> 0
4479 if (N0.isUndef() || N1.isUndef())
4480 return DAG.getConstant(0, DL, VT);
4481
4482 // fold (sub_sat x, x) -> 0
4483 if (N0 == N1)
4484 return DAG.getConstant(0, DL, VT);
4485
4486 // fold (sub_sat c1, c2) -> c3
4487 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4488 return C;
4489
4490 // fold vector ops
4491 if (VT.isVector()) {
4492 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4493 return FoldedVOp;
4494
4495 // fold (sub_sat x, 0) -> x, vector edition
4496 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4497 return N0;
4498 }
4499
4500 // fold (sub_sat x, 0) -> x
4501 if (isNullConstant(N1))
4502 return N0;
4503
4504 // If it cannot overflow, transform into a sub.
4505 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4506 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4507
4508 return SDValue();
4509}
4510
4511SDValue DAGCombiner::visitSUBC(SDNode *N) {
4512 SDValue N0 = N->getOperand(0);
4513 SDValue N1 = N->getOperand(1);
4514 EVT VT = N0.getValueType();
4515 SDLoc DL(N);
4516
4517 // If the flag result is dead, turn this into an SUB.
4518 if (!N->hasAnyUseOfValue(1))
4519 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4520 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4521
4522 // fold (subc x, x) -> 0 + no borrow
4523 if (N0 == N1)
4524 return CombineTo(N, DAG.getConstant(0, DL, VT),
4525 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4526
4527 // fold (subc x, 0) -> x + no borrow
4528 if (isNullConstant(N1))
4529 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4530
4531 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4532 if (isAllOnesConstant(N0))
4533 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4534 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4535
4536 return SDValue();
4537}
4538
4539SDValue DAGCombiner::visitSUBO(SDNode *N) {
4540 SDValue N0 = N->getOperand(0);
4541 SDValue N1 = N->getOperand(1);
4542 EVT VT = N0.getValueType();
4543 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4544
4545 EVT CarryVT = N->getValueType(1);
4546 SDLoc DL(N);
4547
4548 // If the flag result is dead, turn this into an SUB.
4549 if (!N->hasAnyUseOfValue(1))
4550 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4551 DAG.getUNDEF(CarryVT));
4552
4553 // fold (subo x, x) -> 0 + no borrow
4554 if (N0 == N1)
4555 return CombineTo(N, DAG.getConstant(0, DL, VT),
4556 DAG.getConstant(0, DL, CarryVT));
4557
4558 // fold (subo x, c) -> (addo x, -c)
4559 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4560 if (IsSigned && !N1C->isMinSignedValue())
4561 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4562 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4563
4564 // fold (subo x, 0) -> x + no borrow
4565 if (isNullOrNullSplat(N1))
4566 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4567
4568 // If it cannot overflow, transform into a sub.
4569 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4570 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4571 DAG.getConstant(0, DL, CarryVT));
4572
4573 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4574 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4575 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4576 DAG.getConstant(0, DL, CarryVT));
4577
4578 return SDValue();
4579}
4580
4581SDValue DAGCombiner::visitSUBE(SDNode *N) {
4582 SDValue N0 = N->getOperand(0);
4583 SDValue N1 = N->getOperand(1);
4584 SDValue CarryIn = N->getOperand(2);
4585
4586 // fold (sube x, y, false) -> (subc x, y)
4587 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4588 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4589
4590 return SDValue();
4591}
4592
4593SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4594 SDValue N0 = N->getOperand(0);
4595 SDValue N1 = N->getOperand(1);
4596 SDValue CarryIn = N->getOperand(2);
4597
4598 // fold (usubo_carry x, y, false) -> (usubo x, y)
4599 if (isNullConstant(CarryIn)) {
4600 if (!LegalOperations ||
4601 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4602 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4603 }
4604
4605 return SDValue();
4606}
4607
4608SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4609 SDValue N0 = N->getOperand(0);
4610 SDValue N1 = N->getOperand(1);
4611 SDValue CarryIn = N->getOperand(2);
4612
4613 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4614 if (isNullConstant(CarryIn)) {
4615 if (!LegalOperations ||
4616 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4617 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4618 }
4619
4620 return SDValue();
4621}
4622
4623// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4624// UMULFIXSAT here.
4625SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4626 SDValue N0 = N->getOperand(0);
4627 SDValue N1 = N->getOperand(1);
4628 SDValue Scale = N->getOperand(2);
4629 EVT VT = N0.getValueType();
4630
4631 // fold (mulfix x, undef, scale) -> 0
4632 if (N0.isUndef() || N1.isUndef())
4633 return DAG.getConstant(0, SDLoc(N), VT);
4634
4635 // Canonicalize constant to RHS (vector doesn't have to splat)
4636 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4637 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4638 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4639
4640 // fold (mulfix x, 0, scale) -> 0
4641 if (isNullConstant(N1))
4642 return DAG.getConstant(0, SDLoc(N), VT);
4643
4644 return SDValue();
4645}
4646
4647template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4648 SDValue N0 = N->getOperand(0);
4649 SDValue N1 = N->getOperand(1);
4650 EVT VT = N0.getValueType();
4651 unsigned BitWidth = VT.getScalarSizeInBits();
4652 SDLoc DL(N);
4653 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4654 MatchContextClass Matcher(DAG, TLI, N);
4655
4656 // fold (mul x, undef) -> 0
4657 if (N0.isUndef() || N1.isUndef())
4658 return DAG.getConstant(0, DL, VT);
4659
4660 // fold (mul c1, c2) -> c1*c2
4661 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4662 return C;
4663
4664 // canonicalize constant to RHS (vector doesn't have to splat)
4665 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4666 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4667 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4668
4669 bool N1IsConst = false;
4670 bool N1IsOpaqueConst = false;
4671 APInt ConstValue1;
4672
4673 // fold vector ops
4674 if (VT.isVector()) {
4675 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4676 if (!UseVP)
4677 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4678 return FoldedVOp;
4679
4680 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4681 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4682 "Splat APInt should be element width");
4683 } else {
4684 N1IsConst = isa<ConstantSDNode>(N1);
4685 if (N1IsConst) {
4686 ConstValue1 = N1->getAsAPIntVal();
4687 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4688 }
4689 }
4690
4691 // fold (mul x, 0) -> 0
4692 if (N1IsConst && ConstValue1.isZero())
4693 return N1;
4694
4695 // fold (mul x, 1) -> x
4696 if (N1IsConst && ConstValue1.isOne())
4697 return N0;
4698
4699 if (!UseVP)
4700 if (SDValue NewSel = foldBinOpIntoSelect(N))
4701 return NewSel;
4702
4703 // fold (mul x, -1) -> 0-x
4704 if (N1IsConst && ConstValue1.isAllOnes())
4705 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4706
4707 // fold (mul x, (1 << c)) -> x << c
4708 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4709 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4710 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4711 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4712 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4713 SDNodeFlags Flags;
4714 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4715 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4716 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4717 }
4718 }
4719
4720 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4721 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4722 unsigned Log2Val = (-ConstValue1).logBase2();
4723
4724 // FIXME: If the input is something that is easily negated (e.g. a
4725 // single-use add), we should put the negate there.
4726 return Matcher.getNode(
4727 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4728 Matcher.getNode(ISD::SHL, DL, VT, N0,
4729 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4730 }
4731
4732 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4733 // hi result is in use in case we hit this mid-legalization.
4734 if (!UseVP) {
4735 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4736 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4737 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4738 // TODO: Can we match commutable operands with getNodeIfExists?
4739 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4740 if (LoHi->hasAnyUseOfValue(1))
4741 return SDValue(LoHi, 0);
4742 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4743 if (LoHi->hasAnyUseOfValue(1))
4744 return SDValue(LoHi, 0);
4745 }
4746 }
4747 }
4748
4749 // Try to transform:
4750 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4751 // mul x, (2^N + 1) --> add (shl x, N), x
4752 // mul x, (2^N - 1) --> sub (shl x, N), x
4753 // Examples: x * 33 --> (x << 5) + x
4754 // x * 15 --> (x << 4) - x
4755 // x * -33 --> -((x << 5) + x)
4756 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4757 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4758 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4759 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4760 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4761 // x * 0xf800 --> (x << 16) - (x << 11)
4762 // x * -0x8800 --> -((x << 15) + (x << 11))
4763 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4764 if (!UseVP && N1IsConst &&
4765 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4766 // TODO: We could handle more general decomposition of any constant by
4767 // having the target set a limit on number of ops and making a
4768 // callback to determine that sequence (similar to sqrt expansion).
4769 unsigned MathOp = ISD::DELETED_NODE;
4770 APInt MulC = ConstValue1.abs();
4771 // The constant `2` should be treated as (2^0 + 1).
4772 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4773 MulC.lshrInPlace(TZeros);
4774 if ((MulC - 1).isPowerOf2())
4775 MathOp = ISD::ADD;
4776 else if ((MulC + 1).isPowerOf2())
4777 MathOp = ISD::SUB;
4778
4779 if (MathOp != ISD::DELETED_NODE) {
4780 unsigned ShAmt =
4781 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4782 ShAmt += TZeros;
4783 assert(ShAmt < BitWidth &&
4784 "multiply-by-constant generated out of bounds shift");
4785 SDValue Shl =
4786 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4787 SDValue R =
4788 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4789 DAG.getNode(ISD::SHL, DL, VT, N0,
4790 DAG.getConstant(TZeros, DL, VT)))
4791 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4792 if (ConstValue1.isNegative())
4793 R = DAG.getNegative(R, DL, VT);
4794 return R;
4795 }
4796 }
4797
4798 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4799 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4800 SDValue N01 = N0.getOperand(1);
4801 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4802 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4803 }
4804
4805 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4806 // use.
4807 {
4808 SDValue Sh, Y;
4809
4810 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4811 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4813 Sh = N0; Y = N1;
4814 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4816 Sh = N1; Y = N0;
4817 }
4818
4819 if (Sh.getNode()) {
4820 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4821 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4822 }
4823 }
4824
4825 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4826 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4830 return Matcher.getNode(
4831 ISD::ADD, DL, VT,
4832 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4833 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4834
4835 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4836 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4837 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4838 const APInt &C0 = N0.getConstantOperandAPInt(0);
4839 const APInt &C1 = NC1->getAPIntValue();
4840 return DAG.getVScale(DL, VT, C0 * C1);
4841 }
4842
4843 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4844 APInt MulVal;
4845 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4846 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4847 const APInt &C0 = N0.getConstantOperandAPInt(0);
4848 APInt NewStep = C0 * MulVal;
4849 return DAG.getStepVector(DL, VT, NewStep);
4850 }
4851
4852 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4853 SDValue X;
4854 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4856 N, Matcher,
4858 m_Deferred(X)))) {
4859 return Matcher.getNode(ISD::ABS, DL, VT, X);
4860 }
4861
4862    // Fold (mul x, 0/undef) -> 0 and
4863    //      (mul x, 1)       -> x
4864    // into and(x, mask).
4865 // We can replace vectors with '0' and '1' factors with a clearing mask.
4866 if (VT.isFixedLengthVector()) {
4867 unsigned NumElts = VT.getVectorNumElements();
4868 SmallBitVector ClearMask;
4869 ClearMask.reserve(NumElts);
4870 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4871 if (!V || V->isZero()) {
4872 ClearMask.push_back(true);
4873 return true;
4874 }
4875 ClearMask.push_back(false);
4876 return V->isOne();
4877 };
4878 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4879 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4880 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4881 EVT LegalSVT = N1.getOperand(0).getValueType();
4882 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4883      SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4884      SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4885      for (unsigned I = 0; I != NumElts; ++I)
4886 if (ClearMask[I])
4887 Mask[I] = Zero;
4888 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4889 }
4890 }
4891
4892 // reassociate mul
4893 // TODO: Change reassociateOps to support vp ops.
4894 if (!UseVP)
4895 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4896 return RMUL;
4897
4898 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4899 // TODO: Change reassociateReduction to support vp ops.
4900 if (!UseVP)
4901 if (SDValue SD =
4902 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4903 return SD;
4904
4905    // Simplify the operands using demanded-bits information.
4906    if (SimplifyDemandedBits(SDValue(N, 0)))
4907      return SDValue(N, 0);
4908
4909 return SDValue();
4910}
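The shift/add decomposition above hinges on the identity x * C == (x << N) +/- (x << M) whenever |C| is 2^N +/- 2^M (with M possibly 0). A minimal standalone C++ check of that arithmetic, independent of any SelectionDAG machinery (all names below are illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // x * 33     = x * (2^5 + 1)    --> (x << 5) + x
  // x * 15     = x * (2^4 - 1)    --> (x << 4) - x
  // x * 0x8800 = x * (2^15 + 2^11) --> (x << 15) + (x << 11)
  for (uint32_t X : {0u, 1u, 7u, 123456u}) {
    assert(X * 33u == (X << 5) + X);
    assert(X * 15u == (X << 4) - X);
    assert(X * 0x8800u == (X << 15) + (X << 11));
  }
  return 0;
}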
4911
4912  /// Return true if divmod libcall is available.
4913  static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4914                                       const TargetLowering &TLI) {
4915 RTLIB::Libcall LC;
4916 EVT NodeType = Node->getValueType(0);
4917 if (!NodeType.isSimple())
4918 return false;
4919 switch (NodeType.getSimpleVT().SimpleTy) {
4920 default: return false; // No libcall for vector types.
4921 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4922 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4923 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4924 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4925 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4926 }
4927
4928 return TLI.getLibcallName(LC) != nullptr;
4929}
4930
4931/// Issue divrem if both quotient and remainder are needed.
4932SDValue DAGCombiner::useDivRem(SDNode *Node) {
4933 if (Node->use_empty())
4934 return SDValue(); // This is a dead node, leave it alone.
4935
4936 unsigned Opcode = Node->getOpcode();
4937 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4938 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4939
4940 // DivMod lib calls can still work on non-legal types if using lib-calls.
4941 EVT VT = Node->getValueType(0);
4942 if (VT.isVector() || !VT.isInteger())
4943 return SDValue();
4944
4945 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4946 return SDValue();
4947
4948 // If DIVREM is going to get expanded into a libcall,
4949 // but there is no libcall available, then don't combine.
4950    if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4951        !isDivRemLibcallAvailable(Node, isSigned, TLI))
4952      return SDValue();
4953
4954 // If div is legal, it's better to do the normal expansion
4955 unsigned OtherOpcode = 0;
4956 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4957 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4958 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4959 return SDValue();
4960 } else {
4961 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4962 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4963 return SDValue();
4964 }
4965
4966 SDValue Op0 = Node->getOperand(0);
4967 SDValue Op1 = Node->getOperand(1);
4968 SDValue combined;
4969 for (SDNode *User : Op0->users()) {
4970 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4971 User->use_empty())
4972 continue;
4973 // Convert the other matching node(s), too;
4974 // otherwise, the DIVREM may get target-legalized into something
4975 // target-specific that we won't be able to recognize.
4976 unsigned UserOpc = User->getOpcode();
4977 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4978 User->getOperand(0) == Op0 &&
4979 User->getOperand(1) == Op1) {
4980 if (!combined) {
4981 if (UserOpc == OtherOpcode) {
4982 SDVTList VTs = DAG.getVTList(VT, VT);
4983 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4984 } else if (UserOpc == DivRemOpc) {
4985 combined = SDValue(User, 0);
4986 } else {
4987 assert(UserOpc == Opcode);
4988 continue;
4989 }
4990 }
4991 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4992 CombineTo(User, combined);
4993 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4994 CombineTo(User, combined.getValue(1));
4995 }
4996 }
4997 return combined;
4998}
4999
5000  static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5001    SDValue N0 = N->getOperand(0);
5002 SDValue N1 = N->getOperand(1);
5003 EVT VT = N->getValueType(0);
5004 SDLoc DL(N);
5005
5006 unsigned Opc = N->getOpcode();
5007    bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5008    ConstantSDNode *N1C = isConstOrConstSplat(N1);
5009
5010 // X / undef -> undef
5011 // X % undef -> undef
5012 // X / 0 -> undef
5013 // X % 0 -> undef
5014 // NOTE: This includes vectors where any divisor element is zero/undef.
5015 if (DAG.isUndef(Opc, {N0, N1}))
5016 return DAG.getUNDEF(VT);
5017
5018 // undef / X -> 0
5019 // undef % X -> 0
5020 if (N0.isUndef())
5021 return DAG.getConstant(0, DL, VT);
5022
5023 // 0 / X -> 0
5024    // 0 % X -> 0
5025    ConstantSDNode *N0C = isConstOrConstSplat(N0);
5026    if (N0C && N0C->isZero())
5027 return N0;
5028
5029 // X / X -> 1
5030 // X % X -> 0
5031 if (N0 == N1)
5032 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5033
5034 // X / 1 -> X
5035 // X % 1 -> 0
5036 // If this is a boolean op (single-bit element type), we can't have
5037 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5038 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5039 // it's a 1.
5040 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
5041 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5042
5043 return SDValue();
5044}
5045
5046SDValue DAGCombiner::visitSDIV(SDNode *N) {
5047 SDValue N0 = N->getOperand(0);
5048 SDValue N1 = N->getOperand(1);
5049 EVT VT = N->getValueType(0);
5050 EVT CCVT = getSetCCResultType(VT);
5051 SDLoc DL(N);
5052
5053 // fold (sdiv c1, c2) -> c1/c2
5054 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5055 return C;
5056
5057 // fold vector ops
5058 if (VT.isVector())
5059 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5060 return FoldedVOp;
5061
5062 // fold (sdiv X, -1) -> 0-X
5063 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5064 if (N1C && N1C->isAllOnes())
5065 return DAG.getNegative(N0, DL, VT);
5066
5067 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5068 if (N1C && N1C->isMinSignedValue())
5069 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5070 DAG.getConstant(1, DL, VT),
5071 DAG.getConstant(0, DL, VT));
5072
5073 if (SDValue V = simplifyDivRem(N, DAG))
5074 return V;
5075
5076 if (SDValue NewSel = foldBinOpIntoSelect(N))
5077 return NewSel;
5078
5079 // If we know the sign bits of both operands are zero, strength reduce to a
5080 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5081 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5082 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5083
5084 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5085 // If the corresponding remainder node exists, update its users with
5086 // (Dividend - (Quotient * Divisor).
5087 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5088 { N0, N1 })) {
5089 // If the sdiv has the exact flag we shouldn't propagate it to the
5090 // remainder node.
5091 if (!N->getFlags().hasExact()) {
5092 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5093 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5094 AddToWorklist(Mul.getNode());
5095 AddToWorklist(Sub.getNode());
5096 CombineTo(RemNode, Sub);
5097 }
5098 }
5099 return V;
5100 }
5101
5102 // sdiv, srem -> sdivrem
5103 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5104 // true. Otherwise, we break the simplification logic in visitREM().
5105 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5106 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5107 if (SDValue DivRem = useDivRem(N))
5108 return DivRem;
5109
5110 return SDValue();
5111}
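The SREM update above relies on Dividend - Quotient * Divisor == Remainder for truncating division, the same rule C++ integer division follows. A small standalone sketch of that identity:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A : {-17, -8, 0, 5, 100})
    for (int32_t B : {-7, -1, 3, 16}) {
      int32_t Quot = A / B;          // truncates toward zero, like ISD::SDIV
      assert(A % B == A - Quot * B); // remainder recovered from the quotient
    }
  return 0;
}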
5112
5113static bool isDivisorPowerOfTwo(SDValue Divisor) {
5114  // Helper for determining whether a value is a power-of-2 constant scalar or a
5115 // vector of such elements.
5116 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5117 if (C->isZero() || C->isOpaque())
5118 return false;
5119 if (C->getAPIntValue().isPowerOf2())
5120 return true;
5121 if (C->getAPIntValue().isNegatedPowerOf2())
5122 return true;
5123 return false;
5124 };
5125
5126 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5127}
5128
5129SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5130 SDLoc DL(N);
5131 EVT VT = N->getValueType(0);
5132 EVT CCVT = getSetCCResultType(VT);
5133 unsigned BitWidth = VT.getScalarSizeInBits();
5134
5135 // fold (sdiv X, pow2) -> simple ops after legalize
5136 // FIXME: We check for the exact bit here because the generic lowering gives
5137 // better results in that case. The target-specific lowering should learn how
5138 // to handle exact sdivs efficiently.
5139 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5140 // Target-specific implementation of sdiv x, pow2.
5141 if (SDValue Res = BuildSDIVPow2(N))
5142 return Res;
5143
5144 // Create constants that are functions of the shift amount value.
5145 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5146 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5147 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5148 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5149 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5150 if (!isConstantOrConstantVector(Inexact))
5151 return SDValue();
5152
5153 // Splat the sign bit into the register
5154 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5155 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5156 AddToWorklist(Sign.getNode());
5157
5158 // Add (N0 < 0) ? abs2 - 1 : 0;
5159 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5160 AddToWorklist(Srl.getNode());
5161 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5162 AddToWorklist(Add.getNode());
5163 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5164 AddToWorklist(Sra.getNode());
5165
5166 // Special case: (sdiv X, 1) -> X
5167 // Special Case: (sdiv X, -1) -> 0-X
5168      SDValue One = DAG.getConstant(1, DL, VT);
5169      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5170      SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5171 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5172 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5173 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5174
5175 // If dividing by a positive value, we're done. Otherwise, the result must
5176 // be negated.
5177 SDValue Zero = DAG.getConstant(0, DL, VT);
5178 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5179
5180 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5181 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5182 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5183 return Res;
5184 }
5185
5186 // If integer divide is expensive and we satisfy the requirements, emit an
5187 // alternate sequence. Targets may check function attributes for size/speed
5188 // trade-offs.
5189    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5190    if (isConstantOrConstantVector(N1) &&
5191        !TLI.isIntDivCheap(N->getValueType(0), Attr))
5192 if (SDValue Op = BuildSDIV(N))
5193 return Op;
5194
5195 return SDValue();
5196}
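For reference, the shift sequence built above is the usual round-toward-zero correction for signed division by a power of two. A standalone C++ sketch of the same arithmetic (helper name illustrative only; assumes arithmetic right shift of negative values, which C++20 guarantees):

#include <cassert>
#include <cstdint>

// Divide X by 2^Log2 (1 <= Log2 <= 30), rounding toward zero, without a divide.
static int32_t SDivByPow2(int32_t X, unsigned Log2) {
  int32_t Sign = X >> 31;                        // 0 or -1: the sign splat
  uint32_t Bias = uint32_t(Sign) >> (32 - Log2); // (2^Log2 - 1) only when X < 0
  return (X + int32_t(Bias)) >> Log2;            // biased shift == truncating division
}

int main() {
  for (int32_t X : {-100, -9, -8, -1, 0, 1, 7, 8, 1000})
    for (unsigned L : {1u, 3u, 5u})
      assert(SDivByPow2(X, L) == X / (1 << L));
  return 0;
}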
5197
5198SDValue DAGCombiner::visitUDIV(SDNode *N) {
5199 SDValue N0 = N->getOperand(0);
5200 SDValue N1 = N->getOperand(1);
5201 EVT VT = N->getValueType(0);
5202 EVT CCVT = getSetCCResultType(VT);
5203 SDLoc DL(N);
5204
5205 // fold (udiv c1, c2) -> c1/c2
5206 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5207 return C;
5208
5209 // fold vector ops
5210 if (VT.isVector())
5211 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5212 return FoldedVOp;
5213
5214 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5215 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5216 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5217 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5218 DAG.getConstant(1, DL, VT),
5219 DAG.getConstant(0, DL, VT));
5220 }
5221
5222 if (SDValue V = simplifyDivRem(N, DAG))
5223 return V;
5224
5225 if (SDValue NewSel = foldBinOpIntoSelect(N))
5226 return NewSel;
5227
5228 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5229 // If the corresponding remainder node exists, update its users with
5230 // (Dividend - (Quotient * Divisor).
5231 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5232 { N0, N1 })) {
5233 // If the udiv has the exact flag we shouldn't propagate it to the
5234 // remainder node.
5235 if (!N->getFlags().hasExact()) {
5236 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5237 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5238 AddToWorklist(Mul.getNode());
5239 AddToWorklist(Sub.getNode());
5240 CombineTo(RemNode, Sub);
5241 }
5242 }
5243 return V;
5244 }
5245
5246    // udiv, urem -> udivrem
5247 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5248 // true. Otherwise, we break the simplification logic in visitREM().
5249 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5250 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5251 if (SDValue DivRem = useDivRem(N))
5252 return DivRem;
5253
5254 // Simplify the operands using demanded-bits information.
5255 // We don't have demanded bits support for UDIV so this just enables constant
5256    // folding based on known bits.
5257    if (SimplifyDemandedBits(SDValue(N, 0)))
5258      return SDValue(N, 0);
5259
5260 return SDValue();
5261}
5262
5263SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5264 SDLoc DL(N);
5265 EVT VT = N->getValueType(0);
5266
5267 // fold (udiv x, (1 << c)) -> x >>u c
5268 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5269 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5270 AddToWorklist(LogBase2.getNode());
5271
5272 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5273 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5274 AddToWorklist(Trunc.getNode());
5275 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5276 }
5277 }
5278
5279 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5280 if (N1.getOpcode() == ISD::SHL) {
5281 SDValue N10 = N1.getOperand(0);
5282 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5283 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5284 AddToWorklist(LogBase2.getNode());
5285
5286 EVT ADDVT = N1.getOperand(1).getValueType();
5287 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5288 AddToWorklist(Trunc.getNode());
5289 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5290 AddToWorklist(Add.getNode());
5291 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5292 }
5293 }
5294 }
5295
5296 // fold (udiv x, c) -> alternate
5297    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5298    if (isConstantOrConstantVector(N1) &&
5299        !TLI.isIntDivCheap(N->getValueType(0), Attr))
5300 if (SDValue Op = BuildUDIV(N))
5301 return Op;
5302
5303 return SDValue();
5304}
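Both folds above reduce an unsigned divide to a logical right shift. A standalone check of the underlying arithmetic for i32 (constants chosen arbitrarily for illustration):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 37u, 0xFFFFu, 0xFFFFFFFFu}) {
    // udiv x, (1 << c)    --> x >> c
    for (unsigned C : {1u, 4u, 9u})
      assert(X / (1u << C) == X >> C);
    // udiv x, (pow2 << y) --> x >> (log2(pow2) + y)
    unsigned Pow2 = 8, Log2 = 3; // divisor built as (8 << y)
    for (unsigned Y : {0u, 2u, 5u})
      assert(X / (Pow2 << Y) == X >> (Log2 + Y));
  }
  return 0;
}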
5305
5306SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5307 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5308 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5309 // Target-specific implementation of srem x, pow2.
5310 if (SDValue Res = BuildSREMPow2(N))
5311 return Res;
5312 }
5313 return SDValue();
5314}
5315
5316// handles ISD::SREM and ISD::UREM
5317SDValue DAGCombiner::visitREM(SDNode *N) {
5318 unsigned Opcode = N->getOpcode();
5319 SDValue N0 = N->getOperand(0);
5320 SDValue N1 = N->getOperand(1);
5321 EVT VT = N->getValueType(0);
5322 EVT CCVT = getSetCCResultType(VT);
5323
5324 bool isSigned = (Opcode == ISD::SREM);
5325 SDLoc DL(N);
5326
5327 // fold (rem c1, c2) -> c1%c2
5328 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5329 return C;
5330
5331 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5332 // Freeze the numerator to avoid a miscompile with an undefined value.
5333 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5334 CCVT.isVector() == VT.isVector()) {
5335 SDValue F0 = DAG.getFreeze(N0);
5336 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5337 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5338 }
5339
5340 if (SDValue V = simplifyDivRem(N, DAG))
5341 return V;
5342
5343 if (SDValue NewSel = foldBinOpIntoSelect(N))
5344 return NewSel;
5345
5346 if (isSigned) {
5347 // If we know the sign bits of both operands are zero, strength reduce to a
5348 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5349 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5350 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5351 } else {
5352 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5353 // fold (urem x, pow2) -> (and x, pow2-1)
5354 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5355 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5356 AddToWorklist(Add.getNode());
5357 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5358 }
5359 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5360 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5361 // TODO: We should sink the following into isKnownToBePowerOfTwo
5362 // using a OrZero parameter analogous to our handling in ValueTracking.
5363      if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5364          DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5365        SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5366 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5367 AddToWorklist(Add.getNode());
5368 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5369 }
5370 }
5371
5372 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5373
5374 // If X/C can be simplified by the division-by-constant logic, lower
5375 // X%C to the equivalent of X-X/C*C.
5376 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5377 // speculative DIV must not cause a DIVREM conversion. We guard against this
5378 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5379 // combine will not return a DIVREM. Regardless, checking cheapness here
5380 // makes sense since the simplification results in fatter code.
5381 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5382 if (isSigned) {
5383 // check if we can build faster implementation for srem
5384 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5385 return OptimizedRem;
5386 }
5387
5388 SDValue OptimizedDiv =
5389 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5390 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5391 // If the equivalent Div node also exists, update its users.
5392 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5393 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5394 { N0, N1 }))
5395 CombineTo(DivNode, OptimizedDiv);
5396 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5397 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5398 AddToWorklist(OptimizedDiv.getNode());
5399 AddToWorklist(Mul.getNode());
5400 return Sub;
5401 }
5402 }
5403
5404    // sdiv/udiv, srem/urem -> sdivrem/udivrem
5405 if (SDValue DivRem = useDivRem(N))
5406 return DivRem.getValue(1);
5407
5408 return SDValue();
5409}
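The unsigned-remainder folds above rest on urem x, 2^k == x & (2^k - 1). A minimal standalone check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 13u, 255u, 0xDEADBEEFu})
    for (uint32_t Pow2 : {1u, 2u, 16u, 0x80000000u})
      assert(X % Pow2 == (X & (Pow2 - 1u))); // pow2 - 1 is the low-bit mask
  return 0;
}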
5410
5411SDValue DAGCombiner::visitMULHS(SDNode *N) {
5412 SDValue N0 = N->getOperand(0);
5413 SDValue N1 = N->getOperand(1);
5414 EVT VT = N->getValueType(0);
5415 SDLoc DL(N);
5416
5417 // fold (mulhs c1, c2)
5418 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5419 return C;
5420
5421 // canonicalize constant to RHS.
5424 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5425
5426 if (VT.isVector()) {
5427 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5428 return FoldedVOp;
5429
5430 // fold (mulhs x, 0) -> 0
5431 // do not return N1, because undef node may exist.
5433 return DAG.getConstant(0, DL, VT);
5434 }
5435
5436 // fold (mulhs x, 0) -> 0
5437 if (isNullConstant(N1))
5438 return N1;
5439
5440 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5441 if (isOneConstant(N1))
5442 return DAG.getNode(
5443 ISD::SRA, DL, VT, N0,
5445
5446 // fold (mulhs x, undef) -> 0
5447 if (N0.isUndef() || N1.isUndef())
5448 return DAG.getConstant(0, DL, VT);
5449
5450 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5451 // plus a shift.
5452 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5453 !VT.isVector()) {
5454 MVT Simple = VT.getSimpleVT();
5455 unsigned SimpleSize = Simple.getSizeInBits();
5456 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5457 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5458 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5459 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5460 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5461 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5462 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5463 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5464 }
5465 }
5466
5467 return SDValue();
5468}
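The widening transform above computes the high half of a signed product by multiplying in a type twice as wide and shifting. For i32, the equivalent plain C++ arithmetic is shown below (helper name illustrative only; assumes arithmetic right shift of negative values as in C++20):

#include <cassert>
#include <cstdint>

// High 32 bits of the 64-bit signed product, i.e. ISD::MULHS for i32.
static int32_t MulHiS32(int32_t A, int32_t B) {
  return int32_t((int64_t(A) * int64_t(B)) >> 32);
}

int main() {
  assert(MulHiS32(1 << 30, 8) == 2);                       // 2^33 has 2 in its high word
  assert(MulHiS32(-1, 5) == -1);                           // small negative product: high word is all ones
  assert(MulHiS32(0x40000000, 0x40000000) == 0x10000000);  // 2^60 >> 32 == 2^28
  return 0;
}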
5469
5470SDValue DAGCombiner::visitMULHU(SDNode *N) {
5471 SDValue N0 = N->getOperand(0);
5472 SDValue N1 = N->getOperand(1);
5473 EVT VT = N->getValueType(0);
5474 SDLoc DL(N);
5475
5476 // fold (mulhu c1, c2)
5477 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5478 return C;
5479
5480 // canonicalize constant to RHS.
5483 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5484
5485 if (VT.isVector()) {
5486 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5487 return FoldedVOp;
5488
5489 // fold (mulhu x, 0) -> 0
5490 // do not return N1, because undef node may exist.
5492 return DAG.getConstant(0, DL, VT);
5493 }
5494
5495 // fold (mulhu x, 0) -> 0
5496 if (isNullConstant(N1))
5497 return N1;
5498
5499 // fold (mulhu x, 1) -> 0
5500 if (isOneConstant(N1))
5501 return DAG.getConstant(0, DL, VT);
5502
5503 // fold (mulhu x, undef) -> 0
5504 if (N0.isUndef() || N1.isUndef())
5505 return DAG.getConstant(0, DL, VT);
5506
5507 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5508 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5509 hasOperation(ISD::SRL, VT)) {
5510 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5511 unsigned NumEltBits = VT.getScalarSizeInBits();
5512 SDValue SRLAmt = DAG.getNode(
5513 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5514 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5515 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5516 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5517 }
5518 }
5519
5520 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5521 // plus a shift.
5522 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5523 !VT.isVector()) {
5524 MVT Simple = VT.getSimpleVT();
5525 unsigned SimpleSize = Simple.getSizeInBits();
5526 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5527 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5528 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5529 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5530 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5531 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5532 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5533 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5534 }
5535 }
5536
5537 // Simplify the operands using demanded-bits information.
5538 // We don't have demanded bits support for MULHU so this just enables constant
5539    // folding based on known bits.
5540    if (SimplifyDemandedBits(SDValue(N, 0)))
5541      return SDValue(N, 0);
5542
5543 return SDValue();
5544}
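The (mulhu x, 1 << c) fold above uses the fact that the high half of x * 2^c is x shifted right by (bitwidth - c). A standalone check for i32 (helper name illustrative only):

#include <cassert>
#include <cstdint>

// High 32 bits of the 64-bit unsigned product, i.e. ISD::MULHU for i32.
static uint32_t MulHiU32(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * uint64_t(B)) >> 32);
}

int main() {
  for (uint32_t X : {0u, 1u, 0x12345678u, 0xFFFFFFFFu})
    for (unsigned C : {1u, 8u, 31u})
      assert(MulHiU32(X, 1u << C) == X >> (32 - C));
  return 0;
}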
5545
5546SDValue DAGCombiner::visitAVG(SDNode *N) {
5547 unsigned Opcode = N->getOpcode();
5548 SDValue N0 = N->getOperand(0);
5549 SDValue N1 = N->getOperand(1);
5550 EVT VT = N->getValueType(0);
5551 SDLoc DL(N);
5552 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5553
5554 // fold (avg c1, c2)
5555 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5556 return C;
5557
5558 // canonicalize constant to RHS.
5561 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5562
5563 if (VT.isVector())
5564 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5565 return FoldedVOp;
5566
5567 // fold (avg x, undef) -> x
5568 if (N0.isUndef())
5569 return N1;
5570 if (N1.isUndef())
5571 return N0;
5572
5573 // fold (avg x, x) --> x
5574 if (N0 == N1 && Level >= AfterLegalizeTypes)
5575 return N0;
5576
5577 // fold (avgfloor x, 0) -> x >> 1
5578 SDValue X, Y;
5580 return DAG.getNode(ISD::SRA, DL, VT, X,
5581 DAG.getShiftAmountConstant(1, VT, DL));
5583 return DAG.getNode(ISD::SRL, DL, VT, X,
5584 DAG.getShiftAmountConstant(1, VT, DL));
5585
5586 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5587 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5588 if (!IsSigned &&
5589 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5590 X.getValueType() == Y.getValueType() &&
5591 hasOperation(Opcode, X.getValueType())) {
5592 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5593 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5594 }
5595 if (IsSigned &&
5596 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5597 X.getValueType() == Y.getValueType() &&
5598 hasOperation(Opcode, X.getValueType())) {
5599 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5600 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5601 }
5602
5603 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5604 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5605 // Check if avgflooru isn't legal/custom but avgceilu is.
5606 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5607 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5608 if (DAG.isKnownNeverZero(N1))
5609 return DAG.getNode(
5610 ISD::AVGCEILU, DL, VT, N0,
5611 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5612 if (DAG.isKnownNeverZero(N0))
5613 return DAG.getNode(
5614 ISD::AVGCEILU, DL, VT, N1,
5615 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5616 }
5617
5618 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5619 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5620 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5621 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5622 SDValue Add;
5623 if (sd_match(N,
5624 m_c_BinOp(Opcode,
5626 m_One())) ||
5627 sd_match(N, m_c_BinOp(Opcode,
5629 m_Value(Y)))) {
5630
5631 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5632 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5633
5634 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5635 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5636 }
5637 }
5638
5639 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5640 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5641 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5642 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5643 }
5644
5645 return SDValue();
5646}
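Two of the identities used above, checked exhaustively for i8 in plain C++: the floor and ceiling averages can be formed without the intermediate add overflowing, and avgflooru(x, y) equals avgceilu(x, y - 1) whenever y is nonzero (helper names illustrative only):

#include <cassert>
#include <cstdint>

static uint8_t AvgFloorU(uint8_t A, uint8_t B) { // (A + B) >> 1 without overflow
  return (A & B) + ((A ^ B) >> 1);
}
static uint8_t AvgCeilU(uint8_t A, uint8_t B) {  // (A + B + 1) >> 1 without overflow
  return (A | B) - ((A ^ B) >> 1);
}

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      assert(AvgFloorU(A, B) == (A + B) / 2u);
      assert(AvgCeilU(A, B) == (A + B + 1) / 2u);
      if (B != 0) // avgflooru(x, y) -> avgceilu(x, y - 1) iff y != 0
        assert(AvgFloorU(A, B) == AvgCeilU(A, B - 1));
    }
  return 0;
}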
5647
5648SDValue DAGCombiner::visitABD(SDNode *N) {
5649 unsigned Opcode = N->getOpcode();
5650 SDValue N0 = N->getOperand(0);
5651 SDValue N1 = N->getOperand(1);
5652 EVT VT = N->getValueType(0);
5653 SDLoc DL(N);
5654
5655 // fold (abd c1, c2)
5656 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5657 return C;
5658
5659 // canonicalize constant to RHS.
5662 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5663
5664 if (VT.isVector())
5665 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5666 return FoldedVOp;
5667
5668 // fold (abd x, undef) -> 0
5669 if (N0.isUndef() || N1.isUndef())
5670 return DAG.getConstant(0, DL, VT);
5671
5672 // fold (abd x, x) -> 0
5673 if (N0 == N1)
5674 return DAG.getConstant(0, DL, VT);
5675
5676 SDValue X;
5677
5678 // fold (abds x, 0) -> abs x
5680 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5681 return DAG.getNode(ISD::ABS, DL, VT, X);
5682
5683 // fold (abdu x, 0) -> x
5685 return X;
5686
5687 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5688 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5689 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5690 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5691
5692 return SDValue();
5693}
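The absolute-difference folds reduce to |x - y| == max(x, y) - min(x, y). A small standalone sketch of the signed case plus the (abdu x, 0) -> x and (abd x, x) -> 0 special cases (helper name illustrative only):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Absolute difference via max - min (exact when the true difference fits in i32).
static int32_t AbdS32(int32_t A, int32_t B) {
  return std::max(A, B) - std::min(A, B);
}

int main() {
  assert(AbdS32(-7, 5) == 12);
  assert(AbdS32(5, -7) == 12);
  assert(AbdS32(9, 9) == 0);                 // matches fold (abd x, x) -> 0
  for (uint32_t X : {0u, 3u, 0xFFFFFFFFu})   // matches fold (abdu x, 0) -> x
    assert(std::max<uint32_t>(X, 0) - std::min<uint32_t>(X, 0) == X);
  return 0;
}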
5694
5695/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5696/// give the opcodes for the two computations that are being performed. Return
5697  /// the combined value if a simplification was made, or a null SDValue otherwise.
5698SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5699 unsigned HiOp) {
5700 // If the high half is not needed, just compute the low half.
5701 bool HiExists = N->hasAnyUseOfValue(1);
5702 if (!HiExists && (!LegalOperations ||
5703 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5704 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5705 return CombineTo(N, Res, Res);
5706 }
5707
5708 // If the low half is not needed, just compute the high half.
5709 bool LoExists = N->hasAnyUseOfValue(0);
5710 if (!LoExists && (!LegalOperations ||
5711 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5712 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5713 return CombineTo(N, Res, Res);
5714 }
5715
5716 // If both halves are used, return as it is.
5717 if (LoExists && HiExists)
5718 return SDValue();
5719
5720 // If the two computed results can be simplified separately, separate them.
5721 if (LoExists) {
5722 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5723 AddToWorklist(Lo.getNode());
5724 SDValue LoOpt = combine(Lo.getNode());
5725 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5726 (!LegalOperations ||
5727 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5728 return CombineTo(N, LoOpt, LoOpt);
5729 }
5730
5731 if (HiExists) {
5732 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5733 AddToWorklist(Hi.getNode());
5734 SDValue HiOpt = combine(Hi.getNode());
5735 if (HiOpt.getNode() && HiOpt != Hi &&
5736 (!LegalOperations ||
5737 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5738 return CombineTo(N, HiOpt, HiOpt);
5739 }
5740
5741 return SDValue();
5742}
5743
5744SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5745 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5746 return Res;
5747
5748 SDValue N0 = N->getOperand(0);
5749 SDValue N1 = N->getOperand(1);
5750 EVT VT = N->getValueType(0);
5751 SDLoc DL(N);
5752
5753 // Constant fold.
5755 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5756
5757 // canonicalize constant to RHS (vector doesn't have to splat)
5760 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5761
5762    // If the type twice as wide is legal, transform the smul_lohi to a wider
5763    // multiply plus a shift.
5764 if (VT.isSimple() && !VT.isVector()) {
5765 MVT Simple = VT.getSimpleVT();
5766 unsigned SimpleSize = Simple.getSizeInBits();
5767 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5768 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5769 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5770 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5771 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5772        // Compute the high part by shifting the widened product down.
5773 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5774 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5775 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5776        // Compute the low part by truncating the widened product.
5777 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5778 return CombineTo(N, Lo, Hi);
5779 }
5780 }
5781
5782 return SDValue();
5783}
5784
5785SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5786 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5787 return Res;
5788
5789 SDValue N0 = N->getOperand(0);
5790 SDValue N1 = N->getOperand(1);
5791 EVT VT = N->getValueType(0);
5792 SDLoc DL(N);
5793
5794 // Constant fold.
5796 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5797
5798 // canonicalize constant to RHS (vector doesn't have to splat)
5801 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5802
5803 // (umul_lohi N0, 0) -> (0, 0)
5804 if (isNullConstant(N1)) {
5805 SDValue Zero = DAG.getConstant(0, DL, VT);
5806 return CombineTo(N, Zero, Zero);
5807 }
5808
5809 // (umul_lohi N0, 1) -> (N0, 0)
5810 if (isOneConstant(N1)) {
5811 SDValue Zero = DAG.getConstant(0, DL, VT);
5812 return CombineTo(N, N0, Zero);
5813 }
5814
5815    // If the type twice as wide is legal, transform the umul_lohi to a wider
5816    // multiply plus a shift.
5817 if (VT.isSimple() && !VT.isVector()) {
5818 MVT Simple = VT.getSimpleVT();
5819 unsigned SimpleSize = Simple.getSizeInBits();
5820 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5821 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5822 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5823 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5824 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5825        // Compute the high part by shifting the widened product down.
5826 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5827 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5828 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5829        // Compute the low part by truncating the widened product.
5830 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5831 return CombineTo(N, Lo, Hi);
5832 }
5833 }
5834
5835 return SDValue();
5836}
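The widening path above splits a product into low and high halves. For i32, the equivalent arithmetic written directly in C++ is:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEADBEEFu, B = 0x12345678u;
  uint64_t Wide = uint64_t(A) * uint64_t(B);   // multiply in the 2x-wide type
  uint32_t Lo = uint32_t(Wide);                // low half: the plain wrapping i32 product
  uint32_t Hi = uint32_t(Wide >> 32);          // high half: shift then truncate
  assert(Lo == A * B);                         // matches the wrapping i32 multiply
  assert(((uint64_t(Hi) << 32) | Lo) == Wide); // the halves reassemble the full product
  return 0;
}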
5837
5838SDValue DAGCombiner::visitMULO(SDNode *N) {
5839 SDValue N0 = N->getOperand(0);
5840 SDValue N1 = N->getOperand(1);
5841 EVT VT = N0.getValueType();
5842 bool IsSigned = (ISD::SMULO == N->getOpcode());
5843
5844 EVT CarryVT = N->getValueType(1);
5845 SDLoc DL(N);
5846
5847 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5848 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5849
5850 // fold operation with constant operands.
5851 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5852 // multiple results.
5853 if (N0C && N1C) {
5854 bool Overflow;
5855 APInt Result =
5856 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5857 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5858 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5859 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5860 }
5861
5862 // canonicalize constant to RHS.
5865 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5866
5867 // fold (mulo x, 0) -> 0 + no carry out
5868 if (isNullOrNullSplat(N1))
5869 return CombineTo(N, DAG.getConstant(0, DL, VT),
5870 DAG.getConstant(0, DL, CarryVT));
5871
5872 // (mulo x, 2) -> (addo x, x)
5873 // FIXME: This needs a freeze.
5874 if (N1C && N1C->getAPIntValue() == 2 &&
5875 (!IsSigned || VT.getScalarSizeInBits() > 2))
5876 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5877 N->getVTList(), N0, N0);
5878
5879 // A 1 bit SMULO overflows if both inputs are 1.
5880 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5881 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5882 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5883 DAG.getConstant(0, DL, VT), ISD::SETNE);
5884 return CombineTo(N, And, Cmp);
5885 }
5886
5887 // If it cannot overflow, transform into a mul.
5888 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5889 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5890 DAG.getConstant(0, DL, CarryVT));
5891 return SDValue();
5892}
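The willNotOverflowMul rewrite above drops the overflow result only when it is provably clear. As a standalone sketch, here is how signed i32 multiply overflow can be decided by widening (helper name illustrative only):

#include <cassert>
#include <cstdint>

// True iff A * B does not fit in int32_t (the SMULO overflow bit for i32).
static bool SMulOverflows32(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) * int64_t(B); // widen, multiply exactly
  return Wide > INT32_MAX || Wide < INT32_MIN;
}

int main() {
  assert(!SMulOverflows32(46340, 46340)); // 2147395600 fits in i32
  assert(SMulOverflows32(46341, 46341));  // 2147488281 does not
  assert(SMulOverflows32(INT32_MIN, -1)); // classic corner case
  assert(!SMulOverflows32(INT32_MIN, 1));
  return 0;
}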
5893
5894// Function to calculate whether the Min/Max pair of SDNodes (potentially
5895// swapped around) make a signed saturate pattern, clamping to between a signed
5896  // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5897// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5898// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5899  // same as SimplifySelectCC. N0<N1 ? N2 : N3.
5900  static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5901                                    SDValue N3, ISD::CondCode CC, unsigned &BW,
5902 bool &Unsigned, SelectionDAG &DAG) {
5903 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5904 ISD::CondCode CC) {
5905 // The compare and select operand should be the same or the select operands
5906 // should be truncated versions of the comparison.
5907 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5908 return 0;
5909 // The constants need to be the same or a truncated version of each other.
5912 if (!N1C || !N3C)
5913 return 0;
5914 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5915 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5916 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5917 return 0;
5918 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5919 };
5920
5921 // Check the initial value is a SMIN/SMAX equivalent.
5922 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5923 if (!Opcode0)
5924 return SDValue();
5925
5926    // We may need only one range check if the fptosi can never produce
5927    // the upper value.
5928 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5929 if (isNullOrNullSplat(N3)) {
5930 EVT IntVT = N0.getValueType().getScalarType();
5931 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5932 if (FPVT.isSimple()) {
5933 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5934 const fltSemantics &Semantics = InputTy->getFltSemantics();
5935 uint32_t MinBitWidth =
5936 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5937 if (IntVT.getSizeInBits() >= MinBitWidth) {
5938 Unsigned = true;
5939 BW = PowerOf2Ceil(MinBitWidth);
5940 return N0;
5941 }
5942 }
5943 }
5944 }
5945
5946 SDValue N00, N01, N02, N03;
5947 ISD::CondCode N0CC;
5948 switch (N0.getOpcode()) {
5949 case ISD::SMIN:
5950 case ISD::SMAX:
5951 N00 = N02 = N0.getOperand(0);
5952 N01 = N03 = N0.getOperand(1);
5953 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5954 break;
5955 case ISD::SELECT_CC:
5956 N00 = N0.getOperand(0);
5957 N01 = N0.getOperand(1);
5958 N02 = N0.getOperand(2);
5959 N03 = N0.getOperand(3);
5960 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5961 break;
5962 case ISD::SELECT:
5963 case ISD::VSELECT:
5964 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5965 return SDValue();
5966 N00 = N0.getOperand(0).getOperand(0);
5967 N01 = N0.getOperand(0).getOperand(1);
5968 N02 = N0.getOperand(1);
5969 N03 = N0.getOperand(2);
5970 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5971 break;
5972 default:
5973 return SDValue();
5974 }
5975
5976 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5977 if (!Opcode1 || Opcode0 == Opcode1)
5978 return SDValue();
5979
5980 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5981 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5982 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5983 return SDValue();
5984
5985 const APInt &MinC = MinCOp->getAPIntValue();
5986 const APInt &MaxC = MaxCOp->getAPIntValue();
5987 APInt MinCPlus1 = MinC + 1;
5988 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5989 BW = MinCPlus1.exactLogBase2() + 1;
5990 Unsigned = false;
5991 return N02;
5992 }
5993
5994 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5995 BW = MinCPlus1.exactLogBase2();
5996 Unsigned = true;
5997 return N02;
5998 }
5999
6000 return SDValue();
6001}
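The min/max pair recognized above implements a signed clamp to [-2^(BW-1), 2^(BW-1)-1]. For BW == 8, the equivalent scalar arithmetic looks like this (helper name illustrative only):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Clamp to the signed i8 range [-128, 127], i.e. smin(smax(x, -128), 127).
static int32_t SignedSat8(int32_t X) {
  return std::min<int32_t>(std::max<int32_t>(X, -128), 127);
}

int main() {
  assert(SignedSat8(-1000) == -128); // below the range: saturates to -2^(BW-1)
  assert(SignedSat8(42) == 42);      // in range: unchanged
  assert(SignedSat8(300) == 127);    // above the range: saturates to 2^(BW-1)-1
  return 0;
}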
6002
6003  static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6004                                             SDValue N3, ISD::CondCode CC,
6005 SelectionDAG &DAG) {
6006 unsigned BW;
6007 bool Unsigned;
6008 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6009 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6010 return SDValue();
6011 EVT FPVT = Fp.getOperand(0).getValueType();
6012 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6013 if (FPVT.isVector())
6014 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6015 FPVT.getVectorElementCount());
6016 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6017 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6018 return SDValue();
6019 SDLoc DL(Fp);
6020 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6021 DAG.getValueType(NewVT.getScalarType()));
6022 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6023}
6024
6025  static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6026                                           SDValue N3, ISD::CondCode CC,
6027 SelectionDAG &DAG) {
6028 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6029 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6030 // be truncated versions of the setcc (N0/N1).
6031 if ((N0 != N2 &&
6032 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6033 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6034 return SDValue();
6037 if (!N1C || !N3C)
6038 return SDValue();
6039 const APInt &C1 = N1C->getAPIntValue();
6040 const APInt &C3 = N3C->getAPIntValue();
6041 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6042 C1 != C3.zext(C1.getBitWidth()))
6043 return SDValue();
6044
6045 unsigned BW = (C1 + 1).exactLogBase2();
6046 EVT FPVT = N0.getOperand(0).getValueType();
6047 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6048 if (FPVT.isVector())
6049 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6050 FPVT.getVectorElementCount());
6052 FPVT, NewVT))
6053 return SDValue();
6054
6055 SDValue Sat =
6056 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6057 DAG.getValueType(NewVT.getScalarType()));
6058 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6059}
6060
6061SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6062 SDValue N0 = N->getOperand(0);
6063 SDValue N1 = N->getOperand(1);
6064 EVT VT = N0.getValueType();
6065 unsigned Opcode = N->getOpcode();
6066 SDLoc DL(N);
6067
6068 // fold operation with constant operands.
6069 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6070 return C;
6071
6072 // If the operands are the same, this is a no-op.
6073 if (N0 == N1)
6074 return N0;
6075
6076 // Fold operation with vscale operands.
6077 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6078 uint64_t C0 = N0->getConstantOperandVal(0);
6079 uint64_t C1 = N1->getConstantOperandVal(0);
6080 if (Opcode == ISD::UMAX)
6081 return C0 > C1 ? N0 : N1;
6082 else if (Opcode == ISD::UMIN)
6083 return C0 > C1 ? N1 : N0;
6084 }
6085
6086 // canonicalize constant to RHS
6089 return DAG.getNode(Opcode, DL, VT, N1, N0);
6090
6091 // fold vector ops
6092 if (VT.isVector())
6093 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6094 return FoldedVOp;
6095
6096 // reassociate minmax
6097 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6098 return RMINMAX;
6099
6100    // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6101 // Only do this if:
6102 // 1. The current op isn't legal and the flipped is.
6103 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6104 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6105 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6106 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6107 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6108 unsigned AltOpcode;
6109 switch (Opcode) {
6110 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6111 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6112 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6113 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6114 default: llvm_unreachable("Unknown MINMAX opcode");
6115 }
6116 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6117 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6118 }
6119
6120    if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6121      if (SDValue S = PerformMinMaxFpToSatCombine(
6122              N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6123 return S;
6124 if (Opcode == ISD::UMIN)
6125 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6126 return S;
6127
6128 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6129 auto ReductionOpcode = [](unsigned Opcode) {
6130 switch (Opcode) {
6131 case ISD::SMIN:
6132 return ISD::VECREDUCE_SMIN;
6133 case ISD::SMAX:
6134 return ISD::VECREDUCE_SMAX;
6135 case ISD::UMIN:
6136 return ISD::VECREDUCE_UMIN;
6137 case ISD::UMAX:
6138 return ISD::VECREDUCE_UMAX;
6139 default:
6140 llvm_unreachable("Unexpected opcode");
6141 }
6142 };
6143 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6144 SDLoc(N), VT, N0, N1))
6145 return SD;
6146
6147    // Simplify the operands using demanded-bits information.
6148    if (SimplifyDemandedBits(SDValue(N, 0)))
6149      return SDValue(N, 0);
6150
6151 return SDValue();
6152}
6153
6154/// If this is a bitwise logic instruction and both operands have the same
6155/// opcode, try to sink the other opcode after the logic instruction.
6156SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6157 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6158 EVT VT = N0.getValueType();
6159 unsigned LogicOpcode = N->getOpcode();
6160 unsigned HandOpcode = N0.getOpcode();
6161 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6162 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6163
6164 // Bail early if none of these transforms apply.
6165 if (N0.getNumOperands() == 0)
6166 return SDValue();
6167
6168 // FIXME: We should check number of uses of the operands to not increase
6169 // the instruction count for all transforms.
6170
6171 // Handle size-changing casts (or sign_extend_inreg).
6172 SDValue X = N0.getOperand(0);
6173 SDValue Y = N1.getOperand(0);
6174 EVT XVT = X.getValueType();
6175 SDLoc DL(N);
6176 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6177 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6178 N0.getOperand(1) == N1.getOperand(1))) {
6179 // If both operands have other uses, this transform would create extra
6180 // instructions without eliminating anything.
6181 if (!N0.hasOneUse() && !N1.hasOneUse())
6182 return SDValue();
6183 // We need matching integer source types.
6184 if (XVT != Y.getValueType())
6185 return SDValue();
6186 // Don't create an illegal op during or after legalization. Don't ever
6187 // create an unsupported vector op.
6188 if ((VT.isVector() || LegalOperations) &&
6189 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6190 return SDValue();
6191 // Avoid infinite looping with PromoteIntBinOp.
6192 // TODO: Should we apply desirable/legal constraints to all opcodes?
6193 if ((HandOpcode == ISD::ANY_EXTEND ||
6194 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6195 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6196 return SDValue();
6197 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6198 SDNodeFlags LogicFlags;
6199 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6200 ISD::isExtOpcode(HandOpcode));
6201 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6202 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6203 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6204 return DAG.getNode(HandOpcode, DL, VT, Logic);
6205 }
6206
6207 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6208 if (HandOpcode == ISD::TRUNCATE) {
6209 // If both operands have other uses, this transform would create extra
6210 // instructions without eliminating anything.
6211 if (!N0.hasOneUse() && !N1.hasOneUse())
6212 return SDValue();
6213 // We need matching source types.
6214 if (XVT != Y.getValueType())
6215 return SDValue();
6216 // Don't create an illegal op during or after legalization.
6217 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6218 return SDValue();
6219 // Be extra careful sinking truncate. If it's free, there's no benefit in
6220 // widening a binop. Also, don't create a logic op on an illegal type.
6221 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6222 return SDValue();
6223 if (!TLI.isTypeLegal(XVT))
6224 return SDValue();
6225 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6226 return DAG.getNode(HandOpcode, DL, VT, Logic);
6227 }
6228
6229 // For binops SHL/SRL/SRA/AND:
6230 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6231 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6232 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6233 N0.getOperand(1) == N1.getOperand(1)) {
6234 // If either operand has other uses, this transform is not an improvement.
6235 if (!N0.hasOneUse() || !N1.hasOneUse())
6236 return SDValue();
6237 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6238 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6239 }
6240
6241 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6242 if (HandOpcode == ISD::BSWAP) {
6243 // If either operand has other uses, this transform is not an improvement.
6244 if (!N0.hasOneUse() || !N1.hasOneUse())
6245 return SDValue();
6246 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6247 return DAG.getNode(HandOpcode, DL, VT, Logic);
6248 }
6249
6250 // For funnel shifts FSHL/FSHR:
6251 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6252 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6253 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6254 N0.getOperand(2) == N1.getOperand(2)) {
6255 if (!N0.hasOneUse() || !N1.hasOneUse())
6256 return SDValue();
6257 SDValue X1 = N0.getOperand(1);
6258 SDValue Y1 = N1.getOperand(1);
6259 SDValue S = N0.getOperand(2);
6260 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6261 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6262 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6263 }
6264
6265 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6266 // Only perform this optimization up until type legalization, before
6267    // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6268 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6269 // we don't want to undo this promotion.
6270 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6271 // on scalars.
6272 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6273 Level <= AfterLegalizeTypes) {
6274 // Input types must be integer and the same.
6275 if (XVT.isInteger() && XVT == Y.getValueType() &&
6276 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6277 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6278 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6279 return DAG.getNode(HandOpcode, DL, VT, Logic);
6280 }
6281 }
6282
6283 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6284 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6285 // If both shuffles use the same mask, and both shuffle within a single
6286 // vector, then it is worthwhile to move the swizzle after the operation.
6287 // The type-legalizer generates this pattern when loading illegal
6288 // vector types from memory. In many cases this allows additional shuffle
6289 // optimizations.
6290 // There are other cases where moving the shuffle after the xor/and/or
6291 // is profitable even if shuffles don't perform a swizzle.
6292 // If both shuffles use the same mask, and both shuffles have the same first
6293 // or second operand, then it might still be profitable to move the shuffle
6294 // after the xor/and/or operation.
6295 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6296 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6297 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6298 assert(X.getValueType() == Y.getValueType() &&
6299 "Inputs to shuffles are not the same type");
6300
6301 // Check that both shuffles use the same mask. The masks are known to be of
6302 // the same length because the result vector type is the same.
6303 // Check also that shuffles have only one use to avoid introducing extra
6304 // instructions.
6305 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6306 !SVN0->getMask().equals(SVN1->getMask()))
6307 return SDValue();
6308
6309 // Don't try to fold this node if it requires introducing a
6310 // build vector of all zeros that might be illegal at this stage.
6311 SDValue ShOp = N0.getOperand(1);
6312 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6313 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6314
6315 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6316 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6317 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6318 N0.getOperand(0), N1.getOperand(0));
6319 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6320 }
6321
6322 // Don't try to fold this node if it requires introducing a
6323 // build vector of all zeros that might be illegal at this stage.
6324 ShOp = N0.getOperand(0);
6325 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6326 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6327
6328 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6329 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6330 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6331 N1.getOperand(1));
6332 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6333 }
6334 }
6335
6336 return SDValue();
6337}
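// Illustrative instance of the hoisting performed above (not taken from a
// specific test): and (zext i8 x to i32), (zext i8 y to i32) becomes
// zext (and i8 x, y) to i32, so the logic op runs on the narrow type and only
// a single extend remains.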
6338
6339/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6340SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6341 const SDLoc &DL) {
6342 SDValue LL, LR, RL, RR, N0CC, N1CC;
6343 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6344 !isSetCCEquivalent(N1, RL, RR, N1CC))
6345 return SDValue();
6346
6347 assert(N0.getValueType() == N1.getValueType() &&
6348 "Unexpected operand types for bitwise logic op");
6349 assert(LL.getValueType() == LR.getValueType() &&
6350 RL.getValueType() == RR.getValueType() &&
6351 "Unexpected operand types for setcc");
6352
6353 // If we're here post-legalization or the logic op type is not i1, the logic
6354 // op type must match a setcc result type. Also, all folds require new
6355 // operations on the left and right operands, so those types must match.
6356 EVT VT = N0.getValueType();
6357 EVT OpVT = LL.getValueType();
6358 if (LegalOperations || VT.getScalarType() != MVT::i1)
6359 if (VT != getSetCCResultType(OpVT))
6360 return SDValue();
6361 if (OpVT != RL.getValueType())
6362 return SDValue();
6363
6364 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6365 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6366 bool IsInteger = OpVT.isInteger();
6367 if (LR == RR && CC0 == CC1 && IsInteger) {
6368 bool IsZero = isNullOrNullSplat(LR);
6369 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6370
6371 // All bits clear?
6372 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6373 // All sign bits clear?
6374 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6375 // Any bits set?
6376 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6377 // Any sign bits set?
6378 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6379
6380 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6381 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6382 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6383 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6384 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6385 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6386 AddToWorklist(Or.getNode());
6387 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6388 }
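// These folds are sound because the OR aggregates the bits: (X | Y) == 0
// exactly when X == 0 and Y == 0, and the sign bit of (X | Y) is clear
// exactly when both sign bits are clear, so one compare of the OR replaces
// the two original compares.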
6389
6390 // All bits set?
6391 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6392 // All sign bits set?
6393 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6394 // Any bits clear?
6395 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6396 // Any sign bits clear?
6397 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6398
6399 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6400 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6401 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6402 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
6403 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6404 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6405 AddToWorklist(And.getNode());
6406 return DAG.getSetCC(DL, VT, And, LR, CC1);
6407 }
6408 }
6409
6410 // TODO: What is the 'or' equivalent of this fold?
6411 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6412 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6413 IsInteger && CC0 == ISD::SETNE &&
6414 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6415 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6416 SDValue One = DAG.getConstant(1, DL, OpVT);
6417 SDValue Two = DAG.getConstant(2, DL, OpVT);
6418 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6419 AddToWorklist(Add.getNode());
6420 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6421 }
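// Worked example on i8: X == 0 gives X + 1 == 1 and X == -1 wraps to
// X + 1 == 0, so both fail (add X, 1) u>= 2; every other value maps to
// X + 1 in [2, 255] and passes, matching (X != 0) && (X != -1).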
6422
6423 // Try more general transforms if the predicates match and the only user of
6424 // the compares is the 'and' or 'or'.
6425 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6426 N0.hasOneUse() && N1.hasOneUse()) {
6427 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6428 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6429 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6430 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6431 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6432 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6433 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6434 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6435 }
6436
6437 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6438 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6439 // Match a shared variable operand and 2 non-opaque constant operands.
6440 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6441 // The difference of the constants must be a single bit.
6442 const APInt &CMax =
6443 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6444 const APInt &CMin =
6445 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6446 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6447 };
6448 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6449 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6450 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6451 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6452 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6453 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6454 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6455 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6456 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6457 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6458 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6459 }
6460 }
6461 }
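// Worked example of the constant-pair fold: for (X != 4) & (X != 6) the
// constants differ by the single bit 2, so this produces
// ((X - 4) & ~2) != 0, where the masked value is zero exactly for X in {4, 6}.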
6462
6463 // Canonicalize equivalent operands to LL == RL.
6464 if (LL == RR && LR == RL) {
6465 CC1 = ISD::getSetCCSwappedOperands(CC1);
6466 std::swap(RL, RR);
6467 }
6468
6469 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6470 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6471 if (LL == RL && LR == RR) {
6472 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6473 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6474 if (NewCC != ISD::SETCC_INVALID &&
6475 (!LegalOperations ||
6476 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6477 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6478 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6479 }
6480
6481 return SDValue();
6482}
6483
6484static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6485 SelectionDAG &DAG) {
6486 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6487}
6488
6489static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6490 SelectionDAG &DAG) {
6491 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6492}
6493
6494// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6495static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6496 ISD::CondCode CC, unsigned OrAndOpcode,
6497 SelectionDAG &DAG,
6498 bool isFMAXNUMFMINNUM_IEEE,
6499 bool isFMAXNUMFMINNUM) {
6500 // The optimization cannot be applied for all the predicates because
6501 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6502 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6503 // applied at all if one of the operands is a signaling NaN.
6504
6505 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6506 // are non NaN values.
6507 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6508 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6509 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6510 isFMAXNUMFMINNUM_IEEE
6511 ? ISD::FMINNUM_IEEE
6512 : ISD::DELETED_NODE;
6513 }
6514
6515 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6516 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6517 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6518 isFMAXNUMFMINNUM_IEEE
6519 ? ISD::FMAXNUM_IEEE
6520 : ISD::DELETED_NODE;
6521 }
6522
6523 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6524 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6525 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6526 // that there are not any sNaNs, then the optimization is not valid
6527 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6528 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6529 // we can prove that we do not have any sNaNs, then we can do the
6530 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6531 // cases.
6532 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6533 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6534 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6535 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6536 isFMAXNUMFMINNUM_IEEE
6537 ? ISD::FMINNUM_IEEE
6538 : ISD::DELETED_NODE;
6539 }
6540
6541 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6542 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6543 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6544 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6545 isFMAXNUMFMINNUM_IEEE
6546 ? ISD::FMAXNUM_IEEE
6547 : ISD::DELETED_NODE;
6548 }
6549
6550 return ISD::DELETED_NODE;
6551}
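// Illustrative case: (setolt x, y) | (setolt z, y) can become
// setolt (fminnum x, z), y.  With x a quiet NaN, fminnum(x, z) == z, so the
// folded compare equals false | (z olt y) as required; a signaling NaN would
// make the IEEE variant return NaN instead, which is why the sNaN checks
// above gate the FMINNUM_IEEE/FMAXNUM_IEEE forms.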
6552
6553 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6554 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6555 assert(
6556 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6557 "Invalid Op to combine SETCC with");
6558
6559 // TODO: Search past casts/truncates.
6560 SDValue LHS = LogicOp->getOperand(0);
6561 SDValue RHS = LogicOp->getOperand(1);
6562 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6563 !LHS->hasOneUse() || !RHS->hasOneUse())
6564 return SDValue();
6565
6566 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6567 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6568 LogicOp, LHS.getNode(), RHS.getNode());
6569
6570 SDValue LHS0 = LHS->getOperand(0);
6571 SDValue RHS0 = RHS->getOperand(0);
6572 SDValue LHS1 = LHS->getOperand(1);
6573 SDValue RHS1 = RHS->getOperand(1);
6574 // TODO: We don't actually need a splat here, for vectors we just need the
6575 // invariants to hold for each element.
6576 auto *LHS1C = isConstOrConstSplat(LHS1);
6577 auto *RHS1C = isConstOrConstSplat(RHS1);
6578 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6579 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6580 EVT VT = LogicOp->getValueType(0);
6581 EVT OpVT = LHS0.getValueType();
6582 SDLoc DL(LogicOp);
6583
6584 // Check if the operands of an and/or operation are comparisons and if they
6585 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6586 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6587 // sequence will be replaced with min-cmp sequence:
6588 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6589 // and and-cmp-cmp will be replaced with max-cmp sequence:
6590 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6591 // The optimization does not work for `==` or `!=` .
6592 // The two comparisons should have either the same predicate or the
6593 // predicate of one of the comparisons is the opposite of the other one.
6594 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6595 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6596 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6597 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6598 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6599 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6600 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6601 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6602 (OpVT.isFloatingPoint() &&
6603 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6604 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6605 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6606 CCL != ISD::SETTRUE &&
6607 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6608
6609 SDValue CommonValue, Operand1, Operand2;
6610 ISD::CondCode CC = ISD::SETCC_INVALID;
6611 if (CCL == CCR) {
6612 if (LHS0 == RHS0) {
6613 CommonValue = LHS0;
6614 Operand1 = LHS1;
6615 Operand2 = RHS1;
6616 CC = CCL;
6617 } else if (LHS1 == RHS1) {
6618 CommonValue = LHS1;
6619 Operand1 = LHS0;
6620 Operand2 = RHS0;
6621 CC = CCL;
6622 }
6623 } else {
6624 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6625 if (LHS0 == RHS1) {
6626 CommonValue = LHS0;
6627 Operand1 = LHS1;
6628 Operand2 = RHS0;
6629 CC = CCR;
6630 } else if (RHS0 == LHS1) {
6631 CommonValue = LHS1;
6632 Operand1 = LHS0;
6633 Operand2 = RHS1;
6634 CC = CCL;
6635 }
6636 }
6637
6638 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6639 // handle it using OR/AND.
6640 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6641 CC = ISD::SETCC_INVALID;
6642 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6643 CC = ISD::SETCC_INVALID;
6644
6645 if (CC != ISD::SETCC_INVALID) {
6646 unsigned NewOpcode = ISD::DELETED_NODE;
6647 bool IsSigned = isSignedIntSetCC(CC);
6648 if (OpVT.isInteger()) {
6649 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6650 CC == ISD::SETLT || CC == ISD::SETULT);
6651 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6652 if (IsLess == IsOr)
6653 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6654 else
6655 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6656 } else if (OpVT.isFloatingPoint())
6657 NewOpcode =
6658 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6659 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6660
6661 if (NewOpcode != ISD::DELETED_NODE) {
6662 SDValue MinMaxValue =
6663 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6664 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6665 }
6666 }
6667 }
6668
6669 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6670 LHS0.getValueType() == RHS0.getValueType() &&
6671 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6672 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6673 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6674
6675 if (TargetPreference == AndOrSETCCFoldKind::None)
6676 return SDValue();
6677
6678 if (CCL == CCR &&
6679 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6680 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6681 const APInt &APLhs = LHS1C->getAPIntValue();
6682 const APInt &APRhs = RHS1C->getAPIntValue();
6683
6684 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6685 // case this is just a compare).
6686 if (APLhs == (-APRhs) &&
6687 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6688 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6689 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6690 // (icmp eq A, C) | (icmp eq A, -C)
6691 // -> (icmp eq Abs(A), C)
6692 // (icmp ne A, C) & (icmp ne A, -C)
6693 // -> (icmp ne Abs(A), C)
6694 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6695 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6696 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6697 } else if (TargetPreference &
6698 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6699
6700 // AndOrSETCCFoldKind::AddAnd:
6701 // A == C0 | A == C1
6702 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6703 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6704 // A != C0 & A != C1
6705 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6706 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6707
6708 // AndOrSETCCFoldKind::NotAnd:
6709 // A == C0 | A == C1
6710 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6711 // -> ~A & smin(C0, C1) == 0
6712 // A != C0 & A != C1
6713 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6714 // -> ~A & smin(C0, C1) != 0
6715
6716 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6717 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6718 APInt Dif = MaxC - MinC;
6719 if (!Dif.isZero() && Dif.isPowerOf2()) {
6720 if (MaxC.isAllOnes() &&
6721 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6722 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6723 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6724 DAG.getConstant(MinC, DL, OpVT));
6725 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6726 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6727 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6728
6729 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6730 DAG.getConstant(-MinC, DL, OpVT));
6731 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6732 DAG.getConstant(~Dif, DL, OpVT));
6733 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6734 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6735 }
6736 }
6737 }
6738 }
6739
6740 return SDValue();
6741}
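// For instance, when the target prefers ABS, (A == 8) | (A == -8) becomes
// (abs(A) == 8), and the AddAnd/NotAnd preferences above instead rewrite a
// pair of compares against constants that differ by one bit into a single
// mask-and-compare.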
6742
6743// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6744// We canonicalize to the `select` form in the middle end, but the `and` form
6745 // gets better codegen and all tested targets (arm, x86, riscv) prefer that form.
6746 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6747 const SDLoc &DL, SelectionDAG &DAG) {
6748 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6749 if (!isNullConstant(F))
6750 return SDValue();
6751
6752 EVT CondVT = Cond.getValueType();
6753 if (TLI.getBooleanContents(CondVT) !=
6754 TargetLowering::ZeroOrOneBooleanContent)
6755 return SDValue();
6756
6757 if (T.getOpcode() != ISD::AND)
6758 return SDValue();
6759
6760 if (!isOneConstant(T.getOperand(1)))
6761 return SDValue();
6762
6763 EVT OpVT = T.getValueType();
6764
6765 SDValue CondMask =
6766 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6767 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6768}
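// Quick check of the select fold: when Cond is true the boolean zero-extends
// to 1 and (1 & X) == (X & 1); when Cond is false the AND yields 0, matching
// the select's false arm.  This is only valid under the ZeroOrOne boolean
// contents test above.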
6769
6770/// This contains all DAGCombine rules which reduce two values combined by
6771/// an And operation to a single value. This makes them reusable in the context
6772/// of visitSELECT(). Rules involving constants are not included as
6773/// visitSELECT() already handles those cases.
6774SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6775 EVT VT = N1.getValueType();
6776 SDLoc DL(N);
6777
6778 // fold (and x, undef) -> 0
6779 if (N0.isUndef() || N1.isUndef())
6780 return DAG.getConstant(0, DL, VT);
6781
6782 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6783 return V;
6784
6785 // Canonicalize:
6786 // and(x, add) -> and(add, x)
6787 if (N1.getOpcode() == ISD::ADD)
6788 std::swap(N0, N1);
6789
6790 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6791 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6792 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6793 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6794 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6795 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6796 // immediate for an add, but it is legal if its top c2 bits are set,
6797 // transform the ADD so the immediate doesn't need to be materialized
6798 // in a register.
6799 APInt ADDC = ADDI->getAPIntValue();
6800 APInt SRLC = SRLI->getAPIntValue();
6801 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6802 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6803 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6804 SRLC.getZExtValue());
6805 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6806 ADDC |= Mask;
6807 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6808 SDLoc DL0(N0);
6809 SDValue NewAdd =
6810 DAG.getNode(ISD::ADD, DL0, VT,
6811 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6812 CombineTo(N0.getNode(), NewAdd);
6813 // Return N so it doesn't get rechecked!
6814 return SDValue(N, 0);
6815 }
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 return SDValue();
6823}
6824
6825bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6826 EVT LoadResultTy, EVT &ExtVT) {
6827 if (!AndC->getAPIntValue().isMask())
6828 return false;
6829
6830 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6831
6832 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6833 EVT LoadedVT = LoadN->getMemoryVT();
6834
6835 if (ExtVT == LoadedVT &&
6836 (!LegalOperations ||
6837 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6838 // ZEXTLOAD will match without needing to change the size of the value being
6839 // loaded.
6840 return true;
6841 }
6842
6843 // Do not change the width of a volatile or atomic load.
6844 if (!LoadN->isSimple())
6845 return false;
6846
6847 // Do not generate loads of non-round integer types since these can
6848 // be expensive (and would be wrong if the type is not byte sized).
6849 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6850 return false;
6851
6852 if (LegalOperations &&
6853 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6854 return false;
6855
6856 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6857 return false;
6858
6859 return true;
6860}
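// Illustrative masks: AND with 0xFF against an i32 load gives ActiveBits == 8,
// so ExtVT is the round type i8 and a zextload is possible; a mask of
// 0x00FFFFFF would give i24, which fails the isRound() check and is rejected.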
6861
6862bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6863 ISD::LoadExtType ExtType, EVT &MemVT,
6864 unsigned ShAmt) {
6865 if (!LDST)
6866 return false;
6867
6868 // Only allow byte offsets.
6869 if (ShAmt % 8)
6870 return false;
6871 const unsigned ByteShAmt = ShAmt / 8;
6872
6873 // Do not generate loads of non-round integer types since these can
6874 // be expensive (and would be wrong if the type is not byte sized).
6875 if (!MemVT.isRound())
6876 return false;
6877
6878 // Don't change the width of a volatile or atomic load.
6879 if (!LDST->isSimple())
6880 return false;
6881
6882 EVT LdStMemVT = LDST->getMemoryVT();
6883
6884 // Bail out when changing the scalable property, since we can't be sure that
6885 // we're actually narrowing here.
6886 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6887 return false;
6888
6889 // Verify that we are actually reducing a load width here.
6890 if (LdStMemVT.bitsLT(MemVT))
6891 return false;
6892
6893 // Ensure that this isn't going to produce an unsupported memory access.
6894 if (ShAmt) {
6895 const Align LDSTAlign = LDST->getAlign();
6896 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6897 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6898 LDST->getAddressSpace(), NarrowAlign,
6899 LDST->getMemOperand()->getFlags()))
6900 return false;
6901 }
6902
6903 // It's not possible to generate a constant of extended or untyped type.
6904 EVT PtrType = LDST->getBasePtr().getValueType();
6905 if (PtrType == MVT::Untyped || PtrType.isExtended())
6906 return false;
6907
6908 if (isa<LoadSDNode>(LDST)) {
6909 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6910 // Don't transform one with multiple uses, this would require adding a new
6911 // load.
6912 if (!SDValue(Load, 0).hasOneUse())
6913 return false;
6914
6915 if (LegalOperations &&
6916 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6917 return false;
6918
6919 // For the transform to be legal, the load must produce only two values
6920 // (the value loaded and the chain). Don't transform a pre-increment
6921 // load, for example, which produces an extra value. Otherwise the
6922 // transformation is not equivalent, and the downstream logic to replace
6923 // uses gets things wrong.
6924 if (Load->getNumValues() > 2)
6925 return false;
6926
6927 // If the load that we're shrinking is an extload and we're not just
6928 // discarding the extension we can't simply shrink the load. Bail.
6929 // TODO: It would be possible to merge the extensions in some cases.
6930 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6931 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6932 return false;
6933
6934 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
6935 return false;
6936 } else {
6937 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6938 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6939 // Can't write outside the original store
6940 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6941 return false;
6942
6943 if (LegalOperations &&
6944 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6945 return false;
6946 }
6947 return true;
6948}
6949
6950bool DAGCombiner::SearchForAndLoads(SDNode *N,
6951 SmallVectorImpl<LoadSDNode*> &Loads,
6952 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6953 ConstantSDNode *Mask,
6954 SDNode *&NodeToMask) {
6955 // Recursively search for the operands, looking for loads which can be
6956 // narrowed.
6957 for (SDValue Op : N->op_values()) {
6958 if (Op.getValueType().isVector())
6959 return false;
6960
6961 // Some constants may need fixing up later if they are too large.
6962 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6963 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
6964 "Expected bitwise logic operation");
6965 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
6966 NodesWithConsts.insert(N);
6967 continue;
6968 }
6969
6970 if (!Op.hasOneUse())
6971 return false;
6972
6973 switch(Op.getOpcode()) {
6974 case ISD::LOAD: {
6975 auto *Load = cast<LoadSDNode>(Op);
6976 EVT ExtVT;
6977 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6978 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6979
6980 // ZEXTLOAD is already small enough.
6981 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6982 ExtVT.bitsGE(Load->getMemoryVT()))
6983 continue;
6984
6985 // Use LE to convert equal sized loads to zext.
6986 if (ExtVT.bitsLE(Load->getMemoryVT()))
6987 Loads.push_back(Load);
6988
6989 continue;
6990 }
6991 return false;
6992 }
6993 case ISD::ZERO_EXTEND:
6994 case ISD::AssertZext: {
6995 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6996 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6997 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6998 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6999 Op.getOperand(0).getValueType();
7000
7001 // We can accept extending nodes if the mask is wider or an equal
7002 // width to the original type.
7003 if (ExtVT.bitsGE(VT))
7004 continue;
7005 break;
7006 }
7007 case ISD::OR:
7008 case ISD::XOR:
7009 case ISD::AND:
7010 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7011 NodeToMask))
7012 return false;
7013 continue;
7014 }
7015
7016 // Allow one node which will be masked along with any loads found.
7017 if (NodeToMask)
7018 return false;
7019
7020 // Also ensure that the node to be masked only produces one data result.
7021 NodeToMask = Op.getNode();
7022 if (NodeToMask->getNumValues() > 1) {
7023 bool HasValue = false;
7024 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7025 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7026 if (VT != MVT::Glue && VT != MVT::Other) {
7027 if (HasValue) {
7028 NodeToMask = nullptr;
7029 return false;
7030 }
7031 HasValue = true;
7032 }
7033 }
7034 assert(HasValue && "Node to be masked has no data result?");
7035 }
7036 }
7037 return true;
7038}
7039
7040bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7041 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7042 if (!Mask)
7043 return false;
7044
7045 if (!Mask->getAPIntValue().isMask())
7046 return false;
7047
7048 // No need to do anything if the and directly uses a load.
7049 if (isa<LoadSDNode>(N->getOperand(0)))
7050 return false;
7051
7052 SmallVector<LoadSDNode*, 8> Loads;
7053 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7054 SDNode *FixupNode = nullptr;
7055 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7056 if (Loads.empty())
7057 return false;
7058
7059 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7060 SDValue MaskOp = N->getOperand(1);
7061
7062 // If it exists, fixup the single node we allow in the tree that needs
7063 // masking.
7064 if (FixupNode) {
7065 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7066 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7067 FixupNode->getValueType(0),
7068 SDValue(FixupNode, 0), MaskOp);
7069 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7070 if (And.getOpcode() == ISD::AND)
7071 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7072 }
7073
7074 // Narrow any constants that need it.
7075 for (auto *LogicN : NodesWithConsts) {
7076 SDValue Op0 = LogicN->getOperand(0);
7077 SDValue Op1 = LogicN->getOperand(1);
7078
7079 // We only need to fix AND if both inputs are constants. And we only need
7080 // to fix one of the constants.
7081 if (LogicN->getOpcode() == ISD::AND &&
7082 (!isa<ConstantSDNode>(Op0) || !isa<ConstantSDNode>(Op1)))
7083 continue;
7084
7085 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7086 Op0 =
7087 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7088
7089 if (isa<ConstantSDNode>(Op1))
7090 Op1 =
7091 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7092
7093 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7094 std::swap(Op0, Op1);
7095
7096 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7097 }
7098
7099 // Create narrow loads.
7100 for (auto *Load : Loads) {
7101 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7102 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7103 SDValue(Load, 0), MaskOp);
7104 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7105 if (And.getOpcode() == ISD::AND)
7106 And = SDValue(
7107 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7108 SDValue NewLoad = reduceLoadWidth(And.getNode());
7109 assert(NewLoad &&
7110 "Shouldn't be masking the load if it can't be narrowed");
7111 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7112 }
7113 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7114 return true;
7115 }
7116 return false;
7117}
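// Sketch of the propagation (assuming the narrowing checks succeed): for
// (and (or (load i32 %a), (load i32 %b)), 0xFF) the mask is pushed to both
// leaves, each load can be rewritten as a zextload i8, and the outer AND is
// then replaced by its first operand.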
7118
7119// Unfold
7120// x & (-1 'logical shift' y)
7121// To
7122// (x 'opposite logical shift' y) 'logical shift' y
7123// if it is better for performance.
7124SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7125 assert(N->getOpcode() == ISD::AND);
7126
7127 SDValue N0 = N->getOperand(0);
7128 SDValue N1 = N->getOperand(1);
7129
7130 // Do we actually prefer shifts over mask?
7131 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
7132 return SDValue();
7133
7134 // Try to match (-1 '[outer] logical shift' y)
7135 unsigned OuterShift;
7136 unsigned InnerShift; // The opposite direction to the OuterShift.
7137 SDValue Y; // Shift amount.
7138 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7139 if (!M.hasOneUse())
7140 return false;
7141 OuterShift = M->getOpcode();
7142 if (OuterShift == ISD::SHL)
7143 InnerShift = ISD::SRL;
7144 else if (OuterShift == ISD::SRL)
7145 InnerShift = ISD::SHL;
7146 else
7147 return false;
7148 if (!isAllOnesConstant(M->getOperand(0)))
7149 return false;
7150 Y = M->getOperand(1);
7151 return true;
7152 };
7153
7154 SDValue X;
7155 if (matchMask(N1))
7156 X = N0;
7157 else if (matchMask(N0))
7158 X = N1;
7159 else
7160 return SDValue();
7161
7162 SDLoc DL(N);
7163 EVT VT = N->getValueType(0);
7164
7165 // tmp = x 'opposite logical shift' y
7166 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7167 // ret = tmp 'logical shift' y
7168 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7169
7170 return T1;
7171}
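// Worked example on i8 with y == 3: the mask (-1 << 3) is 0xF8, so
// x & 0xF8 clears the low three bits, and (x >> 3) << 3 clears exactly the
// same bits without materializing the mask constant.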
7172
7173/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7174/// For a target with a bit test, this is expected to become test + set and save
7175/// at least 1 instruction.
7176 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7177 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7178
7179 // Look through an optional extension.
7180 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7181 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7182 And0 = And0.getOperand(0);
7183 if (!isOneConstant(And1) || !And0.hasOneUse())
7184 return SDValue();
7185
7186 SDValue Src = And0;
7187
7188 // Attempt to find a 'not' op.
7189 // TODO: Should we favor test+set even without the 'not' op?
7190 bool FoundNot = false;
7191 if (isBitwiseNot(Src)) {
7192 FoundNot = true;
7193 Src = Src.getOperand(0);
7194
7195 // Look through an optional truncation. The source operand may not be the
7196 // same type as the original 'and', but that is ok because we are masking
7197 // off everything but the low bit.
7198 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7199 Src = Src.getOperand(0);
7200 }
7201
7202 // Match a shift-right by constant.
7203 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7204 return SDValue();
7205
7206 // This is probably not worthwhile without a supported type.
7207 EVT SrcVT = Src.getValueType();
7208 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7209 if (!TLI.isTypeLegal(SrcVT))
7210 return SDValue();
7211
7212 // We might have looked through casts that make this transform invalid.
7213 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7214 SDValue ShiftAmt = Src.getOperand(1);
7215 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7216 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7217 return SDValue();
7218
7219 // Set source to shift source.
7220 Src = Src.getOperand(0);
7221
7222 // Try again to find a 'not' op.
7223 // TODO: Should we favor test+set even with two 'not' ops?
7224 if (!FoundNot) {
7225 if (!isBitwiseNot(Src))
7226 return SDValue();
7227 Src = Src.getOperand(0);
7228 }
7229
7230 if (!TLI.hasBitTest(Src, ShiftAmt))
7231 return SDValue();
7232
7233 // Turn this into a bit-test pattern using mask op + setcc:
7234 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7235 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7236 SDLoc DL(And);
7237 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7238 EVT CCVT =
7239 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7240 SDValue Mask = DAG.getConstant(
7241 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7242 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7243 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7244 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7245 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7246}
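// Worked example: and (not (srl X, 5)), 1 asks whether bit 5 of X is clear;
// the rewrite checks that directly as (X & 32) == 0, which a target with a
// bit-test instruction can lower to a single test.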
7247
7248/// For targets that support usubsat, match a bit-hack form of that operation
7249/// that ends in 'and' and convert it.
7250 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7251 EVT VT = N->getValueType(0);
7252 unsigned BitWidth = VT.getScalarSizeInBits();
7253 APInt SignMask = APInt::getSignMask(BitWidth);
7254
7255 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7256 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7257 // xor/add with SMIN (signmask) are logically equivalent.
7258 SDValue X;
7259 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7260 m_OneUse(m_Sra(m_Deferred(X),
7261 m_SpecificInt(BitWidth - 1))))) &&
7262 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7263 m_OneUse(m_Sra(m_Deferred(X),
7264 m_SpecificInt(BitWidth - 1))))))
7265 return SDValue();
7266
7267 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7268 DAG.getConstant(SignMask, DL, VT));
7269}
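// Sanity check on i8: for X = 0x30, (X ^ 0x80) & (X s>> 7) = 0xB0 & 0x00 = 0
// and usubsat(0x30, 0x80) = 0; for X = 0x90 it is 0x10 & 0xFF = 0x10 and
// usubsat(0x90, 0x80) = 0x10, so both forms agree on either side of the
// sign mask.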
7270
7271/// Given a bitwise logic operation N with a matching bitwise logic operand,
7272/// fold a pattern where 2 of the source operands are identically shifted
7273/// values. For example:
7274/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7275 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7276 SelectionDAG &DAG) {
7277 unsigned LogicOpcode = N->getOpcode();
7278 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7279 "Expected bitwise logic operation");
7280
7281 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7282 return SDValue();
7283
7284 // Match another bitwise logic op and a shift.
7285 unsigned ShiftOpcode = ShiftOp.getOpcode();
7286 if (LogicOp.getOpcode() != LogicOpcode ||
7287 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7288 ShiftOpcode == ISD::SRA))
7289 return SDValue();
7290
7291 // Match another shift op inside the first logic operand. Handle both commuted
7292 // possibilities.
7293 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7294 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7295 SDValue X1 = ShiftOp.getOperand(0);
7296 SDValue Y = ShiftOp.getOperand(1);
7297 SDValue X0, Z;
7298 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7299 LogicOp.getOperand(0).getOperand(1) == Y) {
7300 X0 = LogicOp.getOperand(0).getOperand(0);
7301 Z = LogicOp.getOperand(1);
7302 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7303 LogicOp.getOperand(1).getOperand(1) == Y) {
7304 X0 = LogicOp.getOperand(1).getOperand(0);
7305 Z = LogicOp.getOperand(0);
7306 } else {
7307 return SDValue();
7308 }
7309
7310 EVT VT = N->getValueType(0);
7311 SDLoc DL(N);
7312 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7313 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7314 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7315}
7316
7317/// Given a tree of logic operations with shape like
7318/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7319/// try to match and fold shift operations with the same shift amount.
7320/// For example:
7321/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7322/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7323 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7324 SDValue RightHand, SelectionDAG &DAG) {
7325 unsigned LogicOpcode = N->getOpcode();
7326 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7327 "Expected bitwise logic operation");
7328 if (LeftHand.getOpcode() != LogicOpcode ||
7329 RightHand.getOpcode() != LogicOpcode)
7330 return SDValue();
7331 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7332 return SDValue();
7333
7334 // Try to match one of following patterns:
7335 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7336 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7337 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7338 // itself.
7339 SDValue CombinedShifts, W;
7340 SDValue R0 = RightHand.getOperand(0);
7341 SDValue R1 = RightHand.getOperand(1);
7342 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7343 W = R1;
7344 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7345 W = R0;
7346 else
7347 return SDValue();
7348
7349 EVT VT = N->getValueType(0);
7350 SDLoc DL(N);
7351 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7352}
7353
7354/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7355 /// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`
7356/// pattern. This is typically a better representation for targets without a
7357/// fused "and-not" operation.
7358 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7359 const TargetLowering &TLI, const SDLoc &DL) {
7360 // Note that masked-merge variants using XOR or ADD expressions are
7361 // normalized to OR by InstCombine so we only check for OR or AND.
7362 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7363 "Must be called with ISD::OR or ISD::AND node");
7364
7365 // If the target supports and-not, don't fold this.
7366 if (TLI.hasAndNot(SDValue(Node, 0)))
7367 return SDValue();
7368
7369 SDValue M, X, Y;
7370
7371 if (sd_match(Node,
7372 m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7373 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7374 sd_match(Node,
7375 m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7376 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7377 EVT VT = M.getValueType();
7378 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7379 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7380 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7381 }
7382 return SDValue();
7383}
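// The replacement is the classic branchless select: ((x ^ y) & m) ^ y
// evaluates to x wherever m has one bits and to y wherever m has zero bits,
// which is exactly (m & x) | (~m & y).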
7384
7385SDValue DAGCombiner::visitAND(SDNode *N) {
7386 SDValue N0 = N->getOperand(0);
7387 SDValue N1 = N->getOperand(1);
7388 EVT VT = N1.getValueType();
7389 SDLoc DL(N);
7390
7391 // x & x --> x
7392 if (N0 == N1)
7393 return N0;
7394
7395 // fold (and c1, c2) -> c1&c2
7396 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7397 return C;
7398
7399 // canonicalize constant to RHS
7400 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7401 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7402 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7403
7404 if (areBitwiseNotOfEachother(N0, N1))
7405 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7406
7407 // fold vector ops
7408 if (VT.isVector()) {
7409 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7410 return FoldedVOp;
7411
7412 // fold (and x, 0) -> 0, vector edition
7413 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7414 // do not return N1, because undef node may exist in N1
7415 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7416 N1.getValueType());
7417
7418 // fold (and x, -1) -> x, vector edition
7419 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7420 return N0;
7421
7422 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7423 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7424 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7425 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7426 EVT LoadVT = MLoad->getMemoryVT();
7427 EVT ExtVT = VT;
7428 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7429 // For this AND to be a zero extension of the masked load the elements
7430 // of the BuildVec must mask the bottom bits of the extended element
7431 // type
7432 uint64_t ElementSize =
7433 LoadVT.getVectorElementType().getScalarSizeInBits();
7434 if (Splat->getAPIntValue().isMask(ElementSize)) {
7435 SDValue NewLoad = DAG.getMaskedLoad(
7436 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7437 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7438 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7439 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7440 bool LoadHasOtherUsers = !N0.hasOneUse();
7441 CombineTo(N, NewLoad);
7442 if (LoadHasOtherUsers)
7443 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7444 return SDValue(N, 0);
7445 }
7446 }
7447 }
7448 }
7449
7450 // fold (and x, -1) -> x
7451 if (isAllOnesConstant(N1))
7452 return N0;
7453
7454 // if (and x, c) is known to be zero, return 0
7455 unsigned BitWidth = VT.getScalarSizeInBits();
7456 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7457 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7458 return DAG.getConstant(0, DL, VT);
7459
7460 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7461 return R;
7462
7463 if (SDValue NewSel = foldBinOpIntoSelect(N))
7464 return NewSel;
7465
7466 // reassociate and
7467 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7468 return RAND;
7469
7470 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7471 if (SDValue SD =
7472 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7473 return SD;
7474
7475 // fold (and (or x, C), D) -> D if (C & D) == D
7476 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7477 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7478 };
7479 if (N0.getOpcode() == ISD::OR &&
7480 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7481 return N1;
7482
7483 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7484 SDValue N0Op0 = N0.getOperand(0);
7485 EVT SrcVT = N0Op0.getValueType();
7486 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7487 APInt Mask = ~N1C->getAPIntValue();
7488 Mask = Mask.trunc(SrcBitWidth);
7489
7490 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7491 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7492 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7493
7494 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7495 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7496 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7497 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7498 TLI.isNarrowingProfitable(N, VT, SrcVT))
7499 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7500 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7501 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7502 }
7503
7504 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7505 if (ISD::isExtOpcode(N0.getOpcode())) {
7506 unsigned ExtOpc = N0.getOpcode();
7507 SDValue N0Op0 = N0.getOperand(0);
7508 if (N0Op0.getOpcode() == ISD::AND &&
7509 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7510 N0->hasOneUse() && N0Op0->hasOneUse()) {
7511 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7512 {N0Op0.getOperand(1)})) {
7513 if (SDValue NewMask =
7514 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7515 return DAG.getNode(ISD::AND, DL, VT,
7516 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7517 NewMask);
7518 }
7519 }
7520 }
7521 }
7522
7523 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7524 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7525 // already be zero by virtue of the width of the base type of the load.
7526 //
7527 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7528 // more cases.
7529 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7531 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7532 N0.getOperand(0).getResNo() == 0) ||
7533 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7534 auto *Load =
7535 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7536
7537 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7538 // This can be a pure constant or a vector splat, in which case we treat the
7539 // vector as a scalar and use the splat value.
7540 APInt Constant = APInt::getZero(1);
7541 if (const ConstantSDNode *C = isConstOrConstSplat(
7542 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7543 Constant = C->getAPIntValue();
7544 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7545 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7546 APInt SplatValue, SplatUndef;
7547 unsigned SplatBitSize;
7548 bool HasAnyUndefs;
7549 // Endianness should not matter here. Code below makes sure that we only
7550 // use the result if the SplatBitSize is a multiple of the vector element
7551 // size. And after that we AND all element sized parts of the splat
7552 // together. So the end result should be the same regardless of in which
7553 // order we do those operations.
7554 const bool IsBigEndian = false;
7555 bool IsSplat =
7556 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7557 HasAnyUndefs, EltBitWidth, IsBigEndian);
7558
7559 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7560 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7561 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7562 // Undef bits can contribute to a possible optimisation if set, so
7563 // set them.
7564 SplatValue |= SplatUndef;
7565
7566 // The splat value may be something like "0x00FFFFFF", which means 0 for
7567 // the first vector value and FF for the rest, repeating. We need a mask
7568 // that will apply equally to all members of the vector, so AND all the
7569 // lanes of the constant together.
7570 Constant = APInt::getAllOnes(EltBitWidth);
7571 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7572 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7573 }
7574 }
7575
7576 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7577 // actually legal and isn't going to get expanded, else this is a false
7578 // optimisation.
7579 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7580 Load->getValueType(0),
7581 Load->getMemoryVT());
7582
7583 // Resize the constant to the same size as the original memory access before
7584 // extension. If it is still the AllOnesValue then this AND is completely
7585 // unneeded.
7586 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7587
7588 bool B;
7589 switch (Load->getExtensionType()) {
7590 default: B = false; break;
7591 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7592 case ISD::ZEXTLOAD:
7593 case ISD::NON_EXTLOAD: B = true; break;
7594 }
7595
7596 if (B && Constant.isAllOnes()) {
7597 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7598 // preserve semantics once we get rid of the AND.
7599 SDValue NewLoad(Load, 0);
7600
7601 // Fold the AND away. NewLoad may get replaced immediately.
7602 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7603
7604 if (Load->getExtensionType() == ISD::EXTLOAD) {
7605 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7606 Load->getValueType(0), SDLoc(Load),
7607 Load->getChain(), Load->getBasePtr(),
7608 Load->getOffset(), Load->getMemoryVT(),
7609 Load->getMemOperand());
7610 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7611 if (Load->getNumValues() == 3) {
7612 // PRE/POST_INC loads have 3 values.
7613 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7614 NewLoad.getValue(2) };
7615 CombineTo(Load, To, 3, true);
7616 } else {
7617 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7618 }
7619 }
7620
7621 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7622 }
7623 }
7624
7625 // Try to convert a constant mask AND into a shuffle clear mask.
7626 if (VT.isVector())
7627 if (SDValue Shuffle = XformToShuffleWithZero(N))
7628 return Shuffle;
7629
7630 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7631 return Combined;
7632
7633 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7634 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7635 SDValue Ext = N0.getOperand(0);
7636 EVT ExtVT = Ext->getValueType(0);
7637 SDValue Extendee = Ext->getOperand(0);
7638
7639 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7640 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7641 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7642 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7643 // => (extract_subvector (iN_zeroext v))
7644 SDValue ZeroExtExtendee =
7645 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7646
7647 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7648 N0.getOperand(1));
7649 }
7650 }
7651
7652 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7653 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7654 EVT MemVT = GN0->getMemoryVT();
7655 EVT ScalarVT = MemVT.getScalarType();
7656
7657 if (SDValue(GN0, 0).hasOneUse() &&
7658 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7659 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7660 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7661 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7662
7663 SDValue ZExtLoad = DAG.getMaskedGather(
7664 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7665 GN0->getIndexType(), ISD::ZEXTLOAD);
7666
7667 CombineTo(N, ZExtLoad);
7668 AddToWorklist(ZExtLoad.getNode());
7669 // Avoid recheck of N.
7670 return SDValue(N, 0);
7671 }
7672 }
7673
7674 // fold (and (load x), 255) -> (zextload x, i8)
7675 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7676 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7677 if (SDValue Res = reduceLoadWidth(N))
7678 return Res;
7679
7680 if (LegalTypes) {
7681 // Attempt to propagate the AND back up to the leaves which, if they're
7682 // loads, can be combined to narrow loads and the AND node can be removed.
7683 // Perform after legalization so that extend nodes will already be
7684 // combined into the loads.
7685 if (BackwardsPropagateMask(N))
7686 return SDValue(N, 0);
7687 }
7688
7689 if (SDValue Combined = visitANDLike(N0, N1, N))
7690 return Combined;
7691
7692 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7693 if (N0.getOpcode() == N1.getOpcode())
7694 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7695 return V;
7696
7697 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7698 return R;
7699 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7700 return R;
7701
7702 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7703 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7704 SDValue X, Y, Z, NotY;
7705 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7706 if (sd_match(N,
7707 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7708 sd_match(NotY, m_Not(m_Value(Y))) &&
7709 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7710 return DAG.getNode(ISD::AND, DL, VT, X,
7711 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7712
7713 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7714 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7715 if (sd_match(N, m_And(m_Value(X),
7716 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7717 sd_match(NotY, m_Not(m_Value(Y))) &&
7718 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7719 return DAG.getNode(ISD::AND, DL, VT, X,
7720 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7721
7722 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7723 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7724 if (TLI.hasAndNot(SDValue(N, 0)))
7725 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7726 return Folded;
7727
7728 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7729 // If we are shifting down an extended sign bit, see if we can simplify
7730 // this to shifting the MSB directly to expose further simplifications.
7731 // This pattern often appears after sext_inreg legalization.
7732 APInt Amt;
7733 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7734 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7735 return DAG.getNode(ISD::SRL, DL, VT, X,
7736 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7737
7738 // Masking the negated extension of a boolean is just the zero-extended
7739 // boolean:
7740 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7741 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7742 //
7743 // Note: the SimplifyDemandedBits fold below can make an information-losing
7744 // transform, and then we have no way to find this better fold.
7745 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7746 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7747 X.getOperand(0).getScalarValueSizeInBits() == 1)
7748 return X;
7749 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7750 X.getOperand(0).getScalarValueSizeInBits() == 1)
7751 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7752 }
7753
7754 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7755 // fold (and (sra)) -> (and (srl)) when possible.
7756 if (SimplifyDemandedBits(SDValue(N, 0)))
7757 return SDValue(N, 0);
7758
7759 // fold (zext_inreg (extload x)) -> (zextload x)
7760 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7761 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7762 (ISD::isEXTLoad(N0.getNode()) ||
7763 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7764 auto *LN0 = cast<LoadSDNode>(N0);
7765 EVT MemVT = LN0->getMemoryVT();
7766 // If we zero all the possible extended bits, then we can turn this into
7767 // a zextload if we are running before legalize or the operation is legal.
7768 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7769 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7770 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7771 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7772 ((!LegalOperations && LN0->isSimple()) ||
7773 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7774 SDValue ExtLoad =
7775 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7776 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7777 AddToWorklist(N);
7778 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7779 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7780 }
7781 }
7782
7783 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7784 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7785 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7786 N0.getOperand(1), false))
7787 return BSwap;
7788 }
7789
7790 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7791 return Shifts;
7792
7793 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7794 return V;
7795
7796 // Recognize the following pattern:
7797 //
7798 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7799 //
7800 // where bitmask is a mask that clears the upper bits of AndVT. The
7801 // number of bits in bitmask must be a power of two.
7802 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7803 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7804 return false;
7805
7806 auto *C = dyn_cast<ConstantSDNode>(RHS);
7807 if (!C)
7808 return false;
7809
7810 if (!C->getAPIntValue().isMask(
7811 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7812 return false;
7813
7814 return true;
7815 };
7816
7817 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7818 if (IsAndZeroExtMask(N0, N1))
7819 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7820
7821 if (hasOperation(ISD::USUBSAT, VT))
7822 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7823 return V;
7824
7825 // Postpone until after legalization has completed to avoid interference
7826 // with bswap folding.
7827 if (LegalOperations || VT.isVector())
7828 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7829 return R;
7830
7831 if (VT.isScalarInteger() && VT != MVT::i1)
7832 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7833 return R;
7834
7835 return SDValue();
7836}
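
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// A minimal standalone check of the sign-bit extraction fold in visitAND
// above: (and (srl X, C), 1) -> (srl X, BW-1) when bit C of X is a copy of
// the sign bit. X is assumed to be sign-extended from i16, so any C in
// [15, 30] selects a sign-bit copy; the helper name is hypothetical.
#include <cstdint>
static bool checkSignBitExtractFold(int16_t NarrowV, unsigned C) {
  int32_t X = NarrowV;                         // sign_extend i16 -> i32
  if (C < 15 || C > 30)
    return true;                               // fold does not apply
  uint32_t Original = ((uint32_t)X >> C) & 1u; // (and (srl X, C), 1)
  uint32_t Folded = (uint32_t)X >> 31;         // (srl X, BW-1)
  return Original == Folded;                   // holds for every NarrowV
}
// ===---------------------------------------------------------------------===//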
7837
7838/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7839SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7840 bool DemandHighBits) {
7841 if (!LegalOperations)
7842 return SDValue();
7843
7844 EVT VT = N->getValueType(0);
7845 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7846 return SDValue();
7847 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7848 return SDValue();
7849
7850 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7851 bool LookPassAnd0 = false;
7852 bool LookPassAnd1 = false;
7853 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7854 std::swap(N0, N1);
7855 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7856 std::swap(N0, N1);
7857 if (N0.getOpcode() == ISD::AND) {
7858 if (!N0->hasOneUse())
7859 return SDValue();
7860 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7861 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7862 // This is needed for X86.
7863 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7864 N01C->getZExtValue() != 0xFFFF))
7865 return SDValue();
7866 N0 = N0.getOperand(0);
7867 LookPassAnd0 = true;
7868 }
7869
7870 if (N1.getOpcode() == ISD::AND) {
7871 if (!N1->hasOneUse())
7872 return SDValue();
7873 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7874 if (!N11C || N11C->getZExtValue() != 0xFF)
7875 return SDValue();
7876 N1 = N1.getOperand(0);
7877 LookPassAnd1 = true;
7878 }
7879
7880 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7881 std::swap(N0, N1);
7882 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7883 return SDValue();
7884 if (!N0->hasOneUse() || !N1->hasOneUse())
7885 return SDValue();
7886
7887 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7888 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7889 if (!N01C || !N11C)
7890 return SDValue();
7891 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7892 return SDValue();
7893
7894 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7895 SDValue N00 = N0->getOperand(0);
7896 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7897 if (!N00->hasOneUse())
7898 return SDValue();
7899 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7900 if (!N001C || N001C->getZExtValue() != 0xFF)
7901 return SDValue();
7902 N00 = N00.getOperand(0);
7903 LookPassAnd0 = true;
7904 }
7905
7906 SDValue N10 = N1->getOperand(0);
7907 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7908 if (!N10->hasOneUse())
7909 return SDValue();
7910 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7911 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7912 // for X86.
7913 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7914 N101C->getZExtValue() != 0xFFFF))
7915 return SDValue();
7916 N10 = N10.getOperand(0);
7917 LookPassAnd1 = true;
7918 }
7919
7920 if (N00 != N10)
7921 return SDValue();
7922
7923 // Make sure everything beyond the low halfword gets set to zero since the SRL
7924 // 16 will clear the top bits.
7925 unsigned OpSizeInBits = VT.getSizeInBits();
7926 if (OpSizeInBits > 16) {
7927 // If the left-shift isn't masked out then the only way this is a bswap is
7928 // if all bits beyond the low 8 are 0. In that case the entire pattern
7929 // reduces to a left shift anyway: leave it for other parts of the combiner.
7930 if (DemandHighBits && !LookPassAnd0)
7931 return SDValue();
7932
7933 // However, if the right shift isn't masked out then it might be because
7934 // it's not needed. See if we can spot that too. If the high bits aren't
7935 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7936 // upper bits to be zero.
7937 if (!LookPassAnd1) {
7938 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7939 if (!DAG.MaskedValueIsZero(N10,
7940 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7941 return SDValue();
7942 }
7943 }
7944
7945 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7946 if (OpSizeInBits > 16) {
7947 SDLoc DL(N);
7948 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7949 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7950 }
7951 return Res;
7952}
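
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// The scalar identity behind MatchBSwapHWordLow on a 32-bit value: swapping
// the two low bytes of A equals taking the full byte swap of A and shifting
// it right by 16. Helper names are hypothetical.
#include <cstdint>
static uint32_t bswap32Ref(uint32_t A) {
  return (A << 24) | ((A & 0xFF00u) << 8) | ((A >> 8) & 0xFF00u) | (A >> 24);
}
static bool checkBSwapHWordLow(uint32_t A) {
  uint32_t Pattern = ((A & 0xFFu) << 8) | ((A >> 8) & 0xFFu); // low-halfword swap
  uint32_t Folded = bswap32Ref(A) >> 16;                      // (srl (bswap A), 16)
  return Pattern == Folded;
}
// ===---------------------------------------------------------------------===//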
7953
7954/// Return true if the specified node is an element that makes up a 32-bit
7955/// packed halfword byteswap.
7956/// ((x & 0x000000ff) << 8) |
7957/// ((x & 0x0000ff00) >> 8) |
7958/// ((x & 0x00ff0000) << 8) |
7959/// ((x & 0xff000000) >> 8)
7960 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7961 if (!N->hasOneUse())
7962 return false;
7963
7964 unsigned Opc = N.getOpcode();
7965 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7966 return false;
7967
7968 SDValue N0 = N.getOperand(0);
7969 unsigned Opc0 = N0.getOpcode();
7970 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7971 return false;
7972
7973 ConstantSDNode *N1C = nullptr;
7974 // SHL or SRL: look upstream for AND mask operand
7975 if (Opc == ISD::AND)
7976 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7977 else if (Opc0 == ISD::AND)
7978 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7979 if (!N1C)
7980 return false;
7981
7982 unsigned MaskByteOffset;
7983 switch (N1C->getZExtValue()) {
7984 default:
7985 return false;
7986 case 0xFF: MaskByteOffset = 0; break;
7987 case 0xFF00: MaskByteOffset = 1; break;
7988 case 0xFFFF:
7989 // In case demanded bits didn't clear the bits that will be shifted out.
7990 // This is needed for X86.
7991 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7992 MaskByteOffset = 1;
7993 break;
7994 }
7995 return false;
7996 case 0xFF0000: MaskByteOffset = 2; break;
7997 case 0xFF000000: MaskByteOffset = 3; break;
7998 }
7999
8000 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8001 if (Opc == ISD::AND) {
8002 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8003 // (x >> 8) & 0xff
8004 // (x >> 8) & 0xff0000
8005 if (Opc0 != ISD::SRL)
8006 return false;
8007 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8008 if (!C || C->getZExtValue() != 8)
8009 return false;
8010 } else {
8011 // (x << 8) & 0xff00
8012 // (x << 8) & 0xff000000
8013 if (Opc0 != ISD::SHL)
8014 return false;
8015 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8016 if (!C || C->getZExtValue() != 8)
8017 return false;
8018 }
8019 } else if (Opc == ISD::SHL) {
8020 // (x & 0xff) << 8
8021 // (x & 0xff0000) << 8
8022 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8023 return false;
8024 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8025 if (!C || C->getZExtValue() != 8)
8026 return false;
8027 } else { // Opc == ISD::SRL
8028 // (x & 0xff00) >> 8
8029 // (x & 0xff000000) >> 8
8030 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8031 return false;
8032 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8033 if (!C || C->getZExtValue() != 8)
8034 return false;
8035 }
8036
8037 if (Parts[MaskByteOffset])
8038 return false;
8039
8040 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8041 return true;
8042}
8043
8044// Match 2 elements of a packed halfword bswap.
8045 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8046 if (N.getOpcode() == ISD::OR)
8047 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8048 isBSwapHWordElement(N.getOperand(1), Parts);
8049
8050 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8051 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8052 if (!C || C->getAPIntValue() != 16)
8053 return false;
8054 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8055 return true;
8056 }
8057
8058 return false;
8059}
8060
8061// Match this pattern:
8062// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8063// And rewrite this to:
8064// (rotr (bswap A), 16)
8065 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8066 SelectionDAG &DAG, SDNode *N, SDValue N0,
8067 SDValue N1, EVT VT) {
8068 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8069 "MatchBSwapHWordOrAndAnd: expecting i32");
8070 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8071 return SDValue();
8072 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8073 return SDValue();
8074 // TODO: this is too restrictive; lifting this restriction requires more tests
8075 if (!N0->hasOneUse() || !N1->hasOneUse())
8076 return SDValue();
8077 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8078 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8079 if (!Mask0 || !Mask1)
8080 return SDValue();
8081 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8082 Mask1->getAPIntValue() != 0x00ff00ff)
8083 return SDValue();
8084 SDValue Shift0 = N0.getOperand(0);
8085 SDValue Shift1 = N1.getOperand(0);
8086 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8087 return SDValue();
8088 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8089 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8090 if (!ShiftAmt0 || !ShiftAmt1)
8091 return SDValue();
8092 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8093 return SDValue();
8094 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8095 return SDValue();
8096
8097 SDLoc DL(N);
8098 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8099 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8100 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8101}
8102
8103/// Match a 32-bit packed halfword bswap. That is
8104/// ((x & 0x000000ff) << 8) |
8105/// ((x & 0x0000ff00) >> 8) |
8106/// ((x & 0x00ff0000) << 8) |
8107/// ((x & 0xff000000) >> 8)
8108/// => (rotl (bswap x), 16)
8109SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8110 if (!LegalOperations)
8111 return SDValue();
8112
8113 EVT VT = N->getValueType(0);
8114 if (VT != MVT::i32)
8115 return SDValue();
8116 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8117 return SDValue();
8118
8119 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8120 return BSwap;
8121
8122 // Try again with commuted operands.
8123 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8124 return BSwap;
8125
8126
8127 // Look for either
8128 // (or (bswaphpair), (bswaphpair))
8129 // (or (or (bswaphpair), (and)), (and))
8130 // (or (or (and), (bswaphpair)), (and))
8131 SDNode *Parts[4] = {};
8132
8133 if (isBSwapHWordPair(N0, Parts)) {
8134 // (or (or (and), (and)), (or (and), (and)))
8135 if (!isBSwapHWordPair(N1, Parts))
8136 return SDValue();
8137 } else if (N0.getOpcode() == ISD::OR) {
8138 // (or (or (or (and), (and)), (and)), (and))
8139 if (!isBSwapHWordElement(N1, Parts))
8140 return SDValue();
8141 SDValue N00 = N0.getOperand(0);
8142 SDValue N01 = N0.getOperand(1);
8143 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8144 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8145 return SDValue();
8146 } else {
8147 return SDValue();
8148 }
8149
8150 // Make sure the parts are all coming from the same node.
8151 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8152 return SDValue();
8153
8154 SDLoc DL(N);
8155 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8156 SDValue(Parts[0], 0));
8157
8158 // Result of the bswap should be rotated by 16. If it's not legal, then
8159 // do (x << 16) | (x >> 16).
8160 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8161 if (hasOperation(ISD::ROTL, VT))
8162 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8163 if (hasOperation(ISD::ROTR, VT))
8164 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8165 return DAG.getNode(ISD::OR, DL, VT,
8166 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8167 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8168}
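
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// The 32-bit identity matched above: byte-swapping each halfword of X
// independently equals rotating the full byte swap of X by 16 bits. The
// helper name is hypothetical and __builtin_bswap32 assumes a GCC/Clang-style
// compiler.
#include <cstdint>
static bool checkBSwapHWord(uint32_t X) {
  uint32_t PerHalfword = ((X & 0x000000FFu) << 8) | ((X & 0x0000FF00u) >> 8) |
                         ((X & 0x00FF0000u) << 8) | ((X & 0xFF000000u) >> 8);
  uint32_t BSwap = __builtin_bswap32(X);
  uint32_t Rotated = (BSwap << 16) | (BSwap >> 16); // rotl (== rotr) by 16
  return PerHalfword == Rotated;
}
// ===---------------------------------------------------------------------===//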
8169
8170/// This contains all DAGCombine rules which reduce two values combined by
8171/// an Or operation to a single value \see visitANDLike().
8172SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8173 EVT VT = N1.getValueType();
8174
8175 // fold (or x, undef) -> -1
8176 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8177 return DAG.getAllOnesConstant(DL, VT);
8178
8179 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8180 return V;
8181
8182 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8183 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8184 // Don't increase # computations.
8185 (N0->hasOneUse() || N1->hasOneUse())) {
8186 // We can only do this xform if we know that bits from X that are set in C2
8187 // but not in C1 are already zero. Likewise for Y.
8188 if (const ConstantSDNode *N0O1C =
8189 getAsNonOpaqueConstant(N0.getOperand(1))) {
8190 if (const ConstantSDNode *N1O1C =
8191 getAsNonOpaqueConstant(N1.getOperand(1))) {
8192 // We can only do this xform if we know that bits from X that are set in
8193 // C2 but not in C1 are already zero. Likewise for Y.
8194 const APInt &LHSMask = N0O1C->getAPIntValue();
8195 const APInt &RHSMask = N1O1C->getAPIntValue();
8196
8197 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8198 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8199 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8200 N0.getOperand(0), N1.getOperand(0));
8201 return DAG.getNode(ISD::AND, DL, VT, X,
8202 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8203 }
8204 }
8205 }
8206 }
8207
8208 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8209 if (N0.getOpcode() == ISD::AND &&
8210 N1.getOpcode() == ISD::AND &&
8211 N0.getOperand(0) == N1.getOperand(0) &&
8212 // Don't increase # computations.
8213 (N0->hasOneUse() || N1->hasOneUse())) {
8214 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8215 N0.getOperand(1), N1.getOperand(1));
8216 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8217 }
8218
8219 return SDValue();
8220}
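
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// The MaskedValueIsZero precondition above, shown on concrete masks C1=0xFF00
// and C2=0x00FF: once the cross-mask bits of X and Y are known zero,
// (or (and X, C1), (and Y, C2)) equals (and (or X, Y), C1|C2). The values
// and helper name are hypothetical.
#include <cstdint>
static bool checkOrOfMaskedValues(uint32_t P, uint32_t Q) {
  uint32_t X = P & ~0x00FFu; // bits of X that are set in C2 but not C1 are zero
  uint32_t Y = Q & ~0xFF00u; // bits of Y that are set in C1 but not C2 are zero
  uint32_t Original = (X & 0xFF00u) | (Y & 0x00FFu);
  uint32_t Folded = (X | Y) & 0xFFFFu; // (and (or X, Y), C1|C2)
  return Original == Folded;
}
// ===---------------------------------------------------------------------===//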
8221
8222/// OR combines for which the commuted variant will be tried as well.
8223 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8224 SDNode *N) {
8225 EVT VT = N0.getValueType();
8226 unsigned BW = VT.getScalarSizeInBits();
8227 SDLoc DL(N);
8228
8229 auto peekThroughResize = [](SDValue V) {
8230 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8231 return V->getOperand(0);
8232 return V;
8233 };
8234
8235 SDValue N0Resized = peekThroughResize(N0);
8236 if (N0Resized.getOpcode() == ISD::AND) {
8237 SDValue N1Resized = peekThroughResize(N1);
8238 SDValue N00 = N0Resized.getOperand(0);
8239 SDValue N01 = N0Resized.getOperand(1);
8240
8241 // fold or (and x, y), x --> x
8242 if (N00 == N1Resized || N01 == N1Resized)
8243 return N1;
8244
8245 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8246 // TODO: Set AllowUndefs = true.
8247 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8248 /* AllowUndefs */ false)) {
8249 if (peekThroughResize(NotOperand) == N1Resized)
8250 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8251 N1);
8252 }
8253
8254 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8255 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8256 /* AllowUndefs */ false)) {
8257 if (peekThroughResize(NotOperand) == N1Resized)
8258 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8259 N1);
8260 }
8261 }
8262
8263 SDValue X, Y;
8264
8265 // fold or (xor X, N1), N1 --> or X, N1
8266 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8267 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8268
8269 // fold or (xor x, y), (x and/or y) --> or x, y
8270 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8271 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8272 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
8273 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8274
8275 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8276 return R;
8277
8278 auto peekThroughZext = [](SDValue V) {
8279 if (V->getOpcode() == ISD::ZERO_EXTEND)
8280 return V->getOperand(0);
8281 return V;
8282 };
8283
8284 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8285 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8286 N0.getOperand(0) == N1.getOperand(0) &&
8287 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8288 return N0;
8289
8290 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8291 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8292 N0.getOperand(1) == N1.getOperand(0) &&
8293 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8294 return N0;
8295
8296 // Attempt to match a legalized build_pair-esque pattern:
8297 // or(shl(aext(Hi),BW/2),zext(Lo))
8298 SDValue Lo, Hi;
8299 if (sd_match(N0,
8300 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
8301 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8302 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8303 Lo.getValueType() == Hi.getValueType()) {
8304 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8305 SDValue NotLo, NotHi;
8306 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8307 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8308 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8309 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8310 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8311 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8312 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8313 }
8314 }
8315
8316 return SDValue();
8317}
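
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// Bit-level identity behind "fold or (xor x, y), (x and/or y) --> or x, y"
// above: XOR covers the bits where x and y differ, AND covers the bits they
// share, and their union is exactly OR. Helper name is hypothetical.
#include <cstdint>
static bool checkOrOfXorAnd(uint32_t X, uint32_t Y) {
  return ((X ^ Y) | (X & Y)) == (X | Y) && ((X ^ Y) | (X | Y)) == (X | Y);
}
// ===---------------------------------------------------------------------===//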
8318
8319SDValue DAGCombiner::visitOR(SDNode *N) {
8320 SDValue N0 = N->getOperand(0);
8321 SDValue N1 = N->getOperand(1);
8322 EVT VT = N1.getValueType();
8323 SDLoc DL(N);
8324
8325 // x | x --> x
8326 if (N0 == N1)
8327 return N0;
8328
8329 // fold (or c1, c2) -> c1|c2
8330 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8331 return C;
8332
8333 // canonicalize constant to RHS
8334 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8335 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8336 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8337
8338 // fold vector ops
8339 if (VT.isVector()) {
8340 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8341 return FoldedVOp;
8342
8343 // fold (or x, 0) -> x, vector edition
8344 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8345 return N0;
8346
8347 // fold (or x, -1) -> -1, vector edition
8348 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8349 // do not return N1, because undef node may exist in N1
8350 return DAG.getAllOnesConstant(DL, N1.getValueType());
8351
8352 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8353 // Do this only if the resulting type / shuffle is legal.
8354 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8355 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8356 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8357 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8358 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8359 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8360 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8361 // Ensure both shuffles have a zero input.
8362 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8363 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8364 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8365 bool CanFold = true;
8366 int NumElts = VT.getVectorNumElements();
8367 SmallVector<int, 4> Mask(NumElts, -1);
8368
8369 for (int i = 0; i != NumElts; ++i) {
8370 int M0 = SV0->getMaskElt(i);
8371 int M1 = SV1->getMaskElt(i);
8372
8373 // Determine if either index is pointing to a zero vector.
8374 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8375 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8376
8377 // If one element is zero and the other side is undef, keep undef.
8378 // This also handles the case that both are undef.
8379 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8380 continue;
8381
8382 // Make sure only one of the elements is zero.
8383 if (M0Zero == M1Zero) {
8384 CanFold = false;
8385 break;
8386 }
8387
8388 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8389
8390 // We have a zero and non-zero element. If the non-zero came from
8391 // SV0 make the index a LHS index. If it came from SV1, make it
8392 // a RHS index. We need to mod by NumElts because we don't care
8393 // which operand it came from in the original shuffles.
8394 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8395 }
8396
8397 if (CanFold) {
8398 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8399 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8400 SDValue LegalShuffle =
8401 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8402 if (LegalShuffle)
8403 return LegalShuffle;
8404 }
8405 }
8406 }
8407 }
8408
8409 // fold (or x, 0) -> x
8410 if (isNullConstant(N1))
8411 return N0;
8412
8413 // fold (or x, -1) -> -1
8414 if (isAllOnesConstant(N1))
8415 return N1;
8416
8417 if (SDValue NewSel = foldBinOpIntoSelect(N))
8418 return NewSel;
8419
8420 // fold (or x, c) -> c iff (x & ~c) == 0
8421 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8422 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8423 return N1;
8424
8425 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8426 return R;
8427
8428 if (SDValue Combined = visitORLike(N0, N1, DL))
8429 return Combined;
8430
8431 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8432 return Combined;
8433
8434 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8435 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8436 return BSwap;
8437 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8438 return BSwap;
8439
8440 // reassociate or
8441 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8442 return ROR;
8443
8444 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8445 if (SDValue SD =
8446 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8447 return SD;
8448
8449 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8450 // iff (c1 & c2) != 0 or c1/c2 are undef.
8451 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8452 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8453 };
8454 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8455 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8456 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8457 {N1, N0.getOperand(1)})) {
8458 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8459 AddToWorklist(IOR.getNode());
8460 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8461 }
8462 }
8463
8464 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8465 return Combined;
8466 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8467 return Combined;
8468
8469 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8470 if (N0.getOpcode() == N1.getOpcode())
8471 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8472 return V;
8473
8474 // See if this is some rotate idiom.
8475 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8476 return Rot;
8477
8478 if (SDValue Load = MatchLoadCombine(N))
8479 return Load;
8480
8481 // Simplify the operands using demanded-bits information.
8482 if (SimplifyDemandedBits(SDValue(N, 0)))
8483 return SDValue(N, 0);
8484
8485 // If OR can be rewritten into ADD, try combines based on ADD.
8486 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8487 DAG.isADDLike(SDValue(N, 0)))
8488 if (SDValue Combined = visitADDLike(N))
8489 return Combined;
8490
8491 // Postpone until after legalization has completed to avoid interference
8492 // with bswap folding.
8493 if (LegalOperations || VT.isVector())
8494 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8495 return R;
8496
8497 if (VT.isScalarInteger() && VT != MVT::i1)
8498 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8499 return R;
8500
8501 return SDValue();
8502}
8503
8504 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8505 SDValue &Mask) {
8506 if (Op.getOpcode() == ISD::AND &&
8507 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8508 Mask = Op.getOperand(1);
8509 return Op.getOperand(0);
8510 }
8511 return Op;
8512}
8513
8514/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8515static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8516 SDValue &Mask) {
8517 Op = stripConstantMask(DAG, Op, Mask);
8518 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8519 Shift = Op;
8520 return true;
8521 }
8522 return false;
8523}
8524
8525/// Helper function for visitOR to extract the needed side of a rotate idiom
8526/// from a shl/srl/mul/udiv. This is meant to handle cases where
8527/// InstCombine merged some outside op with one of the shifts from
8528/// the rotate pattern.
8529/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8530/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8531/// patterns:
8532///
8533/// (or (add v v) (shrl v bitwidth-1)):
8534/// expands (add v v) -> (shl v 1)
8535///
8536/// (or (mul v c0) (shrl (mul v c1) c2)):
8537/// expands (mul v c0) -> (shl (mul v c1) c3)
8538///
8539/// (or (udiv v c0) (shl (udiv v c1) c2)):
8540/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8541///
8542/// (or (shl v c0) (shrl (shl v c1) c2)):
8543/// expands (shl v c0) -> (shl (shl v c1) c3)
8544///
8545/// (or (shrl v c0) (shl (shrl v c1) c2)):
8546/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8547///
8548/// Such that in all cases, c3+c2==bitwidth(op v c1).
8549 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8550 SDValue ExtractFrom, SDValue &Mask,
8551 const SDLoc &DL) {
8552 assert(OppShift && ExtractFrom && "Empty SDValue");
8553 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8554 return SDValue();
8555
8556 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8557
8558 // Value and Type of the shift.
8559 SDValue OppShiftLHS = OppShift.getOperand(0);
8560 EVT ShiftedVT = OppShiftLHS.getValueType();
8561
8562 // Amount of the existing shift.
8563 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8564
8565 // (add v v) -> (shl v 1)
8566 // TODO: Should this be a general DAG canonicalization?
8567 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8568 ExtractFrom.getOpcode() == ISD::ADD &&
8569 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8570 ExtractFrom.getOperand(0) == OppShiftLHS &&
8571 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8572 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8573 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8574
8575 // Preconditions:
8576 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8577 //
8578 // Find opcode of the needed shift to be extracted from (op0 v c0).
8579 unsigned Opcode = ISD::DELETED_NODE;
8580 bool IsMulOrDiv = false;
8581 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8582 // opcode or its arithmetic (mul or udiv) variant.
8583 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8584 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8585 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8586 return false;
8587 Opcode = NeededShift;
8588 return true;
8589 };
8590 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8591 // that the needed shift can be extracted from.
8592 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8593 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8594 return SDValue();
8595
8596 // op0 must be the same opcode on both sides, have the same LHS argument,
8597 // and produce the same value type.
8598 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8599 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8600 ShiftedVT != ExtractFrom.getValueType())
8601 return SDValue();
8602
8603 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8604 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8605 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8606 ConstantSDNode *ExtractFromCst =
8607 isConstOrConstSplat(ExtractFrom.getOperand(1));
8608 // TODO: We should be able to handle non-uniform constant vectors for these values
8609 // Check that we have constant values.
8610 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8611 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8612 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8613 return SDValue();
8614
8615 // Compute the shift amount we need to extract to complete the rotate.
8616 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8617 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8618 return SDValue();
8619 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8620 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8621 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8622 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8623 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8624
8625 // Now try extract the needed shift from the ExtractFrom op and see if the
8626 // result matches up with the existing shift's LHS op.
8627 if (IsMulOrDiv) {
8628 // Op to extract from is a mul or udiv by a constant.
8629 // Check:
8630 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8631 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8632 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8633 NeededShiftAmt.getZExtValue());
8634 APInt ResultAmt;
8635 APInt Rem;
8636 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8637 if (Rem != 0 || ResultAmt != OppLHSAmt)
8638 return SDValue();
8639 } else {
8640 // Op to extract from is a shift by a constant.
8641 // Check:
8642 // c2 - (bitwidth(op0 v c0) - c1) == c0
8643 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8644 ExtractFromAmt.getBitWidth()))
8645 return SDValue();
8646 }
8647
8648 // Return the expanded shift op that should allow a rotate to be formed.
8649 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8650 EVT ResVT = ExtractFrom.getValueType();
8651 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8652 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8653}
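
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// A concrete 32-bit instance of the mul case handled above:
// (or (mul v, 24), (srl (mul v, 3), 29)). Since v*24 == (v*3) << 3 (mod 2^32)
// and 3 + 29 == 32, the whole expression is rotl(v*3, 3). Helper names are
// hypothetical.
#include <cstdint>
static uint32_t rotl32Ref(uint32_t V, unsigned S) {
  return (V << S) | (V >> (32 - S)); // assumes 1 <= S <= 31
}
static bool checkExtractShiftForRotate(uint32_t V) {
  uint32_t Original = (V * 24u) | ((V * 3u) >> 29); // InstCombine-merged form
  uint32_t Rotated = rotl32Ref(V * 3u, 3);          // what the combiner forms
  return Original == Rotated;
}
// ===---------------------------------------------------------------------===//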
8654
8655// Return true if we can prove that, whenever Neg and Pos are both in the
8656// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8657// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8658//
8659// (or (shift1 X, Neg), (shift2 X, Pos))
8660//
8661// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8662// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8663// to consider shift amounts with defined behavior.
8664//
8665// The IsRotate flag should be set when the LHS of both shifts is the same.
8666// Otherwise if matching a general funnel shift, it should be clear.
8667static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8668 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8669 const auto &TLI = DAG.getTargetLoweringInfo();
8670 // If EltSize is a power of 2 then:
8671 //
8672 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8673 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8674 //
8675 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8676 // for the stronger condition:
8677 //
8678 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8679 //
8680 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8681 // we can just replace Neg with Neg' for the rest of the function.
8682 //
8683 // In other cases we check for the even stronger condition:
8684 //
8685 // Neg == EltSize - Pos [B]
8686 //
8687 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8688 // behavior if Pos == 0 (and consequently Neg == EltSize).
8689 //
8690 // We could actually use [A] whenever EltSize is a power of 2, but the
8691 // only extra cases that it would match are those uninteresting ones
8692 // where Neg and Pos are never in range at the same time. E.g. for
8693 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8694 // as well as (sub 32, Pos), but:
8695 //
8696 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8697 //
8698 // always invokes undefined behavior for 32-bit X.
8699 //
8700 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8701 // This allows us to peek through any operations that only affect Mask's
8702 // un-demanded bits.
8703 //
8704 // NOTE: We can only do this when matching operations which won't modify the
8705 // least Log2(EltSize) significant bits and not a general funnel shift.
8706 unsigned MaskLoBits = 0;
8707 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8708 unsigned Bits = Log2_64(EltSize);
8709 unsigned NegBits = Neg.getScalarValueSizeInBits();
8710 if (NegBits >= Bits) {
8711 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8712 if (SDValue Inner =
8713 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8714 Neg = Inner;
8715 MaskLoBits = Bits;
8716 }
8717 }
8718 }
8719
8720 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8721 if (Neg.getOpcode() != ISD::SUB)
8722 return false;
8723 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8724 if (!NegC)
8725 return false;
8726 SDValue NegOp1 = Neg.getOperand(1);
8727
8728 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8729 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8730 // are redundant for the purpose of the equality.
8731 if (MaskLoBits) {
8732 unsigned PosBits = Pos.getScalarValueSizeInBits();
8733 if (PosBits >= MaskLoBits) {
8734 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8735 if (SDValue Inner =
8736 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG))
8737 Pos = Inner;
8738 }
8739 }
8740 }
8741
8742 // The condition we need is now:
8743 //
8744 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8745 //
8746 // If NegOp1 == Pos then we need:
8747 //
8748 // EltSize & Mask == NegC & Mask
8749 //
8750 // (because "x & Mask" is a truncation and distributes through subtraction).
8751 //
8752 // We also need to account for a potential truncation of NegOp1 if the amount
8753 // has already been legalized to a shift amount type.
8754 APInt Width;
8755 if ((Pos == NegOp1) ||
8756 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8757 Width = NegC->getAPIntValue();
8758
8759 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8760 // Then the condition we want to prove becomes:
8761 //
8762 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8763 //
8764 // which, again because "x & Mask" is a truncation, becomes:
8765 //
8766 // NegC & Mask == (EltSize - PosC) & Mask
8767 // EltSize & Mask == (NegC + PosC) & Mask
8768 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8769 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8770 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8771 else
8772 return false;
8773 } else
8774 return false;
8775
8776 // Now we just need to check that EltSize & Mask == Width & Mask.
8777 if (MaskLoBits)
8778 // EltSize & Mask is 0 since Mask is EltSize - 1.
8779 return Width.getLoBits(MaskLoBits) == 0;
8780 return Width == EltSize;
8781}
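
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// Condition [A] above for a 32-bit rotate: when Neg is computed as
// (32 - Pos) & 31, the or-of-opposite-shifts is a rotate for every Pos in
// [0, 31], including Pos == 0 where both shift amounts collapse to 0.
// Helper name is hypothetical.
#include <cstdint>
static bool checkRotateSubCondition(uint32_t X, unsigned Pos) {
  Pos &= 31u;                             // keep the amount in [0, 31]
  unsigned Neg = (32u - Pos) & 31u;       // Neg == (EltSize - Pos) & (EltSize - 1)
  uint32_t OrOfShifts = (X >> Pos) | (X << Neg);
  uint32_t RotR = (X >> Pos) | (Pos ? (X << (32u - Pos)) : 0u); // reference rotr
  return OrOfShifts == RotR;
}
// ===---------------------------------------------------------------------===//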
8782
8783// A subroutine of MatchRotate used once we have found an OR of two opposite
8784// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8785// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8786// former being preferred if supported. InnerPos and InnerNeg are Pos and
8787// Neg with outer conversions stripped away.
8788SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8789 SDValue Neg, SDValue InnerPos,
8790 SDValue InnerNeg, bool FromAdd,
8791 bool HasPos, unsigned PosOpcode,
8792 unsigned NegOpcode, const SDLoc &DL) {
8793 // fold (or/add (shl x, (*ext y)),
8794 // (srl x, (*ext (sub 32, y)))) ->
8795 // (rotl x, y) or (rotr x, (sub 32, y))
8796 //
8797 // fold (or/add (shl x, (*ext (sub 32, y))),
8798 // (srl x, (*ext y))) ->
8799 // (rotr x, y) or (rotl x, (sub 32, y))
8800 EVT VT = Shifted.getValueType();
8801 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8802 /*IsRotate*/ true, FromAdd))
8803 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8804 HasPos ? Pos : Neg);
8805
8806 return SDValue();
8807}
8808
8809// A subroutine of MatchRotate used once we have found an OR of two opposite
8810// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8811// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8812// former being preferred if supported. InnerPos and InnerNeg are Pos and
8813// Neg with outer conversions stripped away.
8814// TODO: Merge with MatchRotatePosNeg.
8815SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8816 SDValue Neg, SDValue InnerPos,
8817 SDValue InnerNeg, bool FromAdd,
8818 bool HasPos, unsigned PosOpcode,
8819 unsigned NegOpcode, const SDLoc &DL) {
8820 EVT VT = N0.getValueType();
8821 unsigned EltBits = VT.getScalarSizeInBits();
8822
8823 // fold (or/add (shl x0, (*ext y)),
8824 // (srl x1, (*ext (sub 32, y)))) ->
8825 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8826 //
8827 // fold (or/add (shl x0, (*ext (sub 32, y))),
8828 // (srl x1, (*ext y))) ->
8829 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8830 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8831 FromAdd))
8832 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8833 HasPos ? Pos : Neg);
8834
8835 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8836 // so for now just use the PosOpcode case if its legal.
8837 // TODO: When can we use the NegOpcode case?
8838 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8839 SDValue X;
8840 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8841 // -> (fshl x0, x1, y)
8842 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8843 sd_match(InnerNeg,
8844 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8846 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8847 }
8848
8849 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8850 // -> (fshr x0, x1, y)
8851 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8852 sd_match(InnerPos,
8853 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8855 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8856 }
8857
8858 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8859 // -> (fshr x0, x1, y)
8860 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8861 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8862 sd_match(InnerPos,
8863 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8865 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8866 }
8867 }
8868
8869 return SDValue();
8870}
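
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// The shift+xor funnel pattern above, checked against a reference 32-bit
// fshl: (shl x0, y) | (srl (srl x1, 1), (xor y, 31)) equals fshl(x0, x1, y)
// for every y in [0, 31], because (x1 >> 1) >> (y ^ 31) is x1 >> (32 - y)
// expressed without the undefined y == 0 shift. Helper names are hypothetical.
#include <cstdint>
static uint32_t fshl32Ref(uint32_t X0, uint32_t X1, unsigned Y) {
  uint64_t Concat = ((uint64_t)X0 << 32) | X1;    // fshl concatenates x0:x1,
  return (uint32_t)((Concat << (Y & 31u)) >> 32); // shifts left, keeps high word
}
static bool checkShiftXorFunnel(uint32_t X0, uint32_t X1, unsigned Y) {
  Y &= 31u;
  uint32_t Pattern = (X0 << Y) | ((X1 >> 1) >> (Y ^ 31u));
  return Pattern == fshl32Ref(X0, X1, Y);
}
// ===---------------------------------------------------------------------===//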
8871
8872// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8873// many idioms for rotate, and if the target supports rotation instructions,
8874// generate a rot[lr]. This also matches funnel shift patterns, similar to
8875// rotation but with different shifted sources.
8876SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8877 bool FromAdd) {
8878 EVT VT = LHS.getValueType();
8879
8880 // The target must have at least one rotate/funnel flavor.
8881 // We still try to match rotate by constant pre-legalization.
8882 // TODO: Support pre-legalization funnel-shift by constant.
8883 bool HasROTL = hasOperation(ISD::ROTL, VT);
8884 bool HasROTR = hasOperation(ISD::ROTR, VT);
8885 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8886 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8887
8888 // If the type is going to be promoted and the target has enabled custom
8889 // lowering for rotate, allow matching rotate by non-constants. Only allow
8890 // this for scalar types.
8891 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8892 TargetLowering::TypePromoteInteger) {
8893 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8894 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8895 }
8896
8897 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8898 return SDValue();
8899
8900 // Check for truncated rotate.
8901 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8902 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8903 assert(LHS.getValueType() == RHS.getValueType());
8904 if (SDValue Rot =
8905 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
8906 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8907 }
8908
8909 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8910 SDValue LHSShift; // The shift.
8911 SDValue LHSMask; // AND value if any.
8912 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8913
8914 SDValue RHSShift; // The shift.
8915 SDValue RHSMask; // AND value if any.
8916 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8917
8918 // If neither side matched a rotate half, bail
8919 if (!LHSShift && !RHSShift)
8920 return SDValue();
8921
8922 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8923 // side of the rotate, so try to handle that here. In all cases we need to
8924 // pass the matched shift from the opposite side to compute the opcode and
8925 // needed shift amount to extract. We still want to do this if both sides
8926 // matched a rotate half because one half may be a potential overshift that
8927 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8928 // single one).
8929
8930 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8931 if (LHSShift)
8932 if (SDValue NewRHSShift =
8933 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8934 RHSShift = NewRHSShift;
8935 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8936 if (RHSShift)
8937 if (SDValue NewLHSShift =
8938 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8939 LHSShift = NewLHSShift;
8940
8941 // If a side is still missing, nothing else we can do.
8942 if (!RHSShift || !LHSShift)
8943 return SDValue();
8944
8945 // At this point we've matched or extracted a shift op on each side.
8946
8947 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8948 return SDValue(); // Shifts must disagree.
8949
8950 // Canonicalize shl to left side in a shl/srl pair.
8951 if (RHSShift.getOpcode() == ISD::SHL) {
8952 std::swap(LHS, RHS);
8953 std::swap(LHSShift, RHSShift);
8954 std::swap(LHSMask, RHSMask);
8955 }
8956
8957 // Something has gone wrong - we've lost the shl/srl pair - bail.
8958 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8959 return SDValue();
8960
8961 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8962 SDValue LHSShiftArg = LHSShift.getOperand(0);
8963 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8964 SDValue RHSShiftArg = RHSShift.getOperand(0);
8965 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8966
8967 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8968 ConstantSDNode *RHS) {
8969 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8970 };
8971
8972 auto ApplyMasks = [&](SDValue Res) {
8973 // If there is an AND of either shifted operand, apply it to the result.
8974 if (LHSMask.getNode() || RHSMask.getNode()) {
8975 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8976 SDValue Mask = AllOnes;
8977
8978 if (LHSMask.getNode()) {
8979 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8980 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8981 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8982 }
8983 if (RHSMask.getNode()) {
8984 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8985 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8986 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8987 }
8988
8989 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8990 }
8991
8992 return Res;
8993 };
8994
8995 // TODO: Support pre-legalization funnel-shift by constant.
8996 bool IsRotate = LHSShiftArg == RHSShiftArg;
8997 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8998 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8999 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9000 // Look for a disguised rotate by constant.
9001 // The common shifted operand X may be hidden inside another 'or'.
9002 SDValue X, Y;
9003 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9004 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9005 return false;
9006 if (CommonOp == Or.getOperand(0)) {
9007 X = CommonOp;
9008 Y = Or.getOperand(1);
9009 return true;
9010 }
9011 if (CommonOp == Or.getOperand(1)) {
9012 X = CommonOp;
9013 Y = Or.getOperand(0);
9014 return true;
9015 }
9016 return false;
9017 };
9018
9019 SDValue Res;
9020 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9021 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9022 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9023 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9024 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9025 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9026 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9027 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9028 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9029 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9030 } else {
9031 return SDValue();
9032 }
9033
9034 return ApplyMasks(Res);
9035 }
9036
9037 return SDValue(); // Requires funnel shift support.
9038 }
9039
9040 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9041 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9042 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9043 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9044 // iff C1+C2 == EltSizeInBits
9045 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9046 SDValue Res;
9047 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9048 bool UseROTL = !LegalOperations || HasROTL;
9049 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9050 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9051 } else {
9052 bool UseFSHL = !LegalOperations || HasFSHL;
9053 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9054 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9055 }
9056
9057 return ApplyMasks(Res);
9058 }
9059
9060 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9061 // shift.
9062 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9063 return SDValue();
9064
9065 // If there is a mask here, and we have a variable shift, we can't be sure
9066 // that we're masking out the right stuff.
9067 if (LHSMask.getNode() || RHSMask.getNode())
9068 return SDValue();
9069
9070 // If the shift amount is sign/zext/any-extended just peel it off.
9071 SDValue LExtOp0 = LHSShiftAmt;
9072 SDValue RExtOp0 = RHSShiftAmt;
9073 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9074 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9075 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9076 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9077 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9078 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9079 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9080 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9081 LExtOp0 = LHSShiftAmt.getOperand(0);
9082 RExtOp0 = RHSShiftAmt.getOperand(0);
9083 }
9084
9085 if (IsRotate && (HasROTL || HasROTR)) {
9086 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9087 LExtOp0, RExtOp0, FromAdd, HasROTL,
9088 ISD::ROTL, ISD::ROTR, DL))
9089 return TryL;
9090
9091 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9092 RExtOp0, LExtOp0, FromAdd, HasROTR,
9093 ISD::ROTR, ISD::ROTL, DL))
9094 return TryR;
9095 }
9096
9097 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9098 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9099 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9100 return TryL;
9101
9102 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9103 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9104 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9105 return TryR;
9106
9107 return SDValue();
9108}
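
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// The "disguised rotate by constant" case above, with concrete 32-bit
// constants C1 = 8 and C2 = 24 (so C1 + C2 == 32):
// (shl (X | Y), 8) | (srl X, 24) == (rotl X, 8) | (shl Y, 8).
// Helper name is hypothetical.
#include <cstdint>
static bool checkDisguisedRotate(uint32_t X, uint32_t Y) {
  uint32_t Original = ((X | Y) << 8) | (X >> 24);
  uint32_t RotlX = (X << 8) | (X >> 24);
  uint32_t Folded = RotlX | (Y << 8);
  return Original == Folded;
}
// ===---------------------------------------------------------------------===//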
9109
9110/// Recursively traverses the expression calculating the origin of the requested
9111/// byte of the given value. Returns std::nullopt if the provider can't be
9112/// calculated.
9113///
9114/// For all the values except the root of the expression, we verify that the
9115/// value has exactly one use and if not then return std::nullopt. This way if
9116/// the origin of the byte is returned it's guaranteed that the values which
9117/// contribute to the byte are not used outside of this expression.
9118
9119/// However, there is a special case when dealing with vector loads -- we allow
9120/// more than one use if the load is a vector type. Since the values that
9121/// contribute to the byte ultimately come from the ExtractVectorElements of the
9122/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9123/// because those operations are independent from the pattern to be combined.
9124/// For vector loads, we simply care that the ByteProviders are adjacent
9125/// positions of the same vector, and their index matches the byte that is being
9126/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9127/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9128/// byte position we are trying to provide for the LoadCombine. If these do
9129/// not match, then we can not combine the vector loads. \p Index uses the
9130/// byte position we are trying to provide for and is matched against the
9131/// shl and load size. The \p Index algorithm ensures the requested byte is
9132/// provided for by the pattern, and the pattern does not over provide bytes.
9133///
9134///
9135/// The supported LoadCombine pattern for vector loads is as follows
9136/// or
9137/// / \
9138/// or shl
9139/// / \ |
9140/// or shl zext
9141/// / \ | |
9142/// shl zext zext EVE*
9143/// | | | |
9144/// zext EVE* EVE* LOAD
9145/// | | |
9146/// EVE* LOAD LOAD
9147/// |
9148/// LOAD
9149///
9150/// *ExtractVectorElement
9152
9153static std::optional<SDByteProvider>
9154calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9155 std::optional<uint64_t> VectorIndex,
9156 unsigned StartingIndex = 0) {
9157
9158 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9159 if (Depth == 10)
9160 return std::nullopt;
9161
9162 // Only allow multiple uses if the instruction is a vector load (in which
9163 // case we will use the load for every ExtractVectorElement)
9164 if (Depth && !Op.hasOneUse() &&
9165 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9166 return std::nullopt;
9167
9168 // Fail to combine if we have encountered anything but a LOAD after handling
9169 // an ExtractVectorElement.
9170 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9171 return std::nullopt;
9172
9173 unsigned BitWidth = Op.getScalarValueSizeInBits();
9174 if (BitWidth % 8 != 0)
9175 return std::nullopt;
9176 unsigned ByteWidth = BitWidth / 8;
9177 assert(Index < ByteWidth && "invalid index requested");
9178 (void) ByteWidth;
9179
9180 switch (Op.getOpcode()) {
9181 case ISD::OR: {
9182 auto LHS =
9183 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9184 if (!LHS)
9185 return std::nullopt;
9186 auto RHS =
9187 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9188 if (!RHS)
9189 return std::nullopt;
9190
9191 if (LHS->isConstantZero())
9192 return RHS;
9193 if (RHS->isConstantZero())
9194 return LHS;
9195 return std::nullopt;
9196 }
9197 case ISD::SHL: {
9198 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9199 if (!ShiftOp)
9200 return std::nullopt;
9201
9202 uint64_t BitShift = ShiftOp->getZExtValue();
9203
9204 if (BitShift % 8 != 0)
9205 return std::nullopt;
9206 uint64_t ByteShift = BitShift / 8;
9207
9208 // If we are shifting by an amount greater than the index we are trying to
9209 // provide, then do not provide anything. Otherwise, subtract the index by
9210 // the amount we shifted by.
9211 return Index < ByteShift
9212 ? SDByteProvider::getConstantZero()
9213 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9214 Depth + 1, VectorIndex, Index);
9215 }
9216 case ISD::ANY_EXTEND:
9217 case ISD::SIGN_EXTEND:
9218 case ISD::ZERO_EXTEND: {
9219 SDValue NarrowOp = Op->getOperand(0);
9220 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9221 if (NarrowBitWidth % 8 != 0)
9222 return std::nullopt;
9223 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9224
9225 if (Index >= NarrowByteWidth)
9226 return Op.getOpcode() == ISD::ZERO_EXTEND
9227 ? std::optional<SDByteProvider>(
9228 SDByteProvider::getConstantZero())
9229 : std::nullopt;
9230 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9231 StartingIndex);
9232 }
9233 case ISD::BSWAP:
9234 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9235 Depth + 1, VectorIndex, StartingIndex);
9236 case ISD::EXTRACT_VECTOR_ELT: {
9237 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9238 if (!OffsetOp)
9239 return std::nullopt;
9240
9241 VectorIndex = OffsetOp->getZExtValue();
9242
9243 SDValue NarrowOp = Op->getOperand(0);
9244 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9245 if (NarrowBitWidth % 8 != 0)
9246 return std::nullopt;
9247 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9248 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9249 // type, leaving the high bits undefined.
9250 if (Index >= NarrowByteWidth)
9251 return std::nullopt;
9252
9253 // Check to see if the position of the element in the vector corresponds
9254 // with the byte we are trying to provide for. In the case of a vector of
9255 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9256 // the element will provide a range of bytes. For example, if we have a
9257 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9258 // 3).
9259 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9260 return std::nullopt;
9261 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9262 return std::nullopt;
9263
9264 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9265 VectorIndex, StartingIndex);
9266 }
9267 case ISD::LOAD: {
9268 auto L = cast<LoadSDNode>(Op.getNode());
9269 if (!L->isSimple() || L->isIndexed())
9270 return std::nullopt;
9271
9272 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9273 if (NarrowBitWidth % 8 != 0)
9274 return std::nullopt;
9275 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9276
9277 // If the width of the load does not reach the byte we are trying to
9278 // provide for and it is not a ZEXTLOAD, then the load does not provide
9279 // for the byte in question.
9280 if (Index >= NarrowByteWidth)
9281 return L->getExtensionType() == ISD::ZEXTLOAD
9282 ? std::optional<SDByteProvider>(
9283 SDByteProvider::getConstantZero())
9284 : std::nullopt;
9285
9286 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9287 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9288 }
9289 }
9290
9291 return std::nullopt;
9292}
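
// ===-- Editor's illustrative sketch (not part of DAGCombiner.cpp) -------===//
// The source-level shape that calculateByteProvider decomposes and that the
// load combine replaces with one wide load: on a little-endian host the
// byte-wise assembly below equals a single unaligned 32-bit load. Helper name
// is hypothetical.
#include <cstdint>
#include <cstring>
static bool checkLoadCombineShape(const uint8_t *P) {
  uint32_t ByByte = (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
                    ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
  uint32_t Wide;
  std::memcpy(&Wide, P, sizeof(Wide)); // the single i32 load (little-endian assumed)
  return ByByte == Wide;
}
// ===---------------------------------------------------------------------===//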
9293
9294static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9295 return i;
9296}
9297
9298static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9299 return BW - i - 1;
9300}
9301
9302// Check if the bytes offsets we are looking at match with either big or
9303// little endian value loaded. Return true for big endian, false for little
9304// endian, and std::nullopt if match failed.
9305static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9306 int64_t FirstOffset) {
9307 // The endian can be decided only when it is 2 bytes at least.
9308 unsigned Width = ByteOffsets.size();
9309 if (Width < 2)
9310 return std::nullopt;
9311
9312 bool BigEndian = true, LittleEndian = true;
9313 for (unsigned i = 0; i < Width; i++) {
9314 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9315 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9316 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9317 if (!BigEndian && !LittleEndian)
9318 return std::nullopt;
9319 }
9320
9321 assert((BigEndian != LittleEndian) && "It should be either big endian or "
9322 "little endian");
9323 return BigEndian;
9324}
9325
9326 // Look through one layer of truncate or extend.
9327 static SDValue stripTruncAndExt(SDValue Value) {
9328 switch (Value.getOpcode()) {
9329 case ISD::TRUNCATE:
9330 case ISD::ZERO_EXTEND:
9331 case ISD::SIGN_EXTEND:
9332 case ISD::ANY_EXTEND:
9333 return Value.getOperand(0);
9334 }
9335 return SDValue();
9336}
9337
9338/// Match a pattern where a wide type scalar value is stored by several narrow
9339 /// stores. Fold it into a single store or a BSWAP and a store if the target
9340/// supports it.
9341///
9342/// Assuming little endian target:
9343/// i8 *p = ...
9344/// i32 val = ...
9345/// p[0] = (val >> 0) & 0xFF;
9346/// p[1] = (val >> 8) & 0xFF;
9347/// p[2] = (val >> 16) & 0xFF;
9348/// p[3] = (val >> 24) & 0xFF;
9349/// =>
9350/// *((i32)p) = val;
9351///
9352/// i8 *p = ...
9353/// i32 val = ...
9354/// p[0] = (val >> 24) & 0xFF;
9355/// p[1] = (val >> 16) & 0xFF;
9356/// p[2] = (val >> 8) & 0xFF;
9357/// p[3] = (val >> 0) & 0xFF;
9358/// =>
9359/// *((i32)p) = BSWAP(val);
9360SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9361 // The matching looks for "store (trunc x)" patterns that appear early but are
9362 // likely to be replaced by truncating store nodes during combining.
9363 // TODO: If there is evidence that running this later would help, this
9364 // limitation could be removed. Legality checks may need to be added
9365 // for the created store and optional bswap/rotate.
9366 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9367 return SDValue();
9368
9369 // We only handle merging simple stores of 1-4 bytes.
9370 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9371 EVT MemVT = N->getMemoryVT();
9372 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9373 !N->isSimple() || N->isIndexed())
9374 return SDValue();
9375
9376 // Collect all of the stores in the chain, up to the maximum store width (i64).
9377 SDValue Chain = N->getChain();
9378 SmallVector<StoreSDNode *, 8> Stores;
9379 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9380 unsigned MaxWideNumBits = 64;
9381 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9382 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9383 // All stores must be the same size to ensure that we are writing all of the
9384 // bytes in the wide value.
9385 // This store should have exactly one use as a chain operand for another
9386 // store in the merging set. If there are other chain uses, then the
9387 // transform may not be safe because order of loads/stores outside of this
9388 // set may not be preserved.
9389 // TODO: We could allow multiple sizes by tracking each stored byte.
9390 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9391 Store->isIndexed() || !Store->hasOneUse())
9392 return SDValue();
9393 Stores.push_back(Store);
9394 Chain = Store->getChain();
9395 if (MaxStores < Stores.size())
9396 return SDValue();
9397 }
9398 // There is no reason to continue if we do not have at least a pair of stores.
9399 if (Stores.size() < 2)
9400 return SDValue();
9401
9402 // Handle simple types only.
9403 LLVMContext &Context = *DAG.getContext();
9404 unsigned NumStores = Stores.size();
9405 unsigned WideNumBits = NumStores * NarrowNumBits;
9406 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9407 return SDValue();
9408
9409 // Check if all bytes of the source value that we are looking at are stored
9410 // to the same base address. Collect offsets from Base address into OffsetMap.
9411 SDValue SourceValue;
9412 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9413 int64_t FirstOffset = INT64_MAX;
9414 StoreSDNode *FirstStore = nullptr;
9415 std::optional<BaseIndexOffset> Base;
9416 for (auto *Store : Stores) {
9417 // All the stores store different parts of the CombinedValue. A truncate is
9418 // required to get the partial value.
9419 SDValue Trunc = Store->getValue();
9420 if (Trunc.getOpcode() != ISD::TRUNCATE)
9421 return SDValue();
9422 // Other than the first/last part, a shift operation is required to get the
9423 // offset.
9424 int64_t Offset = 0;
9425 SDValue WideVal = Trunc.getOperand(0);
9426 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9427 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9428 // The shift amount must be a constant multiple of the narrow type.
9429 // It is translated to the offset address in the wide source value "y".
9430 //
9431 // x = srl y, ShiftAmtC
9432 // i8 z = trunc x
9433 // store z, ...
9434 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9435 if (ShiftAmtC % NarrowNumBits != 0)
9436 return SDValue();
9437
9438 // Make sure we aren't reading bits that are shifted in.
9439 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9440 return SDValue();
9441
9442 Offset = ShiftAmtC / NarrowNumBits;
9443 WideVal = WideVal.getOperand(0);
9444 }
9445
9446 // Stores must share the same source value with different offsets.
9447 if (!SourceValue)
9448 SourceValue = WideVal;
9449 else if (SourceValue != WideVal) {
9450 // Truncates and extends can be stripped to see if the values are related.
9451 if (stripTruncAndExt(SourceValue) != WideVal &&
9452 stripTruncAndExt(WideVal) != SourceValue)
9453 return SDValue();
9454
9455 if (WideVal.getScalarValueSizeInBits() >
9456 SourceValue.getScalarValueSizeInBits())
9457 SourceValue = WideVal;
9458
9459 // Give up if the source value type is smaller than the store size.
9460 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9461 return SDValue();
9462 }
9463
9464 // Stores must share the same base address.
9465 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9466 int64_t ByteOffsetFromBase = 0;
9467 if (!Base)
9468 Base = Ptr;
9469 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9470 return SDValue();
9471
9472 // Remember the first store.
9473 if (ByteOffsetFromBase < FirstOffset) {
9474 FirstStore = Store;
9475 FirstOffset = ByteOffsetFromBase;
9476 }
9477 // Map the offset in the store and the offset in the combined value, and
9478 // early return if it has been set before.
9479 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9480 return SDValue();
9481 OffsetMap[Offset] = ByteOffsetFromBase;
9482 }
9483
9484 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9485
9486 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9487 assert(FirstStore && "First store must be set");
9488
9489 // Check that a store of the wide type is both allowed and fast on the target
9490 const DataLayout &Layout = DAG.getDataLayout();
9491 unsigned Fast = 0;
9492 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9493 *FirstStore->getMemOperand(), &Fast);
9494 if (!Allowed || !Fast)
9495 return SDValue();
9496
9497 // Check if the pieces of the value are going to the expected places in memory
9498 // to merge the stores.
9499 auto checkOffsets = [&](bool MatchLittleEndian) {
9500 if (MatchLittleEndian) {
9501 for (unsigned i = 0; i != NumStores; ++i)
9502 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9503 return false;
9504 } else { // MatchBigEndian by reversing loop counter.
9505 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9506 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9507 return false;
9508 }
9509 return true;
9510 };
9511
9512 // Check if the offsets line up for the native data layout of this target.
9513 bool NeedBswap = false;
9514 bool NeedRotate = false;
9515 if (!checkOffsets(Layout.isLittleEndian())) {
9516 // Special-case: check if byte offsets line up for the opposite endian.
9517 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9518 NeedBswap = true;
9519 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9520 NeedRotate = true;
9521 else
9522 return SDValue();
9523 }
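// Illustrative example of the rotate case: two i16 stores writing the two
// halves of an i32 value in the order opposite to the native endianness are
// covered below by a single i32 store of ROTR(SourceValue, 16).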
9524
9525 SDLoc DL(N);
9526 if (WideVT != SourceValue.getValueType()) {
9527 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9528 "Unexpected store value to merge");
9529 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9530 }
9531
9532 // Before legalize we can introduce illegal bswaps/rotates which will be later
9533 // converted to an explicit bswap sequence. This way we end up with a single
9534 // store and byte shuffling instead of several stores and byte shuffling.
9535 if (NeedBswap) {
9536 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9537 } else if (NeedRotate) {
9538 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9539 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9540 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9541 }
9542
9543 SDValue NewStore =
9544 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9545 FirstStore->getPointerInfo(), FirstStore->getAlign());
9546
9547 // Rely on other DAG combine rules to remove the other individual stores.
9548 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9549 return NewStore;
9550}
9551
9552/// Match a pattern where a wide type scalar value is loaded by several narrow
9553/// loads and combined by shifts and ors. Fold it into a single load or a load
9554 /// and a BSWAP if the target supports it.
9555///
9556/// Assuming little endian target:
9557/// i8 *a = ...
9558/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9559/// =>
9560/// i32 val = *((i32)a)
9561///
9562/// i8 *a = ...
9563/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9564/// =>
9565/// i32 val = BSWAP(*((i32)a))
9566///
9567/// TODO: This rule matches complex patterns with OR node roots and doesn't
9568/// interact well with the worklist mechanism. When a part of the pattern is
9569/// updated (e.g. one of the loads) its direct users are put into the worklist,
9570/// but the root node of the pattern which triggers the load combine is not
9571/// necessarily a direct user of the changed node. For example, once the address
9572 /// of the t28 load is reassociated, load combine won't be triggered:
9573/// t25: i32 = add t4, Constant:i32<2>
9574/// t26: i64 = sign_extend t25
9575/// t27: i64 = add t2, t26
9576/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9577/// t29: i32 = zero_extend t28
9578/// t32: i32 = shl t29, Constant:i8<8>
9579/// t33: i32 = or t23, t32
9580/// As a possible fix visitLoad can check if the load can be a part of a load
9581/// combine pattern and add corresponding OR roots to the worklist.
9582SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9583 assert(N->getOpcode() == ISD::OR &&
9584 "Can only match load combining against OR nodes");
9585
9586 // Handles simple types only
9587 EVT VT = N->getValueType(0);
9588 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9589 return SDValue();
9590 unsigned ByteWidth = VT.getSizeInBits() / 8;
9591
9592 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9593 auto MemoryByteOffset = [&](SDByteProvider P) {
9594 assert(P.hasSrc() && "Must be a memory byte provider");
9595 auto *Load = cast<LoadSDNode>(P.Src.value());
9596
9597 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9598
9599 assert(LoadBitWidth % 8 == 0 &&
9600 "can only analyze providers for individual bytes not bit");
9601 unsigned LoadByteWidth = LoadBitWidth / 8;
9602 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9603 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9604 };
9605
9606 std::optional<BaseIndexOffset> Base;
9607 SDValue Chain;
9608
9609 SmallPtrSet<LoadSDNode *, 8> Loads;
9610 std::optional<SDByteProvider> FirstByteProvider;
9611 int64_t FirstOffset = INT64_MAX;
9612
9613 // Check if all the bytes of the OR we are looking at are loaded from the same
9614 // base address. Collect bytes offsets from Base address in ByteOffsets.
9615 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9616 unsigned ZeroExtendedBytes = 0;
9617 for (int i = ByteWidth - 1; i >= 0; --i) {
9618 auto P =
9619 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9620 /*StartingIndex*/ i);
9621 if (!P)
9622 return SDValue();
9623
9624 if (P->isConstantZero()) {
9625 // It's OK for the N most significant bytes to be 0, we can just
9626 // zero-extend the load.
9627 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9628 return SDValue();
9629 continue;
9630 }
9631 assert(P->hasSrc() && "provenance should either be memory or zero");
9632 auto *L = cast<LoadSDNode>(P->Src.value());
9633
9634 // All loads must share the same chain
9635 SDValue LChain = L->getChain();
9636 if (!Chain)
9637 Chain = LChain;
9638 else if (Chain != LChain)
9639 return SDValue();
9640
9641 // Loads must share the same base address
9642 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9643 int64_t ByteOffsetFromBase = 0;
9644
9645 // For vector loads, the expected load combine pattern will have an
9646 // ExtractElement for each index in the vector. While each of these
9647 // ExtractElements will be accessing the same base address as determined
9648 // by the load instruction, the actual bytes they interact with will differ
9649 // due to different ExtractElement indices. To accurately determine the
9650 // byte position of an ExtractElement, we offset the base load ptr with
9651 // the index multiplied by the byte size of each element in the vector.
9652 if (L->getMemoryVT().isVector()) {
9653 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9654 if (LoadWidthInBit % 8 != 0)
9655 return SDValue();
9656 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9657 Ptr.addToOffset(ByteOffsetFromVector);
9658 }
9659
9660 if (!Base)
9661 Base = Ptr;
9662
9663 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9664 return SDValue();
9665
9666 // Calculate the offset of the current byte from the base address
9667 ByteOffsetFromBase += MemoryByteOffset(*P);
9668 ByteOffsets[i] = ByteOffsetFromBase;
9669
9670 // Remember the first byte load
9671 if (ByteOffsetFromBase < FirstOffset) {
9672 FirstByteProvider = P;
9673 FirstOffset = ByteOffsetFromBase;
9674 }
9675
9676 Loads.insert(L);
9677 }
9678
9679 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9680 "memory, so there must be at least one load which produces the value");
9681 assert(Base && "Base address of the accessed memory location must be set");
9682 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9683
9684 bool NeedsZext = ZeroExtendedBytes > 0;
9685
9686 EVT MemVT =
9687 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9688
9689 if (!MemVT.isSimple())
9690 return SDValue();
9691
9692 // Before legalize we can introduce too wide illegal loads which will be later
9693 // split into legal sized loads. This enables us to combine i64 load by i8
9694 // patterns to a couple of i32 loads on 32 bit targets.
9695 if (LegalOperations &&
9696 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9697 MemVT))
9698 return SDValue();
9699
9700 // Check if the bytes of the OR we are looking at match with either big or
9701 // little endian value load
9702 std::optional<bool> IsBigEndian = isBigEndian(
9703 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9704 if (!IsBigEndian)
9705 return SDValue();
9706
9707 assert(FirstByteProvider && "must be set");
9708
9709 // Ensure that the first byte is loaded from zero offset of the first load.
9710 // So the combined value can be loaded from the first load address.
9711 if (MemoryByteOffset(*FirstByteProvider) != 0)
9712 return SDValue();
9713 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9714
9715 // The node we are looking at matches the pattern; check if we can
9716 // replace it with a single (possibly zero-extended) load and bswap + shift if
9717 // needed.
9718
9719 // If the load needs byte swap check if the target supports it
9720 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9721
9722 // Before legalize we can introduce illegal bswaps which will be later
9723 // converted to an explicit bswap sequence. This way we end up with a single
9724 // load and byte shuffling instead of several loads and byte shuffling.
9725 // We do not introduce illegal bswaps when zero-extending as this tends to
9726 // introduce too many arithmetic instructions.
9727 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9728 !TLI.isOperationLegal(ISD::BSWAP, VT))
9729 return SDValue();
9730
9731 // If we need to bswap and zero extend, we have to insert a shift. Check that
9732 // it is legal.
9733 if (NeedsBswap && NeedsZext && LegalOperations &&
9734 !TLI.isOperationLegal(ISD::SHL, VT))
9735 return SDValue();
9736
9737 // Check that a load of the wide type is both allowed and fast on the target
9738 unsigned Fast = 0;
9739 bool Allowed =
9740 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9741 *FirstLoad->getMemOperand(), &Fast);
9742 if (!Allowed || !Fast)
9743 return SDValue();
9744
9745 SDValue NewLoad =
9746 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9747 Chain, FirstLoad->getBasePtr(),
9748 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9749
9750 // Transfer chain users from old loads to the new load.
9751 for (LoadSDNode *L : Loads)
9752 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9753
9754 if (!NeedsBswap)
9755 return NewLoad;
9756
9757 SDValue ShiftedLoad =
9758 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9759 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9760 VT, SDLoc(N)))
9761 : NewLoad;
9762 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9763}
9764
9765// If the target has andn, bsl, or a similar bit-select instruction,
9766// we want to unfold masked merge, with canonical pattern of:
9767// | A | |B|
9768// ((x ^ y) & m) ^ y
9769// | D |
9770// Into:
9771// (x & m) | (y & ~m)
9772// If y is a constant, m is not a 'not', and the 'andn' does not work with
9773// immediates, we unfold into a different pattern:
9774// ~(~x & m) & (m | y)
9775// If x is a constant, m is a 'not', and the 'andn' does not work with
9776// immediates, we unfold into a different pattern:
9777// (x | ~m) & ~(~m & ~y)
9778// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9779// the very least that breaks andnpd / andnps patterns, and because those
9780// patterns are simplified in IR and shouldn't be created in the DAG
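// Illustrative example with 4-bit values: x = 0b1100, y = 0b1010, m = 0b0110
// gives ((x ^ y) & m) ^ y == 0b1100, the same as
// (x & m) | (y & ~m) == 0b0100 | 0b1000 == 0b1100.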
9781SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9782 assert(N->getOpcode() == ISD::XOR);
9783
9784 // Don't touch 'not' (i.e. where y = -1).
9785 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9786 return SDValue();
9787
9788 EVT VT = N->getValueType(0);
9789
9790 // There are 3 commutable operators in the pattern,
9791 // so we have to deal with 8 possible variants of the basic pattern.
9792 SDValue X, Y, M;
9793 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9794 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9795 return false;
9796 SDValue Xor = And.getOperand(XorIdx);
9797 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9798 return false;
9799 SDValue Xor0 = Xor.getOperand(0);
9800 SDValue Xor1 = Xor.getOperand(1);
9801 // Don't touch 'not' (i.e. where y = -1).
9802 if (isAllOnesOrAllOnesSplat(Xor1))
9803 return false;
9804 if (Other == Xor0)
9805 std::swap(Xor0, Xor1);
9806 if (Other != Xor1)
9807 return false;
9808 X = Xor0;
9809 Y = Xor1;
9810 M = And.getOperand(XorIdx ? 0 : 1);
9811 return true;
9812 };
9813
9814 SDValue N0 = N->getOperand(0);
9815 SDValue N1 = N->getOperand(1);
9816 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9817 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9818 return SDValue();
9819
9820 // Don't do anything if the mask is constant. This should not be reachable.
9821 // InstCombine should have already unfolded this pattern, and DAGCombiner
9822 // probably shouldn't produce it either.
9823 if (isa<ConstantSDNode>(M.getNode()))
9824 return SDValue();
9825
9826 // We can transform if the target has AndNot
9827 if (!TLI.hasAndNot(M))
9828 return SDValue();
9829
9830 SDLoc DL(N);
9831
9832 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9833 // a bitwise not that would already allow ANDN to be used.
9834 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9835 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9836 // If not, we need to do a bit more work to make sure andn is still used.
9837 SDValue NotX = DAG.getNOT(DL, X, VT);
9838 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9839 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9840 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9841 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9842 }
9843
9844 // If X is a constant and M is a bitwise not, check that 'andn' works with
9845 // immediates.
9846 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9847 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9848 // If not, we need to do a bit more work to make sure andn is still used.
9849 SDValue NotM = M.getOperand(0);
9850 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9851 SDValue NotY = DAG.getNOT(DL, Y, VT);
9852 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9853 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9854 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9855 }
9856
9857 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9858 SDValue NotM = DAG.getNOT(DL, M, VT);
9859 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9860
9861 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9862}
9863
9864SDValue DAGCombiner::visitXOR(SDNode *N) {
9865 SDValue N0 = N->getOperand(0);
9866 SDValue N1 = N->getOperand(1);
9867 EVT VT = N0.getValueType();
9868 SDLoc DL(N);
9869
9870 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9871 if (N0.isUndef() && N1.isUndef())
9872 return DAG.getConstant(0, DL, VT);
9873
9874 // fold (xor x, undef) -> undef
9875 if (N0.isUndef())
9876 return N0;
9877 if (N1.isUndef())
9878 return N1;
9879
9880 // fold (xor c1, c2) -> c1^c2
9881 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9882 return C;
9883
9884 // canonicalize constant to RHS
9885 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9886 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9887 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9888
9889 // fold vector ops
9890 if (VT.isVector()) {
9891 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9892 return FoldedVOp;
9893
9894 // fold (xor x, 0) -> x, vector edition
9895 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9896 return N0;
9897 }
9898
9899 // fold (xor x, 0) -> x
9900 if (isNullConstant(N1))
9901 return N0;
9902
9903 if (SDValue NewSel = foldBinOpIntoSelect(N))
9904 return NewSel;
9905
9906 // reassociate xor
9907 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9908 return RXOR;
9909
9910 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9911 if (SDValue SD =
9912 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9913 return SD;
9914
9915 // fold (a^b) -> (a|b) iff a and b share no bits.
9916 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9917 DAG.haveNoCommonBitsSet(N0, N1))
9918 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9919
9920 // look for 'add-like' folds:
9921 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
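// e.g. for i8, (xor X, 0x80) and (add X, 0x80) both just flip the sign bit,
// since any carry out of the top bit is discarded.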
9922 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9923 isMinSignedConstant(N1))
9924 if (SDValue Combined = visitADDLike(N))
9925 return Combined;
9926
9927 // fold not (setcc x, y, cc) -> setcc x y !cc
9928 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
9929 unsigned N0Opcode = N0.getOpcode();
9930 SDValue LHS, RHS, CC;
9931 if (TLI.isConstTrueVal(N1) &&
9932 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
9933 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
9934 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
9935 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9936 LHS.getValueType());
9937 if (!LegalOperations ||
9938 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9939 switch (N0Opcode) {
9940 default:
9941 llvm_unreachable("Unhandled SetCC Equivalent!");
9942 case ISD::SETCC:
9943 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9944 case ISD::SELECT_CC:
9945 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9946 N0.getOperand(3), NotCC);
9947 case ISD::STRICT_FSETCC:
9948 case ISD::STRICT_FSETCCS: {
9949 if (N0.hasOneUse()) {
9950 // FIXME Can we handle multiple uses? Could we token factor the chain
9951 // results from the new/old setcc?
9952 SDValue SetCC =
9953 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9954 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9955 CombineTo(N, SetCC);
9956 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9957 recursivelyDeleteUnusedNodes(N0.getNode());
9958 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9959 }
9960 break;
9961 }
9962 }
9963 }
9964 }
9965
9966 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9967 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9968 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9969 SDValue V = N0.getOperand(0);
9970 SDLoc DL0(N0);
9971 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9972 DAG.getConstant(1, DL0, V.getValueType()));
9973 AddToWorklist(V.getNode());
9974 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9975 }
9976
9977 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9978 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
9979 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9980 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9981 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9982 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9983 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9984 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9985 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9986 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9987 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9988 }
9989 }
9990 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9991 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
9992 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9993 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9994 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9995 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9996 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9997 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9998 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9999 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10000 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10001 }
10002 }
10003
10004 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10005 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10006 SDValue Y = N0.getOperand(0);
10007 SDValue X = N0.getOperand(1);
10008
10009 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10010 APInt NotYValue = ~YConst->getAPIntValue();
10011 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10012 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10013 }
10014 }
10015
10016 // fold (not (add X, -1)) -> (neg X)
10017 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10018 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10019 return DAG.getNegative(N0.getOperand(0), DL, VT);
10020 }
10021
10022 // fold (xor (and x, y), y) -> (and (not x), y)
10023 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10024 SDValue X = N0.getOperand(0);
10025 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10026 AddToWorklist(NotX.getNode());
10027 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10028 }
10029
10030 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10031 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10032 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10033 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10034 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10035 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10036 SDValue S0 = S.getOperand(0);
10037 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10038 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10039 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10040 return DAG.getNode(ISD::ABS, DL, VT, S0);
10041 }
10042 }
10043
10044 // fold (xor x, x) -> 0
10045 if (N0 == N1)
10046 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10047
10048 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10049 // Here is a concrete example of this equivalence:
10050 // i16 x == 14
10051 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10052 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10053 //
10054 // =>
10055 //
10056 // i16 ~1 == 0b1111111111111110
10057 // i16 rol(~1, 14) == 0b1011111111111111
10058 //
10059 // Some additional tips to help conceptualize this transform:
10060 // - Try to see the operation as placing a single zero in a value of all ones.
10061 // - There exists no value for x which would allow the result to contain zero.
10062 // - Values of x larger than the bitwidth are undefined and do not require a
10063 // consistent result.
10064 // - Pushing the zero left requires shifting one bits in from the right.
10065 // A rotate left of ~1 is a nice way of achieving the desired result.
10066 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10067 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
10068 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10069 N0.getOperand(1));
10070 }
10071
10072 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10073 if (N0Opcode == N1.getOpcode())
10074 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10075 return V;
10076
10077 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10078 return R;
10079 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10080 return R;
10081 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10082 return R;
10083
10084 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10085 if (SDValue MM = unfoldMaskedMerge(N))
10086 return MM;
10087
10088 // Simplify the expression using non-local knowledge.
10089 if (SimplifyDemandedBits(SDValue(N, 0)))
10090 return SDValue(N, 0);
10091
10092 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10093 return Combined;
10094
10095 return SDValue();
10096}
10097
10098/// If we have a shift-by-constant of a bitwise logic op that itself has a
10099/// shift-by-constant operand with identical opcode, we may be able to convert
10100/// that into 2 independent shifts followed by the logic op. This is a
10101/// throughput improvement.
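/// For example: srl (xor (srl X, 3), Y), 2 --> xor (srl X, 5), (srl Y, 2).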
10102 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10103 // Match a one-use bitwise logic op.
10104 SDValue LogicOp = Shift->getOperand(0);
10105 if (!LogicOp.hasOneUse())
10106 return SDValue();
10107
10108 unsigned LogicOpcode = LogicOp.getOpcode();
10109 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10110 LogicOpcode != ISD::XOR)
10111 return SDValue();
10112
10113 // Find a matching one-use shift by constant.
10114 unsigned ShiftOpcode = Shift->getOpcode();
10115 SDValue C1 = Shift->getOperand(1);
10116 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10117 assert(C1Node && "Expected a shift with constant operand");
10118 const APInt &C1Val = C1Node->getAPIntValue();
10119 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10120 const APInt *&ShiftAmtVal) {
10121 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10122 return false;
10123
10124 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10125 if (!ShiftCNode)
10126 return false;
10127
10128 // Capture the shifted operand and shift amount value.
10129 ShiftOp = V.getOperand(0);
10130 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10131
10132 // Shift amount types do not have to match their operand type, so check that
10133 // the constants are the same width.
10134 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10135 return false;
10136
10137 // The fold is not valid if the sum of the shift values doesn't fit in the
10138 // given shift amount type.
10139 bool Overflow = false;
10140 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10141 if (Overflow)
10142 return false;
10143
10144 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10145 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10146 return false;
10147
10148 return true;
10149 };
10150
10151 // Logic ops are commutative, so check each operand for a match.
10152 SDValue X, Y;
10153 const APInt *C0Val;
10154 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10155 Y = LogicOp.getOperand(1);
10156 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10157 Y = LogicOp.getOperand(0);
10158 else
10159 return SDValue();
10160
10161 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10162 SDLoc DL(Shift);
10163 EVT VT = Shift->getValueType(0);
10164 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10165 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10166 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10167 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10168 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10169 LogicOp->getFlags());
10170}
10171
10172/// Handle transforms common to the three shifts, when the shift amount is a
10173/// constant.
10174/// We are looking for: (shift being one of shl/sra/srl)
10175/// shift (binop X, C0), C1
10176/// And want to transform into:
10177/// binop (shift X, C1), (shift C0, C1)
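/// For example: shl (or X, 7), 2 --> or (shl X, 2), 28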
10178SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10179 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10180
10181 // Do not turn a 'not' into a regular xor.
10182 if (isBitwiseNot(N->getOperand(0)))
10183 return SDValue();
10184
10185 // The inner binop must be one-use, since we want to replace it.
10186 SDValue LHS = N->getOperand(0);
10187 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10188 return SDValue();
10189
10190 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10191 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10192 return R;
10193
10194 // We want to pull some binops through shifts, so that we have (and (shift))
10195 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10196 // thing happens with address calculations, so it's important to canonicalize
10197 // it.
10198 switch (LHS.getOpcode()) {
10199 default:
10200 return SDValue();
10201 case ISD::OR:
10202 case ISD::XOR:
10203 case ISD::AND:
10204 break;
10205 case ISD::ADD:
10206 if (N->getOpcode() != ISD::SHL)
10207 return SDValue(); // only shl(add) not sr[al](add).
10208 break;
10209 }
10210
10211 // FIXME: disable this unless the input to the binop is a shift by a constant
10212 // or is copy/select. Enable this in other cases once we figure out when it's
10213 // exactly profitable.
10214 SDValue BinOpLHSVal = LHS.getOperand(0);
10215 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10216 BinOpLHSVal.getOpcode() == ISD::SRA ||
10217 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10218 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10219 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10220 BinOpLHSVal.getOpcode() == ISD::SELECT;
10221
10222 if (!IsShiftByConstant && !IsCopyOrSelect)
10223 return SDValue();
10224
10225 if (IsCopyOrSelect && N->hasOneUse())
10226 return SDValue();
10227
10228 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10229 SDLoc DL(N);
10230 EVT VT = N->getValueType(0);
10231 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10232 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10233 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10234 N->getOperand(1));
10235 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10236 }
10237
10238 return SDValue();
10239}
10240
10241SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10242 assert(N->getOpcode() == ISD::TRUNCATE);
10243 assert(N->getOperand(0).getOpcode() == ISD::AND);
10244
10245 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10246 EVT TruncVT = N->getValueType(0);
10247 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10248 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10249 SDValue N01 = N->getOperand(0).getOperand(1);
10250 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10251 SDLoc DL(N);
10252 SDValue N00 = N->getOperand(0).getOperand(0);
10253 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10254 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10255 AddToWorklist(Trunc00.getNode());
10256 AddToWorklist(Trunc01.getNode());
10257 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10258 }
10259 }
10260
10261 return SDValue();
10262}
10263
10264SDValue DAGCombiner::visitRotate(SDNode *N) {
10265 SDLoc dl(N);
10266 SDValue N0 = N->getOperand(0);
10267 SDValue N1 = N->getOperand(1);
10268 EVT VT = N->getValueType(0);
10269 unsigned Bitsize = VT.getScalarSizeInBits();
10270
10271 // fold (rot x, 0) -> x
10272 if (isNullOrNullSplat(N1))
10273 return N0;
10274
10275 // fold (rot x, c) -> x iff (c % BitSize) == 0
10276 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10277 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10278 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10279 return N0;
10280 }
10281
10282 // fold (rot x, c) -> (rot x, c % BitSize)
10283 bool OutOfRange = false;
10284 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10285 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10286 return true;
10287 };
10288 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10289 EVT AmtVT = N1.getValueType();
10290 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10291 if (SDValue Amt =
10292 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10293 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10294 }
10295
10296 // rot i16 X, 8 --> bswap X
10297 auto *RotAmtC = isConstOrConstSplat(N1);
10298 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10299 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10300 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10301
10302 // Simplify the operands using demanded-bits information.
10303 if (SimplifyDemandedBits(SDValue(N, 0)))
10304 return SDValue(N, 0);
10305
10306 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10307 if (N1.getOpcode() == ISD::TRUNCATE &&
10308 N1.getOperand(0).getOpcode() == ISD::AND) {
10309 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10310 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10311 }
10312
10313 unsigned NextOp = N0.getOpcode();
10314
10315 // fold (rot* (rot* x, c2), c1)
10316 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
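// e.g. for i8: (rotl (rotr x, 3), 10)
// -> (rotl x, ((10 % 8) - (3 % 8) + 8) % 8) = (rotl x, 7)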
10317 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10318 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10319 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10320 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10321 EVT ShiftVT = N1.getValueType();
10322 bool SameSide = (N->getOpcode() == NextOp);
10323 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10324 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10325 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10326 {N1, BitsizeC});
10327 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10328 {N0.getOperand(1), BitsizeC});
10329 if (Norm1 && Norm2)
10330 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10331 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10332 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10333 {CombinedShift, BitsizeC});
10334 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10335 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10336 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10337 CombinedShiftNorm);
10338 }
10339 }
10340 }
10341 return SDValue();
10342}
10343
10344SDValue DAGCombiner::visitSHL(SDNode *N) {
10345 SDValue N0 = N->getOperand(0);
10346 SDValue N1 = N->getOperand(1);
10347 if (SDValue V = DAG.simplifyShift(N0, N1))
10348 return V;
10349
10350 SDLoc DL(N);
10351 EVT VT = N0.getValueType();
10352 EVT ShiftVT = N1.getValueType();
10353 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10354
10355 // fold (shl c1, c2) -> c1<<c2
10356 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10357 return C;
10358
10359 // fold vector ops
10360 if (VT.isVector()) {
10361 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10362 return FoldedVOp;
10363
10364 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10365 // If setcc produces all-one true value then:
10366 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10367 if (N1CV && N1CV->isConstant()) {
10368 if (N0.getOpcode() == ISD::AND) {
10369 SDValue N00 = N0->getOperand(0);
10370 SDValue N01 = N0->getOperand(1);
10371 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10372
10373 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10374 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10375 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10376 if (SDValue C =
10377 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10378 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10379 }
10380 }
10381 }
10382 }
10383
10384 if (SDValue NewSel = foldBinOpIntoSelect(N))
10385 return NewSel;
10386
10387 // if (shl x, c) is known to be zero, return 0
10388 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10389 return DAG.getConstant(0, DL, VT);
10390
10391 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10392 if (N1.getOpcode() == ISD::TRUNCATE &&
10393 N1.getOperand(0).getOpcode() == ISD::AND) {
10394 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10395 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10396 }
10397
10398 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
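// Illustrative example for i8: shl (shl x, 3), 4 --> shl x, 7, while
// shl (shl x, 5), 4 --> 0 because 5 + 4 >= 8.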
10399 if (N0.getOpcode() == ISD::SHL) {
10400 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10401 ConstantSDNode *RHS) {
10402 APInt c1 = LHS->getAPIntValue();
10403 APInt c2 = RHS->getAPIntValue();
10404 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10405 return (c1 + c2).uge(OpSizeInBits);
10406 };
10407 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10408 return DAG.getConstant(0, DL, VT);
10409
10410 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10411 ConstantSDNode *RHS) {
10412 APInt c1 = LHS->getAPIntValue();
10413 APInt c2 = RHS->getAPIntValue();
10414 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10415 return (c1 + c2).ult(OpSizeInBits);
10416 };
10417 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10418 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10419 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10420 }
10421 }
10422
10423 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10424 // For this to be valid, the second form must not preserve any of the bits
10425 // that are shifted out by the inner shift in the first form. This means
10426 // the outer shift size must be >= the number of bits added by the ext.
10427 // As a corollary, we don't care what kind of ext it is.
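// Illustrative example: shl (zext i16 (shl X, 12) to i32), 16
// --> shl (zext i16 X to i32), 28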
10428 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10429 N0.getOpcode() == ISD::ANY_EXTEND ||
10430 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10431 N0.getOperand(0).getOpcode() == ISD::SHL) {
10432 SDValue N0Op0 = N0.getOperand(0);
10433 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10434 EVT InnerVT = N0Op0.getValueType();
10435 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10436
10437 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10438 ConstantSDNode *RHS) {
10439 APInt c1 = LHS->getAPIntValue();
10440 APInt c2 = RHS->getAPIntValue();
10441 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10442 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10443 (c1 + c2).uge(OpSizeInBits);
10444 };
10445 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10446 /*AllowUndefs*/ false,
10447 /*AllowTypeMismatch*/ true))
10448 return DAG.getConstant(0, DL, VT);
10449
10450 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10451 ConstantSDNode *RHS) {
10452 APInt c1 = LHS->getAPIntValue();
10453 APInt c2 = RHS->getAPIntValue();
10454 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10455 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10456 (c1 + c2).ult(OpSizeInBits);
10457 };
10458 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10459 /*AllowUndefs*/ false,
10460 /*AllowTypeMismatch*/ true)) {
10461 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10462 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10463 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10464 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10465 }
10466 }
10467
10468 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10469 // Only fold this if the inner zext has no other uses to avoid increasing
10470 // the total number of instructions.
10471 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10472 N0.getOperand(0).getOpcode() == ISD::SRL) {
10473 SDValue N0Op0 = N0.getOperand(0);
10474 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10475
10476 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10477 APInt c1 = LHS->getAPIntValue();
10478 APInt c2 = RHS->getAPIntValue();
10479 zeroExtendToMatch(c1, c2);
10480 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10481 };
10482 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10483 /*AllowUndefs*/ false,
10484 /*AllowTypeMismatch*/ true)) {
10485 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10486 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10487 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10488 AddToWorklist(NewSHL.getNode());
10489 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10490 }
10491 }
10492
10493 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10494 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10495 ConstantSDNode *RHS) {
10496 const APInt &LHSC = LHS->getAPIntValue();
10497 const APInt &RHSC = RHS->getAPIntValue();
10498 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10499 LHSC.getZExtValue() <= RHSC.getZExtValue();
10500 };
10501
10502 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10503 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10504 if (N0->getFlags().hasExact()) {
10505 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10506 /*AllowUndefs*/ false,
10507 /*AllowTypeMismatch*/ true)) {
10508 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10509 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10510 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10511 }
10512 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10513 /*AllowUndefs*/ false,
10514 /*AllowTypeMismatch*/ true)) {
10515 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10516 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10517 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10518 }
10519 }
10520
10521 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10522 // (and (srl x, (sub c1, c2)), MASK)
10523 // Only fold this if the inner shift has no other uses -- if it does,
10524 // folding this will increase the total number of instructions.
10525 if (N0.getOpcode() == ISD::SRL &&
10526 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10527 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10528 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10529 /*AllowUndefs*/ false,
10530 /*AllowTypeMismatch*/ true)) {
10531 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10532 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10533 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10534 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10535 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10536 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10537 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10538 }
10539 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10540 /*AllowUndefs*/ false,
10541 /*AllowTypeMismatch*/ true)) {
10542 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10543 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10544 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10545 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10546 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10547 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10548 }
10549 }
10550 }
10551
10552 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10553 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10554 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10555 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10556 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10557 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10558 }
10559
10560 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10561 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10562 // Variant of version done on multiply, except mul by a power of 2 is turned
10563 // into a shift.
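// e.g. shl (add x, 5), 3 --> add (shl x, 3), 40 since 5 << 3 == 40.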
10564 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10565 TLI.isDesirableToCommuteWithShift(N, Level)) {
10566 SDValue N01 = N0.getOperand(1);
10567 if (SDValue Shl1 =
10568 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10569 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10570 AddToWorklist(Shl0.getNode());
10571 SDNodeFlags Flags;
10572 // Preserve the disjoint flag for Or.
10573 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10574 Flags.setDisjoint(true);
10575 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10576 }
10577 }
10578
10579 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10580 // TODO: Add zext/add_nuw variant with suitable test coverage
10581 // TODO: Should we limit this with isLegalAddImmediate?
10582 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10583 N0.getOperand(0).getOpcode() == ISD::ADD &&
10584 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10585 TLI.isDesirableToCommuteWithShift(N, Level)) {
10586 SDValue Add = N0.getOperand(0);
10587 SDLoc DL(N0);
10588 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10589 {Add.getOperand(1)})) {
10590 if (SDValue ShlC =
10591 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10592 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10593 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10594 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10595 }
10596 }
10597 }
10598
10599 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10600 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10601 SDValue N01 = N0.getOperand(1);
10602 if (SDValue Shl =
10603 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10604 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10605 }
10606
10607 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10608 if (N1C && !N1C->isOpaque())
10609 if (SDValue NewSHL = visitShiftByConstant(N))
10610 return NewSHL;
10611
10612 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10613 // target.
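// This relies on the identity (1 << cttz(Y)) == (Y & -Y), the lowest set bit
// of Y, which holds for any non-zero Y.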
10614 if (((N1.getOpcode() == ISD::CTTZ &&
10615 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10616 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10617 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10618 !TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, ShiftVT)) {
10619 SDValue Y = N1.getOperand(0);
10620 SDLoc DL(N);
10621 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10622 SDValue And =
10623 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10624 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10625 }
10626
10627 if (SimplifyDemandedBits(SDValue(N, 0)))
10628 return SDValue(N, 0);
10629
10630 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10631 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10632 const APInt &C0 = N0.getConstantOperandAPInt(0);
10633 const APInt &C1 = N1C->getAPIntValue();
10634 return DAG.getVScale(DL, VT, C0 << C1);
10635 }
10636
10637 SDValue X;
10638 APInt VS0;
10639
10640 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10641 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10642 SDNodeFlags Flags;
10643 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10644 N0->getFlags().hasNoUnsignedWrap());
10645
10646 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10647 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10648 }
10649
10650 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10651 APInt ShlVal;
10652 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10653 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10654 const APInt &C0 = N0.getConstantOperandAPInt(0);
10655 if (ShlVal.ult(C0.getBitWidth())) {
10656 APInt NewStep = C0 << ShlVal;
10657 return DAG.getStepVector(DL, VT, NewStep);
10658 }
10659 }
10660
10661 return SDValue();
10662}
10663
10664// Transform a right shift of a multiply into a multiply-high.
10665// Examples:
10666 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10667 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10668 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10669 const TargetLowering &TLI) {
10670 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10671 "SRL or SRA node is required here!");
10672
10673 // Check the shift amount. Proceed with the transformation if the shift
10674 // amount is constant.
10675 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10676 if (!ShiftAmtSrc)
10677 return SDValue();
10678
10679 // The operation feeding into the shift must be a multiply.
10680 SDValue ShiftOperand = N->getOperand(0);
10681 if (ShiftOperand.getOpcode() != ISD::MUL)
10682 return SDValue();
10683
10684 // Both operands must be equivalent extend nodes.
10685 SDValue LeftOp = ShiftOperand.getOperand(0);
10686 SDValue RightOp = ShiftOperand.getOperand(1);
10687
10688 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10689 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10690
10691 if (!IsSignExt && !IsZeroExt)
10692 return SDValue();
10693
10694 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10695 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10696
10697 // return true if U may use the lower bits of its operands
10698 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10699 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10700 return true;
10701 }
10702 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10703 if (!UShiftAmtSrc) {
10704 return true;
10705 }
10706 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10707 return UShiftAmt < NarrowVTSize;
10708 };
10709
10710 // If the lower part of the MUL is also used and MUL_LOHI is supported
10711 // do not introduce the MULH in favor of MUL_LOHI
10712 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10713 if (!ShiftOperand.hasOneUse() &&
10714 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10715 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10716 return SDValue();
10717 }
10718
10719 SDValue MulhRightOp;
10720 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10721 unsigned ActiveBits = IsSignExt
10722 ? Constant->getAPIntValue().getSignificantBits()
10723 : Constant->getAPIntValue().getActiveBits();
10724 if (ActiveBits > NarrowVTSize)
10725 return SDValue();
10726 MulhRightOp = DAG.getConstant(
10727 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10728 NarrowVT);
10729 } else {
10730 if (LeftOp.getOpcode() != RightOp.getOpcode())
10731 return SDValue();
10732 // Check that the two extend nodes are the same type.
10733 if (NarrowVT != RightOp.getOperand(0).getValueType())
10734 return SDValue();
10735 MulhRightOp = RightOp.getOperand(0);
10736 }
10737
10738 EVT WideVT = LeftOp.getValueType();
10739 // Proceed with the transformation if the wide types match.
10740 assert((WideVT == RightOp.getValueType()) &&
10741 "Cannot have a multiply node with two different operand types.");
10742
10743 // Proceed with the transformation if the wide type is twice as large
10744 // as the narrow type.
10745 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10746 return SDValue();
10747
10748 // Check the shift amount with the narrow type size.
10749 // Proceed with the transformation if the shift amount is the width
10750 // of the narrow type.
10751 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10752 if (ShiftAmt != NarrowVTSize)
10753 return SDValue();
10754
10755 // If the operation feeding into the MUL is a sign extend (sext),
10756 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10757 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10758
10759 // Combine to mulh if mulh is legal/custom for the narrow type on the target,
10760 // or if it is a vector type that we can transform to an acceptable type and
10761 // rely on legalization to split/combine the result.
10762 if (NarrowVT.isVector()) {
10763 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10764 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10765 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10766 return SDValue();
10767 } else {
10768 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10769 return SDValue();
10770 }
10771
10772 SDValue Result =
10773 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10774 bool IsSigned = N->getOpcode() == ISD::SRA;
10775 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10776}
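// Worked example of the transform above (illustrative, with concrete widths):
// for i32 values a and b, (srl (mul (zext a to i64), (zext b to i64)), 32)
// produces the high 32 bits of the full 64-bit product. Since the wide type
// (i64) is exactly twice the narrow type (i32) and the shift amount equals
// the narrow width, this is (zext (mulhu a, b)), computed directly in i32.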
10777
10778// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10779// This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10780static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10781 unsigned Opcode = N->getOpcode();
10782 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10783 return SDValue();
10784
10785 SDValue N0 = N->getOperand(0);
10786 EVT VT = N->getValueType(0);
10787 SDLoc DL(N);
10788 SDValue X, Y;
10789
10790 // If both operands are bswap/bitreverse, ignore the multiuse
10791 if (sd_match(N, m_UnaryOp(Opcode, m_BitwiseLogic(m_UnaryOp(Opcode, m_Value(X)),
10792 m_UnaryOp(Opcode, m_Value(Y))))))
10793 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10794
10795 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10796 if (sd_match(N0, m_OneUse(m_BitwiseLogic(
10797 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10798 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10799 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10800 }
10801
10802 return SDValue();
10803}
10804
10805SDValue DAGCombiner::visitSRA(SDNode *N) {
10806 SDValue N0 = N->getOperand(0);
10807 SDValue N1 = N->getOperand(1);
10808 if (SDValue V = DAG.simplifyShift(N0, N1))
10809 return V;
10810
10811 SDLoc DL(N);
10812 EVT VT = N0.getValueType();
10813 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10814
10815 // fold (sra c1, c2) -> c1 >>s c2
10816 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10817 return C;
10818
10819 // Arithmetic shifting an all-sign-bit value is a no-op.
10820 // fold (sra 0, x) -> 0
10821 // fold (sra -1, x) -> -1
10822 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10823 return N0;
10824
10825 // fold vector ops
10826 if (VT.isVector())
10827 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10828 return FoldedVOp;
10829
10830 if (SDValue NewSel = foldBinOpIntoSelect(N))
10831 return NewSel;
10832
10833 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10834
10835 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10836 // clamp (add c1, c2) to max shift.
10837 if (N0.getOpcode() == ISD::SRA) {
10838 EVT ShiftVT = N1.getValueType();
10839 EVT ShiftSVT = ShiftVT.getScalarType();
10840 SmallVector<SDValue, 16> ShiftValues;
10841
10842 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10843 APInt c1 = LHS->getAPIntValue();
10844 APInt c2 = RHS->getAPIntValue();
10845 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10846 APInt Sum = c1 + c2;
10847 unsigned ShiftSum =
10848 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10849 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10850 return true;
10851 };
10852 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10853 SDValue ShiftValue;
10854 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10855 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10856 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10857 assert(ShiftValues.size() == 1 &&
10858 "Expected matchBinaryPredicate to return one element for "
10859 "SPLAT_VECTORs");
10860 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10861 } else
10862 ShiftValue = ShiftValues[0];
10863 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10864 }
10865 }
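// Illustrative instance of the clamped shift-sum fold above: for i8,
// (sra (sra x, 3), 2) becomes (sra x, 5), while (sra (sra x, 6), 7) has a
// sum of 13 >= 8 and is clamped to the maximum shift, giving (sra x, 7).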
10866
10867 // fold (sra (shl X, m), (sub result_size, n))
10868 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10869 // result_size - n != m.
10870 // If truncate is free for the target, sext(shl) is likely to result in
10871 // better code.
10872 if (N0.getOpcode() == ISD::SHL && N1C) {
10873 // Get the two constants of the shifts, CN0 = m, CN = n.
10874 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10875 if (N01C) {
10876 LLVMContext &Ctx = *DAG.getContext();
10877 // Determine what the truncate's result bitsize and type would be.
10878 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10879
10880 if (VT.isVector())
10881 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10882
10883 // Determine the residual right-shift amount.
10884 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10885
10886 // If the shift is not a no-op (in which case this should be just a sign
10887 // extend already), the truncated-to type is legal, sign_extend is legal
10888 // on that type, and the truncate to that type is both legal and free,
10889 // perform the transform.
10890 if ((ShiftAmt > 0) &&
10891 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10892 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10893 TLI.isTruncateFree(VT, TruncVT)) {
10894 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10895 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10896 N0.getOperand(0), Amt);
10897 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10898 Shift);
10899 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10900 N->getValueType(0), Trunc);
10901 }
10902 }
10903 }
10904
10905 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10906 // sra (add (shl X, N1C), AddC), N1C -->
10907 // sext (add (trunc X to (width - N1C)), AddC')
10908 // sra (sub AddC, (shl X, N1C)), N1C -->
10909 // sext (sub AddC1',(trunc X to (width - N1C)))
10910 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10911 N0.hasOneUse()) {
10912 bool IsAdd = N0.getOpcode() == ISD::ADD;
10913 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10914 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10915 Shl.hasOneUse()) {
10916 // TODO: AddC does not need to be a splat.
10917 if (ConstantSDNode *AddC =
10918 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10919 // Determine what the truncate's type would be and ask the target if
10920 // that is a free operation.
10921 LLVMContext &Ctx = *DAG.getContext();
10922 unsigned ShiftAmt = N1C->getZExtValue();
10923 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10924 if (VT.isVector())
10925 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10926
10927 // TODO: The simple type check probably belongs in the default hook
10928 // implementation and/or target-specific overrides (because
10929 // non-simple types likely require masking when legalized), but
10930 // that restriction may conflict with other transforms.
10931 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10932 TLI.isTruncateFree(VT, TruncVT)) {
10933 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10934 SDValue ShiftC =
10935 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10936 TruncVT.getScalarSizeInBits()),
10937 DL, TruncVT);
10938 SDValue Add;
10939 if (IsAdd)
10940 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10941 else
10942 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10943 return DAG.getSExtOrTrunc(Add, DL, VT);
10944 }
10945 }
10946 }
10947 }
10948
10949 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10950 if (N1.getOpcode() == ISD::TRUNCATE &&
10951 N1.getOperand(0).getOpcode() == ISD::AND) {
10952 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10953 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10954 }
10955
10956 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10957 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10958 // if c1 is equal to the number of bits the trunc removes
10959 // TODO - support non-uniform vector shift amounts.
10960 if (N0.getOpcode() == ISD::TRUNCATE &&
10961 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10962 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10963 N0.getOperand(0).hasOneUse() &&
10964 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10965 SDValue N0Op0 = N0.getOperand(0);
10966 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10967 EVT LargeVT = N0Op0.getValueType();
10968 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10969 if (LargeShift->getAPIntValue() == TruncBits) {
10970 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10971 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10972 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10973 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10974 SDValue SRA =
10975 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10976 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10977 }
10978 }
10979 }
10980
10981 // Simplify, based on bits shifted out of the LHS.
10982 if (SimplifyDemandedBits(SDValue(N, 0)))
10983 return SDValue(N, 0);
10984
10985 // If the sign bit is known to be zero, switch this to a SRL.
10986 if (DAG.SignBitIsZero(N0))
10987 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10988
10989 if (N1C && !N1C->isOpaque())
10990 if (SDValue NewSRA = visitShiftByConstant(N))
10991 return NewSRA;
10992
10993 // Try to transform this shift into a multiply-high if
10994 // it matches the appropriate pattern detected in combineShiftToMULH.
10995 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10996 return MULH;
10997
10998 // Attempt to convert a sra of a load into a narrower sign-extending load.
10999 if (SDValue NarrowLoad = reduceLoadWidth(N))
11000 return NarrowLoad;
11001
11002 if (SDValue AVG = foldShiftToAvg(N, DL))
11003 return AVG;
11004
11005 return SDValue();
11006}
11007
11008SDValue DAGCombiner::visitSRL(SDNode *N) {
11009 SDValue N0 = N->getOperand(0);
11010 SDValue N1 = N->getOperand(1);
11011 if (SDValue V = DAG.simplifyShift(N0, N1))
11012 return V;
11013
11014 SDLoc DL(N);
11015 EVT VT = N0.getValueType();
11016 EVT ShiftVT = N1.getValueType();
11017 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11018
11019 // fold (srl c1, c2) -> c1 >>u c2
11020 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11021 return C;
11022
11023 // fold vector ops
11024 if (VT.isVector())
11025 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11026 return FoldedVOp;
11027
11028 if (SDValue NewSel = foldBinOpIntoSelect(N))
11029 return NewSel;
11030
11031 // if (srl x, c) is known to be zero, return 0
11032 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11033 if (N1C &&
11034 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11035 return DAG.getConstant(0, DL, VT);
11036
11037 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11038 if (N0.getOpcode() == ISD::SRL) {
11039 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11040 ConstantSDNode *RHS) {
11041 APInt c1 = LHS->getAPIntValue();
11042 APInt c2 = RHS->getAPIntValue();
11043 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11044 return (c1 + c2).uge(OpSizeInBits);
11045 };
11046 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11047 return DAG.getConstant(0, DL, VT);
11048
11049 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11050 ConstantSDNode *RHS) {
11051 APInt c1 = LHS->getAPIntValue();
11052 APInt c2 = RHS->getAPIntValue();
11053 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11054 return (c1 + c2).ult(OpSizeInBits);
11055 };
11056 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11057 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11058 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11059 }
11060 }
11061
11062 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11063 N0.getOperand(0).getOpcode() == ISD::SRL) {
11064 SDValue InnerShift = N0.getOperand(0);
11065 // TODO - support non-uniform vector shift amounts.
11066 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11067 uint64_t c1 = N001C->getZExtValue();
11068 uint64_t c2 = N1C->getZExtValue();
11069 EVT InnerShiftVT = InnerShift.getValueType();
11070 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11071 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11072 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11073 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11074 if (c1 + OpSizeInBits == InnerShiftSize) {
11075 if (c1 + c2 >= InnerShiftSize)
11076 return DAG.getConstant(0, DL, VT);
11077 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11078 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11079 InnerShift.getOperand(0), NewShiftAmt);
11080 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11081 }
11082 // In the more general case, we can clear the high bits after the shift:
11083 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11084 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11085 c1 + c2 < InnerShiftSize) {
11086 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11087 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11088 InnerShift.getOperand(0), NewShiftAmt);
11089 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11090 OpSizeInBits - c2),
11091 DL, InnerShiftVT);
11092 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11093 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11094 }
11095 }
11096 }
11097
11098 if (N0.getOpcode() == ISD::SHL) {
11099 // fold (srl (shl nuw x, c), c) -> x
11100 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11101 return N0.getOperand(0);
11102
11103 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
11104 // (and (srl x, (sub c2, c1), MASK)
11105 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11106 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
11107 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11108 ConstantSDNode *RHS) {
11109 const APInt &LHSC = LHS->getAPIntValue();
11110 const APInt &RHSC = RHS->getAPIntValue();
11111 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11112 LHSC.getZExtValue() <= RHSC.getZExtValue();
11113 };
11114 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11115 /*AllowUndefs*/ false,
11116 /*AllowTypeMismatch*/ true)) {
11117 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11118 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11119 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11120 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11121 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11122 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11123 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11124 }
11125 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11126 /*AllowUndefs*/ false,
11127 /*AllowTypeMismatch*/ true)) {
11128 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11129 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11130 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11131 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11132 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11133 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11134 }
11135 }
11136 }
11137
11138 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11139 // TODO - support non-uniform vector shift amounts.
11140 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11141 // Shifting in all undef bits?
11142 EVT SmallVT = N0.getOperand(0).getValueType();
11143 unsigned BitSize = SmallVT.getScalarSizeInBits();
11144 if (N1C->getAPIntValue().uge(BitSize))
11145 return DAG.getUNDEF(VT);
11146
11147 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11148 uint64_t ShiftAmt = N1C->getZExtValue();
11149 SDLoc DL0(N0);
11150 SDValue SmallShift =
11151 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11152 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11153 AddToWorklist(SmallShift.getNode());
11154 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11155 return DAG.getNode(ISD::AND, DL, VT,
11156 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11157 DAG.getConstant(Mask, DL, VT));
11158 }
11159 }
11160
11161 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11162 // bit, which is unmodified by sra.
11163 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11164 if (N0.getOpcode() == ISD::SRA)
11165 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11166 }
11167
11168 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11169 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11170 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11171 isPowerOf2_32(OpSizeInBits) &&
11172 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11173 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11174
11175 // If any of the input bits are KnownOne, then the input couldn't be all
11176 // zeros, thus the result of the srl will always be zero.
11177 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11178
11179 // If all of the bits input to the ctlz node are known to be zero, then
11180 // the result of the ctlz is "32" and the result of the shift is one.
11181 APInt UnknownBits = ~Known.Zero;
11182 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11183
11184 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11185 if (UnknownBits.isPowerOf2()) {
11186 // Okay, we know that only the single bit specified by UnknownBits
11187 // could be set on input to the CTLZ node. If this bit is set, the SRL
11188 // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
11189 // to an SRL/XOR pair, which is likely to simplify more.
11190 unsigned ShAmt = UnknownBits.countr_zero();
11191 SDValue Op = N0.getOperand(0);
11192
11193 if (ShAmt) {
11194 SDLoc DL(N0);
11195 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11196 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11197 AddToWorklist(Op.getNode());
11198 }
11199 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11200 }
11201 }
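// Illustrative instance of the CTLZ/SRL fold above: for i32 with
// OpSizeInBits == 32 the shift amount is log2(32) == 5. If only bit 3 of the
// input can be nonzero, ctlz yields 28 (bit set) or 32 (input zero), so the
// srl by 5 yields 0 or 1 respectively; (xor (srl x, 3), 1) computes the same.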
11202
11203 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11204 if (N1.getOpcode() == ISD::TRUNCATE &&
11205 N1.getOperand(0).getOpcode() == ISD::AND) {
11206 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11207 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11208 }
11209
11210 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11211 // -> (logic_op (srl x, c1), (zext y))
11212 // c1 <= leadingzeros(zext(y))
11213 SDValue X, ZExtY;
11214 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11215 m_Value(X),
11218 m_Specific(N1))))))) {
11219 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11220 ZExtY.getOperand(0).getScalarValueSizeInBits();
11221 if (N1C->getZExtValue() <= NumLeadingZeros)
11222 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11223 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11224 }
11225
11226 // fold operands of srl based on knowledge that the low bits are not
11227 // demanded.
11228 if (SimplifyDemandedBits(SDValue(N, 0)))
11229 return SDValue(N, 0);
11230
11231 if (N1C && !N1C->isOpaque())
11232 if (SDValue NewSRL = visitShiftByConstant(N))
11233 return NewSRL;
11234
11235 // Attempt to convert a srl of a load into a narrower zero-extending load.
11236 if (SDValue NarrowLoad = reduceLoadWidth(N))
11237 return NarrowLoad;
11238
11239 // Here is a common situation. We want to optimize:
11240 //
11241 // %a = ...
11242 // %b = and i32 %a, 2
11243 // %c = srl i32 %b, 1
11244 // brcond i32 %c ...
11245 //
11246 // into
11247 //
11248 // %a = ...
11249 // %b = and %a, 2
11250 // %c = setcc eq %b, 0
11251 // brcond %c ...
11252 //
11253 // However, after the source operand of SRL is optimized into AND, the SRL
11254 // itself may not be optimized further. Look for it and add the BRCOND into
11255 // the worklist.
11256 //
11257 // This also tends to happen for binary operations when SimplifyDemandedBits
11258 // is involved.
11259 //
11260 // FIXME: This is unnecessary if we process the DAG in topological order,
11261 // which we plan to do. This workaround can be removed once the DAG is
11262 // processed in topological order.
11263 if (N->hasOneUse()) {
11264 SDNode *User = *N->user_begin();
11265
11266 // Look past the truncate.
11267 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11268 User = *User->user_begin();
11269
11270 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11271 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11272 AddToWorklist(User);
11273 }
11274
11275 // Try to transform this shift into a multiply-high if
11276 // it matches the appropriate pattern detected in combineShiftToMULH.
11277 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11278 return MULH;
11279
11280 if (SDValue AVG = foldShiftToAvg(N, DL))
11281 return AVG;
11282
11283 return SDValue();
11284}
11285
11286SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11287 EVT VT = N->getValueType(0);
11288 SDValue N0 = N->getOperand(0);
11289 SDValue N1 = N->getOperand(1);
11290 SDValue N2 = N->getOperand(2);
11291 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11292 unsigned BitWidth = VT.getScalarSizeInBits();
11293 SDLoc DL(N);
11294
11295 // fold (fshl/fshr C0, C1, C2) -> C3
11296 if (SDValue C =
11297 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11298 return C;
11299
11300 // fold (fshl N0, N1, 0) -> N0
11301 // fold (fshr N0, N1, 0) -> N1
11302 if (isPowerOf2_32(BitWidth))
11303 if (DAG.MaskedValueIsZero(
11304 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11305 return IsFSHL ? N0 : N1;
11306
11307 auto IsUndefOrZero = [](SDValue V) {
11308 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11309 };
11310
11311 // TODO - support non-uniform vector shift amounts.
11312 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11313 EVT ShAmtTy = N2.getValueType();
11314
11315 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11316 if (Cst->getAPIntValue().uge(BitWidth)) {
11317 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11318 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11319 DAG.getConstant(RotAmt, DL, ShAmtTy));
11320 }
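// Illustrative instance of the modulo fold above: for an i8 funnel shift,
// (fshl x, y, 11) is rewritten as (fshl x, y, 3), since 11 urem 8 == 3.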
11321
11322 unsigned ShAmt = Cst->getZExtValue();
11323 if (ShAmt == 0)
11324 return IsFSHL ? N0 : N1;
11325
11326 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11327 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11328 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11329 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11330 if (IsUndefOrZero(N0))
11331 return DAG.getNode(
11332 ISD::SRL, DL, VT, N1,
11333 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11334 if (IsUndefOrZero(N1))
11335 return DAG.getNode(
11336 ISD::SHL, DL, VT, N0,
11337 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11338
11339 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11340 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11341 // TODO - bigendian support once we have test coverage.
11342 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11343 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11344 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11345 !DAG.getDataLayout().isBigEndian()) {
11346 auto *LHS = dyn_cast<LoadSDNode>(N0);
11347 auto *RHS = dyn_cast<LoadSDNode>(N1);
11348 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11349 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11350 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11351 ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
11352 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11353 SDLoc DL(RHS);
11354 uint64_t PtrOff =
11355 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11356 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11357 unsigned Fast = 0;
11358 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11359 RHS->getAddressSpace(), NewAlign,
11360 RHS->getMemOperand()->getFlags(), &Fast) &&
11361 Fast) {
11362 SDValue NewPtr = DAG.getMemBasePlusOffset(
11363 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11364 AddToWorklist(NewPtr.getNode());
11365 SDValue Load = DAG.getLoad(
11366 VT, DL, RHS->getChain(), NewPtr,
11367 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11368 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11369 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11370 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11371 return Load;
11372 }
11373 }
11374 }
11375 }
11376 }
11377
11378 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11379 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11380 // iff we know the shift amount is in range.
11381 // TODO: when is it worth doing SUB(BW, N2) as well?
11382 if (isPowerOf2_32(BitWidth)) {
11383 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11384 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11385 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11386 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11387 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11388 }
11389
11390 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11391 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11392 // TODO: Investigate flipping this rotate if only one is legal.
11393 // If funnel shift is legal as well we might be better off avoiding
11394 // non-constant (BW - N2).
11395 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11396 if (N0 == N1 && hasOperation(RotOpc, VT))
11397 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11398
11399 // Simplify, based on bits shifted out of N0/N1.
11400 if (SimplifyDemandedBits(SDValue(N, 0)))
11401 return SDValue(N, 0);
11402
11403 return SDValue();
11404}
11405
11406SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11407 SDValue N0 = N->getOperand(0);
11408 SDValue N1 = N->getOperand(1);
11409 if (SDValue V = DAG.simplifyShift(N0, N1))
11410 return V;
11411
11412 SDLoc DL(N);
11413 EVT VT = N0.getValueType();
11414
11415 // fold (*shlsat c1, c2) -> c1<<c2
11416 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11417 return C;
11418
11419 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11420
11421 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11422 // fold (sshlsat x, c) -> (shl x, c)
11423 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11424 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11425 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11426
11427 // fold (ushlsat x, c) -> (shl x, c)
11428 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11429 N1C->getAPIntValue().ule(
11430 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11431 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11432 }
11433
11434 return SDValue();
11435}
11436
11437// Given an ABS node, detect the following patterns:
11438// (ABS (SUB (EXTEND a), (EXTEND b))).
11439// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11440// Generates UABD/SABD instruction.
11441SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11442 EVT SrcVT = N->getValueType(0);
11443
11444 if (N->getOpcode() == ISD::TRUNCATE)
11445 N = N->getOperand(0).getNode();
11446
11447 EVT VT = N->getValueType(0);
11448 SDValue Op0, Op1;
11449
11450 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11451 return SDValue();
11452
11453 SDValue AbsOp0 = N->getOperand(0);
11454 unsigned Opc0 = Op0.getOpcode();
11455
11456 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11457 // fallback to ValueTracking.
11458 if (Opc0 != Op1.getOpcode() ||
11459 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11460 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11461 // fold (abs (sub nsw x, y)) -> abds(x, y)
11462 // Don't fold this for unsupported types as we lose the NSW handling.
11463 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11464 (AbsOp0->getFlags().hasNoSignedWrap() ||
11465 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11466 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11467 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11468 }
11469 // fold (abs (sub x, y)) -> abdu(x, y)
11470 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11471 DAG.SignBitIsZero(Op1)) {
11472 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11473 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11474 }
11475 return SDValue();
11476 }
11477
11478 EVT VT0, VT1;
11479 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11480 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11481 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11482 } else {
11483 VT0 = Op0.getOperand(0).getValueType();
11484 VT1 = Op1.getOperand(0).getValueType();
11485 }
11486 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11487
11488 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11489 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11490 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11491 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11492 (VT1 == MaxVT || Op1->hasOneUse()) &&
11493 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11494 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11495 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11496 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11497 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11498 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11499 }
11500
11501 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11502 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11503 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11504 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11505 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11506 }
11507
11508 return SDValue();
11509}
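// Illustrative note on the narrow ABD folds above: for i8 inputs, the
// absolute difference of two sign-extended values is at most 255, so it fits
// in the 8-bit result of abds and the surrounding zero_extend reproduces the
// wide abs(sub(sext, sext)) value exactly.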
11510
11511SDValue DAGCombiner::visitABS(SDNode *N) {
11512 SDValue N0 = N->getOperand(0);
11513 EVT VT = N->getValueType(0);
11514 SDLoc DL(N);
11515
11516 // fold (abs c1) -> c2
11517 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11518 return C;
11519 // fold (abs (abs x)) -> (abs x)
11520 if (N0.getOpcode() == ISD::ABS)
11521 return N0;
11522 // fold (abs x) -> x iff not-negative
11523 if (DAG.SignBitIsZero(N0))
11524 return N0;
11525
11526 if (SDValue ABD = foldABSToABD(N, DL))
11527 return ABD;
11528
11529 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11530 // iff zero_extend/truncate are free.
11531 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11532 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11533 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11534 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11535 hasOperation(ISD::ABS, ExtVT)) {
11536 return DAG.getNode(
11537 ISD::ZERO_EXTEND, DL, VT,
11538 DAG.getNode(ISD::ABS, DL, ExtVT,
11539 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11540 }
11541 }
11542
11543 return SDValue();
11544}
11545
11546SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11547 SDValue N0 = N->getOperand(0);
11548 EVT VT = N->getValueType(0);
11549 SDLoc DL(N);
11550
11551 // fold (bswap c1) -> c2
11552 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11553 return C;
11554 // fold (bswap (bswap x)) -> x
11555 if (N0.getOpcode() == ISD::BSWAP)
11556 return N0.getOperand(0);
11557
11558 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11559 // isn't supported, it will be expanded to bswap followed by a manual reversal
11560 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11561 // the two bswaps if the bitreverse gets expanded.
11562 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11563 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11564 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11565 }
11566
11567 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11568 // iff c >= bw/2 (i.e. lower half is known zero)
11569 unsigned BW = VT.getScalarSizeInBits();
11570 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11571 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11572 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11573 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11574 ShAmt->getZExtValue() >= (BW / 2) &&
11575 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11576 TLI.isTruncateFree(VT, HalfVT) &&
11577 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11578 SDValue Res = N0.getOperand(0);
11579 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11580 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11581 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11582 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11583 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11584 return DAG.getZExtOrTrunc(Res, DL, VT);
11585 }
11586 }
11587
11588 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11589 // inverse-shift-of-bswap:
11590 // bswap (X u<< C) --> (bswap X) u>> C
11591 // bswap (X u>> C) --> (bswap X) u<< C
11592 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11593 N0.hasOneUse()) {
11594 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11595 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11596 ShAmt->getZExtValue() % 8 == 0) {
11597 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11598 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11599 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11600 }
11601 }
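// Illustrative instance of the inverse-shift canonicalization above: for i32,
// bswap (x << 16) == (bswap x) >> 16, because shifting by a whole number of
// bytes before the byte swap is the same as shifting the swapped bytes the
// other way.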
11602
11603 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11604 return V;
11605
11606 return SDValue();
11607}
11608
11609SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11610 SDValue N0 = N->getOperand(0);
11611 EVT VT = N->getValueType(0);
11612 SDLoc DL(N);
11613
11614 // fold (bitreverse c1) -> c2
11615 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11616 return C;
11617
11618 // fold (bitreverse (bitreverse x)) -> x
11619 if (N0.getOpcode() == ISD::BITREVERSE)
11620 return N0.getOperand(0);
11621
11622 SDValue X, Y;
11623
11624 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11625 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11626 sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11627 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11628
11629 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11630 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11631 sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11632 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11633
11634 return SDValue();
11635}
11636
11637SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11638 SDValue N0 = N->getOperand(0);
11639 EVT VT = N->getValueType(0);
11640 SDLoc DL(N);
11641
11642 // fold (ctlz c1) -> c2
11643 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11644 return C;
11645
11646 // If the value is known never to be zero, switch to the undef version.
11647 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11648 if (DAG.isKnownNeverZero(N0))
11649 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11650
11651 return SDValue();
11652}
11653
11654SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11655 SDValue N0 = N->getOperand(0);
11656 EVT VT = N->getValueType(0);
11657 SDLoc DL(N);
11658
11659 // fold (ctlz_zero_undef c1) -> c2
11660 if (SDValue C =
11661 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11662 return C;
11663 return SDValue();
11664}
11665
11666SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11667 SDValue N0 = N->getOperand(0);
11668 EVT VT = N->getValueType(0);
11669 SDLoc DL(N);
11670
11671 // fold (cttz c1) -> c2
11672 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11673 return C;
11674
11675 // If the value is known never to be zero, switch to the undef version.
11676 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11677 if (DAG.isKnownNeverZero(N0))
11678 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11679
11680 return SDValue();
11681}
11682
11683SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11684 SDValue N0 = N->getOperand(0);
11685 EVT VT = N->getValueType(0);
11686 SDLoc DL(N);
11687
11688 // fold (cttz_zero_undef c1) -> c2
11689 if (SDValue C =
11690 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11691 return C;
11692 return SDValue();
11693}
11694
11695SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11696 SDValue N0 = N->getOperand(0);
11697 EVT VT = N->getValueType(0);
11698 unsigned NumBits = VT.getScalarSizeInBits();
11699 SDLoc DL(N);
11700
11701 // fold (ctpop c1) -> c2
11702 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11703 return C;
11704
11705 // If the source is being shifted, but doesn't affect any active bits,
11706 // then we can call CTPOP on the shift source directly.
11707 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11708 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11709 const APInt &Amt = AmtC->getAPIntValue();
11710 if (Amt.ult(NumBits)) {
11711 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11712 if ((N0.getOpcode() == ISD::SRL &&
11713 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11714 (N0.getOpcode() == ISD::SHL &&
11715 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11716 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11717 }
11718 }
11719 }
11720 }
11721
11722 // If the upper bits are known to be zero, then see if it's profitable to
11723 // only count the lower bits.
11724 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11725 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11726 if (hasOperation(ISD::CTPOP, HalfVT) &&
11727 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11728 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11729 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11730 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11731 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11732 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11733 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11734 }
11735 }
11736 }
11737
11738 return SDValue();
11739}
11740
11741static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11742 SDValue RHS, const SDNodeFlags Flags,
11743 const TargetLowering &TLI) {
11744 EVT VT = LHS.getValueType();
11745 if (!VT.isFloatingPoint())
11746 return false;
11747
11748 const TargetOptions &Options = DAG.getTarget().Options;
11749
11750 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11751 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11752 (Flags.hasNoNaNs() ||
11753 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11754}
11755
11756static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11757 SDValue RHS, SDValue True, SDValue False,
11758 ISD::CondCode CC,
11759 const TargetLowering &TLI,
11760 SelectionDAG &DAG) {
11761 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11762 switch (CC) {
11763 case ISD::SETOLT:
11764 case ISD::SETOLE:
11765 case ISD::SETLT:
11766 case ISD::SETLE:
11767 case ISD::SETULT:
11768 case ISD::SETULE: {
11769 // Since the operands are already known never to be NaN here, either fminnum
11770 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11771 // expanded in terms of it.
11772 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11773 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11774 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11775
11776 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11777 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11778 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11779 return SDValue();
11780 }
11781 case ISD::SETOGT:
11782 case ISD::SETOGE:
11783 case ISD::SETGT:
11784 case ISD::SETGE:
11785 case ISD::SETUGT:
11786 case ISD::SETUGE: {
11787 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11788 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11789 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11790
11791 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11792 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11793 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11794 return SDValue();
11795 }
11796 default:
11797 return SDValue();
11798 }
11799}
11800
11801// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
11802SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11803 const unsigned Opcode = N->getOpcode();
11804 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11805 return SDValue();
11806
11807 EVT VT = N->getValueType(0);
11808 bool IsUnsigned = Opcode == ISD::SRL;
11809
11810 // Captured values.
11811 SDValue A, B, Add;
11812
11813 // Match floor average as it is common to both floor/ceil avgs.
11814 if (sd_match(N, m_BinOp(Opcode,
11815 m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11816 m_One()))) {
11817 // Decide whether signed or unsigned.
11818 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11819 if (!hasOperation(FloorISD, VT))
11820 return SDValue();
11821
11822 // Can't optimize adds that may wrap.
11823 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11824 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11825 return SDValue();
11826
11827 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11828 }
11829
11830 return SDValue();
11831}
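// Illustrative note on the wrap checks above: for i8 with x = 255 and y = 1,
// (srl (add x, y), 1) computed in i8 wraps to 0, while the true floor average
// is 128. The nuw/nsw requirement on the add rules out exactly these cases,
// so the avgflooru/avgfloors replacement is safe.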
11832
11833SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
11834 unsigned Opc = N->getOpcode();
11835 SDValue X, Y, Z;
11836 if (sd_match(
11838 return DAG.getNode(Opc, DL, VT, X,
11839 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11840
11842 m_Value(Z)))))
11843 return DAG.getNode(Opc, DL, VT, X,
11844 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11845
11846 return SDValue();
11847}
11848
11849/// Generate Min/Max node
11850SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11851 SDValue RHS, SDValue True,
11852 SDValue False, ISD::CondCode CC) {
11853 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11854 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11855
11856 // If we can't directly match this, try to see if we can pull an fneg out of
11857 // the select.
11858 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11859 True, DAG, LegalOperations, ForCodeSize);
11860 if (!NegTrue)
11861 return SDValue();
11862
11863 HandleSDNode NegTrueHandle(NegTrue);
11864
11865 // Try to unfold an fneg from the select if we are comparing the negated
11866 // constant.
11867 //
11868 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11869 //
11870 // TODO: Handle fabs
11871 if (LHS == NegTrue) {
11872 // If we can't directly match this, try to see if we can pull an fneg out of
11873 // the select.
11874 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11875 RHS, DAG, LegalOperations, ForCodeSize);
11876 if (NegRHS) {
11877 HandleSDNode NegRHSHandle(NegRHS);
11878 if (NegRHS == False) {
11879 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11880 False, CC, TLI, DAG);
11881 if (Combined)
11882 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11883 }
11884 }
11885 }
11886
11887 return SDValue();
11888}
11889
11890/// If a (v)select has a condition value that is a sign-bit test, try to smear
11891/// the condition operand sign-bit across the value width and use it as a mask.
11892static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11893 SelectionDAG &DAG) {
11894 SDValue Cond = N->getOperand(0);
11895 SDValue C1 = N->getOperand(1);
11896 SDValue C2 = N->getOperand(2);
11897 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11898 return SDValue();
11899
11900 EVT VT = N->getValueType(0);
11901 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11902 VT != Cond.getOperand(0).getValueType())
11903 return SDValue();
11904
11905 // The inverted-condition + commuted-select variants of these patterns are
11906 // canonicalized to these forms in IR.
11907 SDValue X = Cond.getOperand(0);
11908 SDValue CondC = Cond.getOperand(1);
11909 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11910 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11911 isAllOnesOrAllOnesSplat(C2)) {
11912 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11913 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11914 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11915 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11916 }
11917 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11918 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11919 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11920 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11921 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11922 }
11923 return SDValue();
11924}
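// Illustrative instance of the sign-bit smear above: for i32, "X > -1 ? 4 : -1"
// becomes (or (sra X, 31), 4): when X is non-negative the shift yields 0 and
// the result is 4; when X is negative it yields all-ones and the result is -1.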
11925
11926static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
11927 const TargetLowering &TLI) {
11928 if (!TLI.convertSelectOfConstantsToMath(VT))
11929 return false;
11930
11931 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11932 return true;
11934 return true;
11935
11936 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11937 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11938 return true;
11939 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11940 return true;
11941
11942 return false;
11943}
11944
11945SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11946 SDValue Cond = N->getOperand(0);
11947 SDValue N1 = N->getOperand(1);
11948 SDValue N2 = N->getOperand(2);
11949 EVT VT = N->getValueType(0);
11950 EVT CondVT = Cond.getValueType();
11951 SDLoc DL(N);
11952
11953 if (!VT.isInteger())
11954 return SDValue();
11955
11956 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11957 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11958 if (!C1 || !C2)
11959 return SDValue();
11960
11961 if (CondVT != MVT::i1 || LegalOperations) {
11962 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11963 // We can't do this reliably if integer based booleans have different contents
11964 // to floating point based booleans. This is because we can't tell whether we
11965 // have an integer-based boolean or a floating-point-based boolean unless we
11966 // can find the SETCC that produced it and inspect its operands. This is
11967 // fairly easy if C is the SETCC node, but it can potentially be
11968 // undiscoverable (or not reasonably discoverable). For example, it could be
11969 // in another basic block or it could require searching a complicated
11970 // expression.
11971 if (CondVT.isInteger() &&
11972 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11974 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11976 C1->isZero() && C2->isOne()) {
11977 SDValue NotCond =
11978 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11979 if (VT.bitsEq(CondVT))
11980 return NotCond;
11981 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11982 }
11983
11984 return SDValue();
11985 }
11986
11987 // Only do this before legalization to avoid conflicting with target-specific
11988 // transforms in the other direction (create a select from a zext/sext). There
11989 // is also a target-independent combine here in DAGCombiner in the other
11990 // direction for (select Cond, -1, 0) when the condition is not i1.
11991 assert(CondVT == MVT::i1 && !LegalOperations);
11992
11993 // select Cond, 1, 0 --> zext (Cond)
11994 if (C1->isOne() && C2->isZero())
11995 return DAG.getZExtOrTrunc(Cond, DL, VT);
11996
11997 // select Cond, -1, 0 --> sext (Cond)
11998 if (C1->isAllOnes() && C2->isZero())
11999 return DAG.getSExtOrTrunc(Cond, DL, VT);
12000
12001 // select Cond, 0, 1 --> zext (!Cond)
12002 if (C1->isZero() && C2->isOne()) {
12003 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12004 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12005 return NotCond;
12006 }
12007
12008 // select Cond, 0, -1 --> sext (!Cond)
12009 if (C1->isZero() && C2->isAllOnes()) {
12010 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12011 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12012 return NotCond;
12013 }
12014
12015 // Use a target hook because some targets may prefer to transform in the
12016 // other direction.
12017 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12018 return SDValue();
12019
12020 // For any constants that differ by 1, we can transform the select into
12021 // an extend and add.
12022 const APInt &C1Val = C1->getAPIntValue();
12023 const APInt &C2Val = C2->getAPIntValue();
12024
12025 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12026 if (C1Val - 1 == C2Val) {
12027 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12028 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12029 }
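// Illustrative instance of the fold above: (select Cond, 8, 7) becomes
// (add (zext Cond), 7), which is 8 when Cond is true and 7 otherwise.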
12030
12031 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12032 if (C1Val + 1 == C2Val) {
12033 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12034 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12035 }
12036
12037 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12038 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12039 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12040 SDValue ShAmtC =
12041 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12042 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12043 }
12044
12045 // select Cond, -1, C --> or (sext Cond), C
12046 if (C1->isAllOnes()) {
12047 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12048 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12049 }
12050
12051 // select Cond, C, -1 --> or (sext (not Cond)), C
12052 if (C2->isAllOnes()) {
12053 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12054 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12055 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12056 }
12057
12057
12058 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12059 return V;
12060
12061 return SDValue();
12062}
12063
12064template <class MatchContextClass>
12065static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
12066 SelectionDAG &DAG) {
12067 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12068 N->getOpcode() == ISD::VP_SELECT) &&
12069 "Expected a (v)(vp.)select");
12070 SDValue Cond = N->getOperand(0);
12071 SDValue T = N->getOperand(1), F = N->getOperand(2);
12072 EVT VT = N->getValueType(0);
12073 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12074 MatchContextClass matcher(DAG, TLI, N);
12075
12076 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12077 return SDValue();
12078
12079 // select Cond, Cond, F --> or Cond, freeze(F)
12080 // select Cond, 1, F --> or Cond, freeze(F)
12081 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12082 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12083
12084 // select Cond, T, Cond --> and Cond, freeze(T)
12085 // select Cond, T, 0 --> and Cond, freeze(T)
12086 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12087 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12088
12089 // select Cond, T, 1 --> or (not Cond), freeze(T)
12090 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12091 SDValue NotCond =
12092 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12093 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12094 }
12095
12096 // select Cond, 0, F --> and (not Cond), freeze(F)
12097 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12098 SDValue NotCond =
12099 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12100 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12101 }
12102
12103 return SDValue();
12104}
12105
12106static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
12107 SDValue N0 = N->getOperand(0);
12108 SDValue N1 = N->getOperand(1);
12109 SDValue N2 = N->getOperand(2);
12110 EVT VT = N->getValueType(0);
12111 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12112
12113 SDValue Cond0, Cond1;
12114 ISD::CondCode CC;
12115 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12116 m_CondCode(CC)))) ||
12117 VT != Cond0.getValueType())
12118 return SDValue();
12119
12120 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12121 // compare is inverted from that pattern ("Cond0 s> -1").
12122 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12123 ; // This is the pattern we are looking for.
12124 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12125 std::swap(N1, N2);
12126 else
12127 return SDValue();
12128
12129 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12130 if (isNullOrNullSplat(N2)) {
12131 SDLoc DL(N);
12132 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12133 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12134 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12135 }
12136
12137 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12138 if (isAllOnesOrAllOnesSplat(N1)) {
12139 SDLoc DL(N);
12140 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12141 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12142 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12143 }
12144
12145 // If we have to invert the sign bit mask, only do that transform if the
12146 // target has a bitwise 'and not' instruction (the invert is free).
12147 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12148 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12149 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12150 SDLoc DL(N);
12151 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12152 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12153 SDValue Not = DAG.getNOT(DL, Sra, VT);
12154 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12155 }
12156
12157 // TODO: There's another pattern in this family, but it may require
12158 // implementing hasOrNot() to check for profitability:
12159 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12160
12161 return SDValue();
12162}
12163
12164// Match SELECTs with absolute difference patterns.
12165// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12166// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12167// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12168// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12169SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12170 SDValue False, ISD::CondCode CC,
12171 const SDLoc &DL) {
12172 bool IsSigned = isSignedIntSetCC(CC);
12173 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12174 EVT VT = LHS.getValueType();
12175
12176 if (LegalOperations && !hasOperation(ABDOpc, VT))
12177 return SDValue();
12178
12179 switch (CC) {
12180 case ISD::SETGT:
12181 case ISD::SETGE:
12182 case ISD::SETUGT:
12183 case ISD::SETUGE:
12184 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12185 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
12186 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12187 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12188 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12189 hasOperation(ABDOpc, VT))
12190 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12191 break;
12192 case ISD::SETLT:
12193 case ISD::SETLE:
12194 case ISD::SETULT:
12195 case ISD::SETULE:
12196 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12197 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
12198 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12199 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12200 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12201 hasOperation(ABDOpc, VT))
12202 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12203 break;
12204 default:
12205 break;
12206 }
12207
12208 return SDValue();
12209}
12210
12211// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12212// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12213SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12214 SDValue False, ISD::CondCode CC,
12215 const SDLoc &DL) {
12216 APInt C;
12217 EVT VT = True.getValueType();
12218 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12219 if (CC == ISD::SETUGT && LHS == False &&
12220 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12221 SDValue AddC = DAG.getConstant(~C, DL, VT);
12222 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12223 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12224 }
12225 if (CC == ISD::SETULT && LHS == True &&
12226 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12227 SDValue AddC = DAG.getConstant(-C, DL, VT);
12228 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12229 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12230 }
12231 }
12232 return SDValue();
12233}
12234
12235SDValue DAGCombiner::visitSELECT(SDNode *N) {
12236 SDValue N0 = N->getOperand(0);
12237 SDValue N1 = N->getOperand(1);
12238 SDValue N2 = N->getOperand(2);
12239 EVT VT = N->getValueType(0);
12240 EVT VT0 = N0.getValueType();
12241 SDLoc DL(N);
12242 SDNodeFlags Flags = N->getFlags();
12243
12244 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12245 return V;
12246
12247 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12248 return V;
12249
12250 // select (not Cond), N1, N2 -> select Cond, N2, N1
12251 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12252 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12253
12254 if (SDValue V = foldSelectOfConstants(N))
12255 return V;
12256
12257 // If we can fold this based on the true/false value, do so.
12258 if (SimplifySelectOps(N, N1, N2))
12259 return SDValue(N, 0); // Don't revisit N.
12260
12261 if (VT0 == MVT::i1) {
12262 // The code in this block deals with the following 2 equivalences:
12263 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12264 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12265 // The target can specify its preferred form with the
12266 // shouldNormalizeToSelectSequence() callback. However, we always transform
12267 // to the right-hand form if the inner select already exists in the DAG,
12268 // and we always transform to the left-hand form if we know that we can
12269 // further optimize the combination of the conditions.
12270 bool normalizeToSequence =
12271 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12272 // select (and Cond0, Cond1), X, Y
12273 // -> select Cond0, (select Cond1, X, Y), Y
12274 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12275 SDValue Cond0 = N0->getOperand(0);
12276 SDValue Cond1 = N0->getOperand(1);
12277 SDValue InnerSelect =
12278 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12279 if (normalizeToSequence || !InnerSelect.use_empty())
12280 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12281 InnerSelect, N2, Flags);
12282 // Cleanup on failure.
12283 if (InnerSelect.use_empty())
12284 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12285 }
12286 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12287 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12288 SDValue Cond0 = N0->getOperand(0);
12289 SDValue Cond1 = N0->getOperand(1);
12290 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12291 Cond1, N1, N2, Flags);
12292 if (normalizeToSequence || !InnerSelect.use_empty())
12293 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12294 InnerSelect, Flags);
12295 // Cleanup on failure.
12296 if (InnerSelect.use_empty())
12297 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12298 }
12299
12300 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12301 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12302 SDValue N1_0 = N1->getOperand(0);
12303 SDValue N1_1 = N1->getOperand(1);
12304 SDValue N1_2 = N1->getOperand(2);
12305 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12306 // Create the actual and node if we can generate good code for it.
12307 if (!normalizeToSequence) {
12308 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12309 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12310 N2, Flags);
12311 }
12312 // Otherwise see if we can optimize the "and" to a better pattern.
12313 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12314 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12315 N2, Flags);
12316 }
12317 }
12318 }
12319 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12320 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12321 SDValue N2_0 = N2->getOperand(0);
12322 SDValue N2_1 = N2->getOperand(1);
12323 SDValue N2_2 = N2->getOperand(2);
12324 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12325 // Create the actual or node if we can generate good code for it.
12326 if (!normalizeToSequence) {
12327 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12328 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12329 N2_2, Flags);
12330 }
12331 // Otherwise see if we can optimize to a better pattern.
12332 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12333 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12334 N2_2, Flags);
12335 }
12336 }
12337
12338 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
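// The overflow bit of usubo(x, y) is set exactly when x u< y, so this select
// picks (y - x) when x < y and (x - y) otherwise, i.e. the unsigned absolute
// difference of x and y.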
12339 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12340 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12341 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12342 N2.getOperand(1) == N1.getOperand(0) &&
12343 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12344 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12345
12346 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12347 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12348 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12349 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12350 N2.getOperand(1) == N1.getOperand(0) &&
12351 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12352 return DAG.getNegative(
12353 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12354 DL, VT);
12355 }
12356
12357 // Fold selects based on a setcc into other things, such as min/max/abs.
12358 if (N0.getOpcode() == ISD::SETCC) {
12359 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12360 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12361
12362 // select (fcmp lt x, y), x, y -> fminnum x, y
12363 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12364 //
12365 // This is OK if we don't care what happens if either operand is a NaN.
12366 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12367 if (SDValue FMinMax =
12368 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12369 return FMinMax;
12370
12371 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12372 // This is conservatively limited to pre-legal-operations to give targets
12373 // a chance to reverse the transform if they want to do that. Also, it is
12374 // unlikely that the pattern would be formed late, so it's probably not
12375 // worth going through the other checks.
12376 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12377 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12378 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12379 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12380 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12381 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12382 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12383 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12384 //
12385 // The IR equivalent of this transform would have this form:
12386 // %a = add %x, C
12387 // %c = icmp ugt %x, ~C
12388 // %r = select %c, -1, %a
12389 // =>
12390 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12391 // %u0 = extractvalue %u, 0
12392 // %u1 = extractvalue %u, 1
12393 // %r = select %u1, -1, %u0
12394 SDVTList VTs = DAG.getVTList(VT, VT0);
12395 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12396 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12397 }
12398 }
12399
12400 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12401 (!LegalOperations &&
12402 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12403 // Any flags available in a select/setcc fold will be on the setcc as they
12404 // migrated from fcmp
12405 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12406 N0.getOperand(2), N0->getFlags());
12407 }
12408
12409 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12410 return ABD;
12411
12412 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12413 return NewSel;
12414
12415 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12416 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12417 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12418 return UMin;
12419 }
12420
12421 if (!VT.isVector())
12422 if (SDValue BinOp = foldSelectOfBinops(N))
12423 return BinOp;
12424
12425 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12426 return R;
12427
12428 return SDValue();
12429}
12430
12431// This function assumes all the vselect's arguments are CONCAT_VECTOR
12432// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
12433 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12434 SDLoc DL(N);
12435 SDValue Cond = N->getOperand(0);
12436 SDValue LHS = N->getOperand(1);
12437 SDValue RHS = N->getOperand(2);
12438 EVT VT = N->getValueType(0);
12439 int NumElems = VT.getVectorNumElements();
12440 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12441 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12442 Cond.getOpcode() == ISD::BUILD_VECTOR);
12443
12444 // CONCAT_VECTORS can take an arbitrary number of operands. We only care about
12445 // binary ones here.
12446 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12447 return SDValue();
12448
12449 // We're sure we have an even number of elements due to the
12450 // concat_vectors we have as arguments to vselect.
12451 // Skip BV elements until we find one that's not an UNDEF. Once we find a
12452 // non-undef element, keep looping until we get to half the length of the
12453 // BV and check that all the non-undef elements are the same.
12454 ConstantSDNode *BottomHalf = nullptr;
12455 for (int i = 0; i < NumElems / 2; ++i) {
12456 if (Cond->getOperand(i)->isUndef())
12457 continue;
12458
12459 if (BottomHalf == nullptr)
12460 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12461 else if (Cond->getOperand(i).getNode() != BottomHalf)
12462 return SDValue();
12463 }
12464
12465 // Do the same for the second half of the BuildVector
12466 ConstantSDNode *TopHalf = nullptr;
12467 for (int i = NumElems / 2; i < NumElems; ++i) {
12468 if (Cond->getOperand(i)->isUndef())
12469 continue;
12470
12471 if (TopHalf == nullptr)
12472 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12473 else if (Cond->getOperand(i).getNode() != TopHalf)
12474 return SDValue();
12475 }
12476
12477 assert(TopHalf && BottomHalf &&
12478 "One half of the selector was all UNDEFs and the other was all the "
12479 "same value. This should have been addressed before this function.");
12480 return DAG.getNode(
12481 ISD::CONCAT_VECTORS, DL, VT,
12482 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12483 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12484}
12485
12486bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12487 SelectionDAG &DAG, const SDLoc &DL) {
12488
12489 // Only perform the transformation when existing operands can be reused.
12490 if (IndexIsScaled)
12491 return false;
12492
12493 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12494 return false;
12495
12496 EVT VT = BasePtr.getValueType();
12497
12498 if (SDValue SplatVal = DAG.getSplatValue(Index);
12499 SplatVal && !isNullConstant(SplatVal) &&
12500 SplatVal.getValueType() == VT) {
12501 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12502 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12503 return true;
12504 }
12505
12506 if (Index.getOpcode() != ISD::ADD)
12507 return false;
12508
12509 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12510 SplatVal && SplatVal.getValueType() == VT) {
12511 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12512 Index = Index.getOperand(1);
12513 return true;
12514 }
12515 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12516 SplatVal && SplatVal.getValueType() == VT) {
12517 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12518 Index = Index.getOperand(0);
12519 return true;
12520 }
12521 return false;
12522}
12523
12524// Fold sext/zext of index into index type.
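// For example, an index of the form (zext <4 x i32> %i to <4 x i64>) can be
// replaced by %i itself with an unsigned index type, provided the target
// reports (via shouldRemoveExtendFromGSIndex) that the narrower index is fine.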
12525bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12526 SelectionDAG &DAG) {
12527 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12528
12529 // It's always safe to look through zero extends.
12530 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12531 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12532 IndexType = ISD::UNSIGNED_SCALED;
12533 Index = Index.getOperand(0);
12534 return true;
12535 }
12536 if (ISD::isIndexTypeSigned(IndexType)) {
12537 IndexType = ISD::UNSIGNED_SCALED;
12538 return true;
12539 }
12540 }
12541
12542 // It's only safe to look through sign extends when Index is signed.
12543 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12544 ISD::isIndexTypeSigned(IndexType) &&
12545 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12546 Index = Index.getOperand(0);
12547 return true;
12548 }
12549
12550 return false;
12551}
12552
12553SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12554 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12555 SDValue Mask = MSC->getMask();
12556 SDValue Chain = MSC->getChain();
12557 SDValue Index = MSC->getIndex();
12558 SDValue Scale = MSC->getScale();
12559 SDValue StoreVal = MSC->getValue();
12560 SDValue BasePtr = MSC->getBasePtr();
12561 SDValue VL = MSC->getVectorLength();
12562 ISD::MemIndexType IndexType = MSC->getIndexType();
12563 SDLoc DL(N);
12564
12565 // Zap scatters with a zero mask.
12566 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12567 return Chain;
12568
12569 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12570 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12571 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12572 DL, Ops, MSC->getMemOperand(), IndexType);
12573 }
12574
12575 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12576 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12577 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12578 DL, Ops, MSC->getMemOperand(), IndexType);
12579 }
12580
12581 return SDValue();
12582}
12583
12584SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12585 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12586 SDValue Mask = MSC->getMask();
12587 SDValue Chain = MSC->getChain();
12588 SDValue Index = MSC->getIndex();
12589 SDValue Scale = MSC->getScale();
12590 SDValue StoreVal = MSC->getValue();
12591 SDValue BasePtr = MSC->getBasePtr();
12592 ISD::MemIndexType IndexType = MSC->getIndexType();
12593 SDLoc DL(N);
12594
12595 // Zap scatters with a zero mask.
12596 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12597 return Chain;
12598
12599 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12600 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12601 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12602 DL, Ops, MSC->getMemOperand(), IndexType,
12603 MSC->isTruncatingStore());
12604 }
12605
12606 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12607 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12608 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12609 DL, Ops, MSC->getMemOperand(), IndexType,
12610 MSC->isTruncatingStore());
12611 }
12612
12613 return SDValue();
12614}
12615
12616SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12617 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12618 SDValue Mask = MST->getMask();
12619 SDValue Chain = MST->getChain();
12620 SDValue Value = MST->getValue();
12621 SDValue Ptr = MST->getBasePtr();
12622
12623 // Zap masked stores with a zero mask.
12624 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12625 return Chain;
12626
12627 // Remove a masked store if base pointers and masks are equal.
12628 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12629 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12630 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12631 !MST->getBasePtr().isUndef() &&
12632 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12633 MST1->getMemoryVT().getStoreSize()) ||
12634 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12635 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12636 MST->getMemoryVT().getStoreSize())) {
12637 CombineTo(MST1, MST1->getChain());
12638 if (N->getOpcode() != ISD::DELETED_NODE)
12639 AddToWorklist(N);
12640 return SDValue(N, 0);
12641 }
12642 }
12643
12644 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12645 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12646 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12647 !MST->isCompressingStore() && !MST->isTruncatingStore())
12648 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12649 MST->getBasePtr(), MST->getPointerInfo(),
12650 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12651 MST->getAAInfo());
12652
12653 // Try transforming N to an indexed store.
12654 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12655 return SDValue(N, 0);
12656
12657 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12658 Value.getValueType().isInteger() &&
12659 (!isa<ConstantSDNode>(Value) ||
12660 !cast<ConstantSDNode>(Value)->isOpaque())) {
12661 APInt TruncDemandedBits =
12662 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12663 MST->getMemoryVT().getScalarSizeInBits());
12664
12665 // See if we can simplify the operation with
12666 // SimplifyDemandedBits, which only works if the value has a single use.
12667 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12668 // Re-visit the store if anything changed and the store hasn't been merged
12669 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12670 // node back to the worklist if necessary, but we also need to re-visit
12671 // the Store node itself.
12672 if (N->getOpcode() != ISD::DELETED_NODE)
12673 AddToWorklist(N);
12674 return SDValue(N, 0);
12675 }
12676 }
12677
12678 // If this is a TRUNC followed by a masked store, fold this into a masked
12679 // truncating store. We can do this even if this is already a masked
12680 // truncstore.
12681 // TODO: Try combining to a masked compress store if possible.
12682 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12683 MST->isUnindexed() && !MST->isCompressingStore() &&
12684 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12685 MST->getMemoryVT(), LegalOperations)) {
12686 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12687 Value.getOperand(0).getValueType());
12688 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12689 MST->getOffset(), Mask, MST->getMemoryVT(),
12690 MST->getMemOperand(), MST->getAddressingMode(),
12691 /*IsTruncating=*/true);
12692 }
12693
12694 return SDValue();
12695}
12696
12697SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12698 auto *SST = cast<VPStridedStoreSDNode>(N);
12699 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12700 // Combine strided stores with unit-stride to a regular VP store.
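// For example, a strided store of <4 x i32> with a constant stride of 4 bytes
// writes consecutive elements, which is exactly a regular (unit-stride) VP
// store.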
12701 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12702 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12703 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12704 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12705 SST->getVectorLength(), SST->getMemoryVT(),
12706 SST->getMemOperand(), SST->getAddressingMode(),
12707 SST->isTruncatingStore(), SST->isCompressingStore());
12708 }
12709 return SDValue();
12710}
12711
12712SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12713 SDLoc DL(N);
12714 SDValue Vec = N->getOperand(0);
12715 SDValue Mask = N->getOperand(1);
12716 SDValue Passthru = N->getOperand(2);
12717 EVT VecVT = Vec.getValueType();
12718
12719 bool HasPassthru = !Passthru.isUndef();
12720
12721 APInt SplatVal;
12722 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12723 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12724
12725 if (Vec.isUndef() || Mask.isUndef())
12726 return Passthru;
12727
12728 // No need for potentially expensive compress if the mask is constant.
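// For example, compressing Vec = <a, b, c, d> with the constant mask
// <1, 0, 1, 1> and passthru <p0, p1, p2, p3> yields the build_vector
// <a, c, d, p3> (or <a, c, d, undef> when there is no passthru).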
12729 if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12730 SmallVector<SDValue, 16> Ops;
12731 EVT ScalarVT = VecVT.getVectorElementType();
12732 unsigned NumSelected = 0;
12733 unsigned NumElmts = VecVT.getVectorNumElements();
12734 for (unsigned I = 0; I < NumElmts; ++I) {
12735 SDValue MaskI = Mask.getOperand(I);
12736 // We treat undef mask entries as "false".
12737 if (MaskI.isUndef())
12738 continue;
12739
12740 if (TLI.isConstTrueVal(MaskI)) {
12741 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12742 DAG.getVectorIdxConstant(I, DL));
12743 Ops.push_back(VecI);
12744 NumSelected++;
12745 }
12746 }
12747 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12748 SDValue Val =
12749 HasPassthru
12750 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12751 DAG.getVectorIdxConstant(Rest, DL))
12752 : DAG.getUNDEF(ScalarVT);
12753 Ops.push_back(Val);
12754 }
12755 return DAG.getBuildVector(VecVT, DL, Ops);
12756 }
12757
12758 return SDValue();
12759}
12760
12761SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12762 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12763 SDValue Mask = MGT->getMask();
12764 SDValue Chain = MGT->getChain();
12765 SDValue Index = MGT->getIndex();
12766 SDValue Scale = MGT->getScale();
12767 SDValue BasePtr = MGT->getBasePtr();
12768 SDValue VL = MGT->getVectorLength();
12769 ISD::MemIndexType IndexType = MGT->getIndexType();
12770 SDLoc DL(N);
12771
12772 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12773 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12774 return DAG.getGatherVP(
12775 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12776 Ops, MGT->getMemOperand(), IndexType);
12777 }
12778
12779 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12780 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12781 return DAG.getGatherVP(
12782 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12783 Ops, MGT->getMemOperand(), IndexType);
12784 }
12785
12786 return SDValue();
12787}
12788
12789SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12790 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12791 SDValue Mask = MGT->getMask();
12792 SDValue Chain = MGT->getChain();
12793 SDValue Index = MGT->getIndex();
12794 SDValue Scale = MGT->getScale();
12795 SDValue PassThru = MGT->getPassThru();
12796 SDValue BasePtr = MGT->getBasePtr();
12797 ISD::MemIndexType IndexType = MGT->getIndexType();
12798 SDLoc DL(N);
12799
12800 // Zap gathers with a zero mask.
12801 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12802 return CombineTo(N, PassThru, MGT->getChain());
12803
12804 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12805 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12806 return DAG.getMaskedGather(
12807 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12808 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12809 }
12810
12811 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12812 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12813 return DAG.getMaskedGather(
12814 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12815 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12816 }
12817
12818 return SDValue();
12819}
12820
12821SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12822 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12823 SDValue Mask = MLD->getMask();
12824
12825 // Zap masked loads with a zero mask.
12826 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12827 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12828
12829 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12830 // FIXME: Can we do this for indexed, expanding, or extending loads?
12831 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12832 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12833 SDValue NewLd = DAG.getLoad(
12834 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12835 MLD->getPointerInfo(), MLD->getBaseAlign(),
12836 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12837 return CombineTo(N, NewLd, NewLd.getValue(1));
12838 }
12839
12840 // Try transforming N to an indexed load.
12841 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12842 return SDValue(N, 0);
12843
12844 return SDValue();
12845}
12846
12847SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12848 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12849 SDValue Chain = HG->getChain();
12850 SDValue Inc = HG->getInc();
12851 SDValue Mask = HG->getMask();
12852 SDValue BasePtr = HG->getBasePtr();
12853 SDValue Index = HG->getIndex();
12854 SDLoc DL(HG);
12855
12856 EVT MemVT = HG->getMemoryVT();
12857 EVT DataVT = Index.getValueType();
12858 MachineMemOperand *MMO = HG->getMemOperand();
12859 ISD::MemIndexType IndexType = HG->getIndexType();
12860
12861 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12862 return Chain;
12863
12864 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12865 refineIndexType(Index, IndexType, DataVT, DAG)) {
12866 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12867 HG->getScale(), HG->getIntID()};
12868 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12869 MMO, IndexType);
12870 }
12871
12872 return SDValue();
12873}
12874
12875SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12876 if (SDValue Res = foldPartialReduceMLAMulOp(N))
12877 return Res;
12878 if (SDValue Res = foldPartialReduceAdd(N))
12879 return Res;
12880 return SDValue();
12881}
12882
12883// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
12884// -> partial_reduce_*mla(acc, a, b)
12885//
12886// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12887// -> partial_reduce_*mla(acc, x, C)
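// For instance (types are only illustrative): with a <4 x i32> accumulator and
// <16 x i8> inputs a and b, the mul of the two sign-extended operands times a
// splat of 1 reduces to partial_reduce_smla(acc, a, b), letting the target do
// the widening inside the operation.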
12888SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
12889 SDLoc DL(N);
12890 auto *Context = DAG.getContext();
12891 SDValue Acc = N->getOperand(0);
12892 SDValue Op1 = N->getOperand(1);
12893 SDValue Op2 = N->getOperand(2);
12894
12895 APInt C;
12896 if (Op1->getOpcode() != ISD::MUL ||
12897 !ISD::isConstantSplatVector(Op2.getNode(), C) || !C.isOne())
12898 return SDValue();
12899
12900 SDValue LHS = Op1->getOperand(0);
12901 SDValue RHS = Op1->getOperand(1);
12902 unsigned LHSOpcode = LHS->getOpcode();
12903 if (!ISD::isExtOpcode(LHSOpcode))
12904 return SDValue();
12905
12906 SDValue LHSExtOp = LHS->getOperand(0);
12907 EVT LHSExtOpVT = LHSExtOp.getValueType();
12908
12909 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12910 // -> partial_reduce_*mla(acc, x, C)
12911 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
12912 // TODO: Make use of partial_reduce_sumla here
12913 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
12914 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
12915 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
12916 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
12917 return SDValue();
12918
12919 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
12920 ? ISD::PARTIAL_REDUCE_SMLA
12921 : ISD::PARTIAL_REDUCE_UMLA;
12922
12923 // Only perform these combines if the target supports folding
12924 // the extends into the operation.
12925 if (!TLI.isPartialReduceMLALegalOrCustom(
12926 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
12927 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
12928 return SDValue();
12929
12930 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
12931 DAG.getConstant(CTrunc, DL, LHSExtOpVT));
12932 }
12933
12934 unsigned RHSOpcode = RHS->getOpcode();
12935 if (!ISD::isExtOpcode(RHSOpcode))
12936 return SDValue();
12937
12938 SDValue RHSExtOp = RHS->getOperand(0);
12939 if (LHSExtOpVT != RHSExtOp.getValueType())
12940 return SDValue();
12941
12942 unsigned NewOpc;
12943 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
12944 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
12945 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
12946 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
12947 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
12948 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
12949 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
12950 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
12951 std::swap(LHSExtOp, RHSExtOp);
12952 } else
12953 return SDValue();
12954 // For a 2-stage extend, the signedness of both extends must match.
12955 // If the mul has the same type, there is no outer extend, and thus we
12956 // can simply use the inner extends to pick the result node.
12957 // TODO: extend to handle nonneg zext as sext
12958 EVT AccElemVT = Acc.getValueType().getVectorElementType();
12959 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
12960 NewOpc != N->getOpcode())
12961 return SDValue();
12962
12963 // Only perform these combines if the target supports folding
12964 // the extends into the operation.
12965 if (!TLI.isPartialReduceMLALegalOrCustom(
12966 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
12967 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
12968 return SDValue();
12969
12970 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
12971}
12972
12973// partial.reduce.umla(acc, zext(op), splat(1))
12974// -> partial.reduce.umla(acc, op, splat(trunc(1)))
12975// partial.reduce.smla(acc, sext(op), splat(1))
12976// -> partial.reduce.smla(acc, op, splat(trunc(1)))
12977// partial.reduce.sumla(acc, sext(op), splat(1))
12978// -> partial.reduce.smla(acc, op, splat(trunc(1)))
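// For instance (illustrative types): accumulating zext(<16 x i8> %op) into a
// <4 x i32> accumulator with a splat-of-1 multiplier becomes
// partial.reduce.umla(acc, %op, splat(i8 1)).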
12979SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
12980 SDLoc DL(N);
12981 SDValue Acc = N->getOperand(0);
12982 SDValue Op1 = N->getOperand(1);
12983 SDValue Op2 = N->getOperand(2);
12984
12985 APInt ConstantOne;
12986 if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
12987 !ConstantOne.isOne())
12988 return SDValue();
12989
12990 unsigned Op1Opcode = Op1.getOpcode();
12991 if (!ISD::isExtOpcode(Op1Opcode))
12992 return SDValue();
12993
12994 bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
12995 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
12996 EVT AccElemVT = Acc.getValueType().getVectorElementType();
12997 if (Op1IsSigned != NodeIsSigned &&
12998 Op1.getValueType().getVectorElementType() != AccElemVT)
12999 return SDValue();
13000
13001 unsigned NewOpcode =
13002 Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
13003
13004 SDValue UnextOp1 = Op1.getOperand(0);
13005 EVT UnextOp1VT = UnextOp1.getValueType();
13006 auto *Context = DAG.getContext();
13007 if (!TLI.isPartialReduceMLALegalOrCustom(
13008 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13009 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13010 return SDValue();
13011
13012 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13013 DAG.getConstant(1, DL, UnextOp1VT));
13014}
13015
13016SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13017 auto *SLD = cast<VPStridedLoadSDNode>(N);
13018 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13019 // Combine strided loads with unit-stride to a regular VP load.
13020 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13021 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13022 SDValue NewLd = DAG.getLoadVP(
13023 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13024 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13025 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13026 SLD->getMemOperand(), SLD->isExpandingLoad());
13027 return CombineTo(N, NewLd, NewLd.getValue(1));
13028 }
13029 return SDValue();
13030}
13031
13032/// A vector select of 2 constant vectors can be simplified to math/logic to
13033/// avoid a variable select instruction and possibly avoid constant loads.
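/// For example, vselect Cond, <4 x i32> <3,3,3,3>, <4 x i32> <2,2,2,2> becomes
/// add (zext Cond), <2,2,2,2>: true lanes give 3 and false lanes give 2,
/// without materializing two constant vectors or a variable select.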
13034SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13035 SDValue Cond = N->getOperand(0);
13036 SDValue N1 = N->getOperand(1);
13037 SDValue N2 = N->getOperand(2);
13038 EVT VT = N->getValueType(0);
13039 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13040 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
13041 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
13042 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
13043 return SDValue();
13044
13045 // Check if we can use the condition value to increment/decrement a single
13046 // constant value. This simplifies a select to an add and removes a constant
13047 // load/materialization from the general case.
13048 bool AllAddOne = true;
13049 bool AllSubOne = true;
13050 unsigned Elts = VT.getVectorNumElements();
13051 for (unsigned i = 0; i != Elts; ++i) {
13052 SDValue N1Elt = N1.getOperand(i);
13053 SDValue N2Elt = N2.getOperand(i);
13054 if (N1Elt.isUndef())
13055 continue;
13056 // N2 should not contain undef values since it will be reused in the fold.
13057 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13058 AllAddOne = false;
13059 AllSubOne = false;
13060 break;
13061 }
13062
13063 const APInt &C1 = N1Elt->getAsAPIntVal();
13064 const APInt &C2 = N2Elt->getAsAPIntVal();
13065 if (C1 != C2 + 1)
13066 AllAddOne = false;
13067 if (C1 != C2 - 1)
13068 AllSubOne = false;
13069 }
13070
13071 // Further simplifications for the extra-special cases where the constants are
13072 // all 0 or all -1 should be implemented as folds of these patterns.
13073 SDLoc DL(N);
13074 if (AllAddOne || AllSubOne) {
13075 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13076 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13077 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13078 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13079 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13080 }
13081
13082 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13083 APInt Pow2C;
13084 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13085 isNullOrNullSplat(N2)) {
13086 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13087 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13088 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13089 }
13090
13091 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
13092 return V;
13093
13094 // The general case for select-of-constants:
13095 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13096 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13097 // leave that to a machine-specific pass.
13098 return SDValue();
13099}
13100
13101SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13102 SDValue N0 = N->getOperand(0);
13103 SDValue N1 = N->getOperand(1);
13104 SDValue N2 = N->getOperand(2);
13105 SDLoc DL(N);
13106
13107 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13108 return V;
13109
13110 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
13111 return V;
13112
13113 return SDValue();
13114}
13115
13116 static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13117 SDValue FVal,
13118 const TargetLowering &TLI,
13119 SelectionDAG &DAG,
13120 const SDLoc &DL) {
13121 EVT VT = TVal.getValueType();
13122 if (!TLI.isTypeLegal(VT))
13123 return SDValue();
13124
13125 EVT CondVT = Cond.getValueType();
13126 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13127
13128 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13129 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13130 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13131 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13132
13133 // Bail out unless we have vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1).
13134 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13135 return SDValue();
13136
13137 // select Cond, 0, 0 → 0
13138 if (IsTAllZero && IsFAllZero) {
13139 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13140 : DAG.getConstant(0, DL, VT);
13141 }
13142
13143 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
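// Sanity check with scalars: for lhs = 4, (setgt lhs, -1) is true and the
// select yields 1; (sra 4, BW-1) = 0 and (or 0, 1) = 1. For lhs = -3 the
// select yields -1; (sra -3, BW-1) = -1 and (or -1, 1) = -1.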
13144 APInt TValAPInt;
13145 if (Cond.getOpcode() == ISD::SETCC &&
13146 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13147 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13148 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13149 TValAPInt.isOne() &&
13150 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13151 ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
13152 return SDValue();
13153 }
13154
13155 // To use the condition operand as a bitwise mask, it must have elements that
13156 // are the same size as the select elements, i.e., the condition operand must
13157 // have already been promoted from the IR select condition type <N x i1>.
13158 // Don't check if the types themselves are equal because that excludes
13159 // vector floating-point selects.
13160 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13161 return SDValue();
13162
13163 // Cond value must be 'sign splat' to be converted to a logical op.
13164 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13165 return SDValue();
13166
13167 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13168 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13169 Cond.getOpcode() == ISD::SETCC &&
13170 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13171 CondVT) {
13172 if (IsTAllZero || IsFAllOne) {
13173 SDValue CC = Cond.getOperand(2);
13174 ISD::CondCode InverseCC = ISD::getSetCCInverse(
13175 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13176 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13177 InverseCC);
13178 std::swap(TVal, FVal);
13179 std::swap(IsTAllOne, IsFAllOne);
13180 std::swap(IsTAllZero, IsFAllZero);
13181 }
13182 }
13183
13184 assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
13185 "Select condition no longer all-sign bits");
13186
13187 // select Cond, -1, 0 → bitcast Cond
13188 if (IsTAllOne && IsFAllZero)
13189 return DAG.getBitcast(VT, Cond);
13190
13191 // select Cond, -1, x → or Cond, x
13192 if (IsTAllOne) {
13193 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13194 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13195 return DAG.getBitcast(VT, Or);
13196 }
13197
13198 // select Cond, x, 0 → and Cond, x
13199 if (IsFAllZero) {
13200 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13201 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13202 return DAG.getBitcast(VT, And);
13203 }
13204
13205 // select Cond, 0, x -> and not(Cond), x
13206 if (IsTAllZero &&
13207 (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
13208 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13209 SDValue And =
13210 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13211 return DAG.getBitcast(VT, And);
13212 }
13213
13214 return SDValue();
13215}
13216
13217SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13218 SDValue N0 = N->getOperand(0);
13219 SDValue N1 = N->getOperand(1);
13220 SDValue N2 = N->getOperand(2);
13221 EVT VT = N->getValueType(0);
13222 SDLoc DL(N);
13223
13224 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13225 return V;
13226
13227 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
13228 return V;
13229
13230 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13231 if (!TLI.isTargetCanonicalSelect(N))
13232 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13233 return DAG.getSelect(DL, VT, F, N2, N1);
13234
13235 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
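// When m is true, (sext m) is all-ones, so (and C, (sext m)) = C and the
// result is X + C; when m is false the AND is 0 and the result is X, matching
// both arms of the original select.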
13236 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13239 TLI.getBooleanContents(N0.getValueType()) ==
13240 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13241 return DAG.getNode(
13242 ISD::ADD, DL, N1.getValueType(), N2,
13243 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13244 }
13245
13246 // Canonicalize integer abs.
13247 // vselect (setg[te] X, 0), X, -X ->
13248 // vselect (setgt X, -1), X, -X ->
13249 // vselect (setl[te] X, 0), -X, X ->
13250 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
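// Worked example (i8): for X = -5, Y = sra(X, 7) = -1, add(X, Y) = -6, and
// xor(-6, -1) = 5 = |X|; for X = 5, Y = 0 and the add/xor leave X unchanged.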
13251 if (N0.getOpcode() == ISD::SETCC) {
13252 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13253 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13254 bool isAbs = false;
13255 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13256
13257 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13258 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13259 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13260 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
13261 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13262 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13263 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
13264
13265 if (isAbs) {
13266 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
13267 return DAG.getNode(ISD::ABS, DL, VT, LHS);
13268
13269 SDValue Shift = DAG.getNode(
13270 ISD::SRA, DL, VT, LHS,
13271 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13272 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13273 AddToWorklist(Shift.getNode());
13274 AddToWorklist(Add.getNode());
13275 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13276 }
13277
13278 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13279 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13280 //
13281 // This is OK if we don't care about what happens if either operand is a
13282 // NaN.
13283 //
13284 if (N0.hasOneUse() &&
13285 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13286 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13287 return FMinMax;
13288 }
13289
13290 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13291 return S;
13292 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13293 return S;
13294
13295 // If this select has a condition (setcc) with narrower operands than the
13296 // select, try to widen the compare to match the select width.
13297 // TODO: This should be extended to handle any constant.
13298 // TODO: This could be extended to handle non-loading patterns, but that
13299 // requires thorough testing to avoid regressions.
13300 if (isNullOrNullSplat(RHS)) {
13301 EVT NarrowVT = LHS.getValueType();
13302 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
13303 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13304 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13305 unsigned WideWidth = WideVT.getScalarSizeInBits();
13306 bool IsSigned = isSignedIntSetCC(CC);
13307 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13308 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13309 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13310 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13311 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13312 // Both compare operands can be widened for free. The LHS can use an
13313 // extended load, and the RHS is a constant:
13314 // vselect (ext (setcc load(X), C)), N1, N2 -->
13315 // vselect (setcc extload(X), C'), N1, N2
13316 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13317 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13318 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13319 EVT WideSetCCVT = getSetCCResultType(WideVT);
13320 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13321 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13322 }
13323 }
13324
13325 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13326 return ABD;
13327
13328 // Match VSELECTs into add with unsigned saturation.
13329 if (hasOperation(ISD::UADDSAT, VT)) {
13330 // Check if one of the arms of the VSELECT is vector with all bits set.
13331 // If it's on the left side invert the predicate to simplify logic below.
13332 SDValue Other;
13333 ISD::CondCode SatCC = CC;
13334 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
13335 Other = N2;
13336 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13337 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13338 Other = N1;
13339 }
13340
13341 if (Other && Other.getOpcode() == ISD::ADD) {
13342 SDValue CondLHS = LHS, CondRHS = RHS;
13343 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13344
13345 // Canonicalize condition operands.
13346 if (SatCC == ISD::SETUGE) {
13347 std::swap(CondLHS, CondRHS);
13348 SatCC = ISD::SETULE;
13349 }
13350
13351 // We can test against either of the addition operands.
13352 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13353 // x+y >= x ? x+y : ~0 --> uaddsat x, y
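// For example (i8): x = 200, y = 100 gives x+y = 44 (wrapped), 200 <= 44 is
// false, so the select yields ~0 = 255, which is exactly uaddsat(200, 100).
// For x = 100, y = 50 there is no wrap and both sides give 150.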
13354 if (SatCC == ISD::SETULE && Other == CondRHS &&
13355 (OpLHS == CondLHS || OpRHS == CondLHS))
13356 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13357
13358 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13359 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13360 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13361 CondLHS == OpLHS) {
13362 // If the RHS is a constant we have to reverse the const
13363 // canonicalization.
13364 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13365 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13366 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13367 };
13368 if (SatCC == ISD::SETULE &&
13369 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13370 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13371 }
13372 }
13373 }
13374
13375 // Match VSELECTs into sub with unsigned saturation.
13376 if (hasOperation(ISD::USUBSAT, VT)) {
13377 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13378 // the left side invert the predicate to simplify logic below.
13379 SDValue Other;
13380 ISD::CondCode SatCC = CC;
13381 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
13382 Other = N2;
13383 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13384 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
13385 Other = N1;
13386 }
13387
13388 // zext(x) >= y ? trunc(zext(x) - y) : 0
13389 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13390 // zext(x) > y ? trunc(zext(x) - y) : 0
13391 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13392 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13393 Other.getOperand(0).getOpcode() == ISD::SUB &&
13394 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13395 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13396 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13397 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13398 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13399 DAG, DL))
13400 return R;
13401 }
13402
13403 if (Other && Other.getNumOperands() == 2) {
13404 SDValue CondRHS = RHS;
13405 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13406
13407 if (OpLHS == LHS) {
13408 // Look for a general sub with unsigned saturation first.
13409 // x >= y ? x-y : 0 --> usubsat x, y
13410 // x > y ? x-y : 0 --> usubsat x, y
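// For example (i8): x = 25, y = 10 gives x-y = 15; x = 10, y = 25 fails the
// compare and yields 0, which is exactly usubsat(10, 25).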
13411 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13412 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13413 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13414
13415 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13416 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13417 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13418 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13419 // If the RHS is a constant we have to reverse the const
13420 // canonicalization.
13421 // x > C-1 ? x+-C : 0 --> usubsat x, C
13422 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13423 return (!Op && !Cond) ||
13424 (Op && Cond &&
13425 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13426 };
13427 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13428 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13429 /*AllowUndefs*/ true)) {
13430 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13431 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13432 }
13433
13434 // Another special case: If C was a sign bit, the sub has been
13435 // canonicalized into a xor.
13436 // FIXME: Would it be better to use computeKnownBits to
13437 // determine whether it's safe to decanonicalize the xor?
13438 // x s< 0 ? x^C : 0 --> usubsat x, C
13439 APInt SplatValue;
13440 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13441 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13442 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
13443 SplatValue.isSignMask()) {
13444 // Note that we have to rebuild the RHS constant here to
13445 // ensure we don't rely on particular values of undef lanes.
13446 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13447 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13448 }
13449 }
13450 }
13451 }
13452 }
13453 }
13454
13455 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13456 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13457 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13458 return UMin;
13459 }
13460
13461 if (SimplifySelectOps(N, N1, N2))
13462 return SDValue(N, 0); // Don't revisit N.
13463
13464 // Fold (vselect all_ones, N1, N2) -> N1
13465 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13466 return N1;
13467 // Fold (vselect all_zeros, N1, N2) -> N2
13468 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13469 return N2;
13470
13471 // The ConvertSelectToConcatVector function is assuming both the above
13472 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
13473 // and addressed.
13474 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13475 N2.getOpcode() == ISD::CONCAT_VECTORS &&
13476 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13477 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13478 return CV;
13479 }
13480
13481 if (SDValue V = foldVSelectOfConstants(N))
13482 return V;
13483
13484 if (hasOperation(ISD::SRA, VT))
13485 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13486 return V;
13487
13489 return SDValue(N, 0);
13490
13491 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13492 return V;
13493
13494 return SDValue();
13495}
13496
13497SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13498 SDValue N0 = N->getOperand(0);
13499 SDValue N1 = N->getOperand(1);
13500 SDValue N2 = N->getOperand(2);
13501 SDValue N3 = N->getOperand(3);
13502 SDValue N4 = N->getOperand(4);
13503 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13504 SDLoc DL(N);
13505
13506 // fold select_cc lhs, rhs, x, x, cc -> x
13507 if (N2 == N3)
13508 return N2;
13509
13510 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13511 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13512 isNullConstant(N1))
13513 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13514
13515 // Determine if the condition we're dealing with is constant
13516 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13517 CC, DL, false)) {
13518 AddToWorklist(SCC.getNode());
13519
13520 // cond always true -> true val
13521 // cond always false -> false val
13522 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13523 return SCCC->isZero() ? N3 : N2;
13524
13525 // When the condition is UNDEF, just return the first operand. This is
13526 // consistent with DAG creation: no setcc node is created in this case.
13527 if (SCC->isUndef())
13528 return N2;
13529
13530 // Fold to a simpler select_cc
13531 if (SCC.getOpcode() == ISD::SETCC) {
13532 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13533 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13534 SCC.getOperand(2), SCC->getFlags());
13535 }
13536 }
13537
13538 // If we can fold this based on the true/false value, do so.
13539 if (SimplifySelectOps(N, N2, N3))
13540 return SDValue(N, 0); // Don't revisit N.
13541
13542 // fold select_cc into other things, such as min/max/abs
13543 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13544}
13545
13546SDValue DAGCombiner::visitSETCC(SDNode *N) {
13547 // setcc is very commonly used as an argument to brcond. This pattern
13548 // also lends itself to numerous combines and, as a result, it is desirable
13549 // to keep the argument to a brcond as a setcc as much as possible.
13550 bool PreferSetCC =
13551 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13552
13553 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13554 EVT VT = N->getValueType(0);
13555 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13556 SDLoc DL(N);
13557
13558 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13559 // If we prefer to have a setcc, and we don't, we'll try our best to
13560 // recreate one using rebuildSetCC.
13561 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13562 SDValue NewSetCC = rebuildSetCC(Combined);
13563
13564 // We don't have anything interesting to combine to.
13565 if (NewSetCC.getNode() == N)
13566 return SDValue();
13567
13568 if (NewSetCC)
13569 return NewSetCC;
13570 }
13571 return Combined;
13572 }
13573
13574 // Optimize
13575 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13576 // or
13577 // 2) (icmp eq/ne X, (rotate X, C1))
13578 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13579 // remaining bits (i.e., something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
13580 // Then:
13581 // If C1 is a power of 2, then the rotate and shift+and versions are
13582 // equivalent, so we can interchange them depending on target preference.
13583 // Otherwise, if we have the shift+and version we can interchange srl/shl,
13584 // which in turn affects the constant C0. We can use this to get better
13585 // constants, as determined by target preference.
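// For the example above: the low and high halves of x64 are equal exactly when
// x64 == rotr(x64, 32), so with C1 = 32 (a power of 2) either form can be used,
// whichever the target prefers.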
13586 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13587 auto IsAndWithShift = [](SDValue A, SDValue B) {
13588 return A.getOpcode() == ISD::AND &&
13589 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13590 A.getOperand(0) == B.getOperand(0);
13591 };
13592 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13593 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13594 B.getOperand(0) == A;
13595 };
13596 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13597 bool IsRotate = false;
13598
13599 // Find either shift+and or rotate pattern.
13600 if (IsAndWithShift(N0, N1)) {
13601 AndOrOp = N0;
13602 ShiftOrRotate = N1;
13603 } else if (IsAndWithShift(N1, N0)) {
13604 AndOrOp = N1;
13605 ShiftOrRotate = N0;
13606 } else if (IsRotateWithOp(N0, N1)) {
13607 IsRotate = true;
13608 AndOrOp = N0;
13609 ShiftOrRotate = N1;
13610 } else if (IsRotateWithOp(N1, N0)) {
13611 IsRotate = true;
13612 AndOrOp = N1;
13613 ShiftOrRotate = N0;
13614 }
13615
13616 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13617 (IsRotate || AndOrOp.hasOneUse())) {
13618 EVT OpVT = N0.getValueType();
13619 // Get constant shift/rotate amount and possibly mask (if its shift+and
13620 // variant).
13621 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13622 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13623 /*AllowTrunc*/ false);
13624 if (CNode == nullptr)
13625 return std::nullopt;
13626 return CNode->getAPIntValue();
13627 };
13628 std::optional<APInt> AndCMask =
13629 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13630 std::optional<APInt> ShiftCAmt =
13631 GetAPIntValue(ShiftOrRotate.getOperand(1));
13632 unsigned NumBits = OpVT.getScalarSizeInBits();
13633
13634 // We found constants.
13635 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13636 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13637 // Check that the constants meet the constraints.
13638 bool CanTransform = IsRotate;
13639 if (!CanTransform) {
13640 // Check that the mask and shift complement each other
13641 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13642 // Check that we are comparing all bits
13643 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13644 // Check that the and mask is correct for the shift
13645 CanTransform &=
13646 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13647 }
13648
13649 // See if target prefers another shift/rotate opcode.
13650 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13651 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13652 // Transform is valid and we have a new preference.
13653 if (CanTransform && NewShiftOpc != ShiftOpc) {
13654 SDValue NewShiftOrRotate =
13655 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13656 ShiftOrRotate.getOperand(1));
13657 SDValue NewAndOrOp = SDValue();
13658
13659 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13660 APInt NewMask =
13661 NewShiftOpc == ISD::SHL
13662 ? APInt::getHighBitsSet(NumBits,
13663 NumBits - ShiftCAmt->getZExtValue())
13664 : APInt::getLowBitsSet(NumBits,
13665 NumBits - ShiftCAmt->getZExtValue());
13666 NewAndOrOp =
13667 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13668 DAG.getConstant(NewMask, DL, OpVT));
13669 } else {
13670 NewAndOrOp = ShiftOrRotate.getOperand(0);
13671 }
13672
13673 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13674 }
13675 }
13676 }
13677 }
13678 return SDValue();
13679}
13680
13681SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13682 SDValue LHS = N->getOperand(0);
13683 SDValue RHS = N->getOperand(1);
13684 SDValue Carry = N->getOperand(2);
13685 SDValue Cond = N->getOperand(3);
13686
13687 // If Carry is false, fold to a regular SETCC.
13688 if (isNullConstant(Carry))
13689 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13690
13691 return SDValue();
13692}
13693
13694/// Check if N satisfies:
13695/// N is used once.
13696/// N is a Load.
13697 /// The load is compatible with ExtOpcode. This means:
13698 /// If the load has an explicit zero/sign extension, ExtOpcode must have the
13699 /// same extension.
13700 /// Otherwise returns true.
13701static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13702 if (!N.hasOneUse())
13703 return false;
13704
13705 if (!isa<LoadSDNode>(N))
13706 return false;
13707
13708 LoadSDNode *Load = cast<LoadSDNode>(N);
13709 ISD::LoadExtType LoadExt = Load->getExtensionType();
13710 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13711 return true;
13712
13713 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13714 // extension.
13715 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13716 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13717 return false;
13718
13719 return true;
13720}
13721
13722/// Fold
13723/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13724/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13725/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13726/// This function is called by the DAGCombiner when visiting sext/zext/aext
13727/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13728static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13729 SelectionDAG &DAG, const SDLoc &DL,
13730 CombineLevel Level) {
13731 unsigned Opcode = N->getOpcode();
13732 SDValue N0 = N->getOperand(0);
13733 EVT VT = N->getValueType(0);
13734 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13735 Opcode == ISD::ANY_EXTEND) &&
13736 "Expected EXTEND dag node in input!");
13737
13738 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13739 !N0.hasOneUse())
13740 return SDValue();
13741
13742 SDValue Op1 = N0->getOperand(1);
13743 SDValue Op2 = N0->getOperand(2);
13744 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13745 return SDValue();
13746
13747 auto ExtLoadOpcode = ISD::EXTLOAD;
13748 if (Opcode == ISD::SIGN_EXTEND)
13749 ExtLoadOpcode = ISD::SEXTLOAD;
13750 else if (Opcode == ISD::ZERO_EXTEND)
13751 ExtLoadOpcode = ISD::ZEXTLOAD;
13752
13753 // An illegal VSELECT may fail instruction selection if it occurs after
13754 // legalization (DAG Combine2), so conservatively check the OperationAction.
13755 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13756 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13757 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13758 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13759 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13760 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13761 return SDValue();
13762
13763 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13764 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13765 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13766}
13767
13768/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13769/// a build_vector of constants.
13770/// This function is called by the DAGCombiner when visiting sext/zext/aext
13771/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13772/// Vector extends are not folded if operations are legal; this is to
13773/// avoid introducing illegal build_vector dag nodes.
13774static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13775 const TargetLowering &TLI,
13776 SelectionDAG &DAG, bool LegalTypes) {
13777 unsigned Opcode = N->getOpcode();
13778 SDValue N0 = N->getOperand(0);
13779 EVT VT = N->getValueType(0);
13780
13781 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13782 "Expected EXTEND dag node in input!");
13783
13784 // fold (sext c1) -> c1
13785 // fold (zext c1) -> c1
13786 // fold (aext c1) -> c1
13787 if (isa<ConstantSDNode>(N0))
13788 return DAG.getNode(Opcode, DL, VT, N0);
13789
13790 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13791 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13792 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13793 if (N0->getOpcode() == ISD::SELECT) {
13794 SDValue Op1 = N0->getOperand(1);
13795 SDValue Op2 = N0->getOperand(2);
13796 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13797 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13798 // For any_extend, choose sign extension of the constants to allow a
13799 // possible further transform to sign_extend_inreg, i.e.:
13800 //
13801 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13802 // t2: i64 = any_extend t1
13803 // -->
13804 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13805 // -->
13806 // t4: i64 = sign_extend_inreg t3
13807 unsigned FoldOpc = Opcode;
13808 if (FoldOpc == ISD::ANY_EXTEND)
13809 FoldOpc = ISD::SIGN_EXTEND;
13810 return DAG.getSelect(DL, VT, N0->getOperand(0),
13811 DAG.getNode(FoldOpc, DL, VT, Op1),
13812 DAG.getNode(FoldOpc, DL, VT, Op2));
13813 }
13814 }
13815
13816 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13817 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13818 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13819 EVT SVT = VT.getScalarType();
13820 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13821 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13822 return SDValue();
13823
13824 // We can fold this node into a build_vector.
13825 unsigned VTBits = SVT.getSizeInBits();
13826 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13827 SmallVector<SDValue, 8> Elts;
13828 unsigned NumElts = VT.getVectorNumElements();
13829
13830 for (unsigned i = 0; i != NumElts; ++i) {
13831 SDValue Op = N0.getOperand(i);
13832 if (Op.isUndef()) {
13833 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13834 Elts.push_back(DAG.getUNDEF(SVT));
13835 else
13836 Elts.push_back(DAG.getConstant(0, DL, SVT));
13837 continue;
13838 }
13839
13840 SDLoc DL(Op);
13841 // Get the constant value, truncating it to the source scalar width if needed.
13842 // Nodes like build_vector might have constants wider than the scalar type.
13843 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13844 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13845 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13846 else
13847 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13848 }
13849
13850 return DAG.getBuildVector(VT, DL, Elts);
13851}
13852
13853// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
13854// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13855// transformation. Returns true if the extensions are possible and the
13856// above-mentioned transformation is profitable.
13857static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13858 unsigned ExtOpc,
13859 SmallVectorImpl<SDNode *> &ExtendNodes,
13860 const TargetLowering &TLI) {
13861 bool HasCopyToRegUses = false;
13862 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13863 for (SDUse &Use : N0->uses()) {
13864 SDNode *User = Use.getUser();
13865 if (User == N)
13866 continue;
13867 if (Use.getResNo() != N0.getResNo())
13868 continue;
13869 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13870 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13871 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13872 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13873 // Sign bits will be lost after a zext.
13874 return false;
13875 bool Add = false;
13876 for (unsigned i = 0; i != 2; ++i) {
13877 SDValue UseOp = User->getOperand(i);
13878 if (UseOp == N0)
13879 continue;
13880 if (!isa<ConstantSDNode>(UseOp))
13881 return false;
13882 Add = true;
13883 }
13884 if (Add)
13885 ExtendNodes.push_back(User);
13886 continue;
13887 }
13888 // If truncates aren't free and there are users we can't
13889 // extend, it isn't worthwhile.
13890 if (!isTruncFree)
13891 return false;
13892 // Remember if this value is live-out.
13893 if (User->getOpcode() == ISD::CopyToReg)
13894 HasCopyToRegUses = true;
13895 }
13896
13897 if (HasCopyToRegUses) {
13898 bool BothLiveOut = false;
13899 for (SDUse &Use : N->uses()) {
13900 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13901 BothLiveOut = true;
13902 break;
13903 }
13904 }
13905 if (BothLiveOut)
13906 // Both unextended and extended values are live out. There had better be
13907 // a good reason for the transformation.
13908 return !ExtendNodes.empty();
13909 }
13910 return true;
13911}
13912
13913void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13914 SDValue OrigLoad, SDValue ExtLoad,
13915 ISD::NodeType ExtType) {
13916 // Extend SetCC uses if necessary.
13917 SDLoc DL(ExtLoad);
13918 for (SDNode *SetCC : SetCCs) {
13919 SmallVector<SDValue, 4> Ops;
13920
13921 for (unsigned j = 0; j != 2; ++j) {
13922 SDValue SOp = SetCC->getOperand(j);
13923 if (SOp == OrigLoad)
13924 Ops.push_back(ExtLoad);
13925 else
13926 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13927 }
13928
13929 Ops.push_back(SetCC->getOperand(2));
13930 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13931 }
13932}
13933
13934// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13935SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13936 SDValue N0 = N->getOperand(0);
13937 EVT DstVT = N->getValueType(0);
13938 EVT SrcVT = N0.getValueType();
13939
13940 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13941 N->getOpcode() == ISD::ZERO_EXTEND) &&
13942 "Unexpected node type (not an extend)!");
13943
13944 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13945 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13946 // (v8i32 (sext (v8i16 (load x))))
13947 // into:
13948 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13949 // (v4i32 (sextload (x + 16)))))
13950 // Where uses of the original load, i.e.:
13951 // (v8i16 (load x))
13952 // are replaced with:
13953 // (v8i16 (truncate
13954 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13955 // (v4i32 (sextload (x + 16)))))))
13956 //
13957 // This combine is only applicable to illegal, but splittable, vectors.
13958 // All legal types, and illegal non-vector types, are handled elsewhere.
13959 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13960 //
13961 if (N0->getOpcode() != ISD::LOAD)
13962 return SDValue();
13963
13964 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13965
13966 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13967 !N0.hasOneUse() || !LN0->isSimple() ||
13968 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13969 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13970 return SDValue();
13971
13972 SmallVector<SDNode *, 4> SetCCs;
13973 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13973 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13974 return SDValue();
13975
13976 ISD::LoadExtType ExtType =
13977 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13978
13979 // Try to split the vector types to get down to legal types.
13980 EVT SplitSrcVT = SrcVT;
13981 EVT SplitDstVT = DstVT;
13982 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13983 SplitSrcVT.getVectorNumElements() > 1) {
13984 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13985 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13986 }
13987
13988 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13989 return SDValue();
13990
13991 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13992
13993 SDLoc DL(N);
13994 const unsigned NumSplits =
13995 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13996 const unsigned Stride = SplitSrcVT.getStoreSize();
13997 SmallVector<SDValue, 4> Loads;
13998 SmallVector<SDValue, 4> Chains;
13999
14000 SDValue BasePtr = LN0->getBasePtr();
14001 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14002 const unsigned Offset = Idx * Stride;
14003
14004 SDValue SplitLoad =
14005 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14006 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14007 SplitSrcVT, LN0->getBaseAlign(),
14008 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14009
14010 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14011
14012 Loads.push_back(SplitLoad.getValue(0));
14013 Chains.push_back(SplitLoad.getValue(1));
14014 }
14015
14016 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14017 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14018
14019 // Simplify TF.
14020 AddToWorklist(NewChain.getNode());
14021
14022 CombineTo(N, NewValue);
14023
14024 // Replace uses of the original load (before extension)
14025 // with a truncate of the concatenated sextloaded vectors.
14026 SDValue Trunc =
14027 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14028 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14029 CombineTo(N0.getNode(), Trunc, NewChain);
14030 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14031}
14032
14033// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14034// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14035SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14036 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14037 EVT VT = N->getValueType(0);
14038 EVT OrigVT = N->getOperand(0).getValueType();
14039 if (TLI.isZExtFree(OrigVT, VT))
14040 return SDValue();
14041
14042 // and/or/xor
14043 SDValue N0 = N->getOperand(0);
14044 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14045 N0.getOperand(1).getOpcode() != ISD::Constant ||
14046 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14047 return SDValue();
14048
14049 // shl/shr
14050 SDValue N1 = N0->getOperand(0);
14051 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14052 N1.getOperand(1).getOpcode() != ISD::Constant ||
14053 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14054 return SDValue();
14055
14056 // load
14057 if (!isa<LoadSDNode>(N1.getOperand(0)))
14058 return SDValue();
14059 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14060 EVT MemVT = Load->getMemoryVT();
14061 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14062 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14063 return SDValue();
14064
14065
14066 // If the shift op is SHL, the logic op must be AND, otherwise the result
14067 // will be wrong.
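 // For example, with an i8 load value of 0x80 zero-extended to i32:
 //   (zext (or (shl (load x), 1), 1))       = zext (or 0x00, 1) = 0x00000001
 //   (or (shl (zextload x), 1), (zext 1))   = or 0x100, 1       = 0x00000101
 // With AND, the zero-extended mask clears the bits shifted above bit 7, so
 // both forms agree.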
14068 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14069 return SDValue();
14070
14071 if (!N0.hasOneUse() || !N1.hasOneUse())
14072 return SDValue();
14073
14074 SmallVector<SDNode *, 4> SetCCs;
14075 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14076 ISD::ZERO_EXTEND, SetCCs, TLI))
14077 return SDValue();
14078
14079 // Actually do the transformation.
14080 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14081 Load->getChain(), Load->getBasePtr(),
14082 Load->getMemoryVT(), Load->getMemOperand());
14083
14084 SDLoc DL1(N1);
14085 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14086 N1.getOperand(1));
14087
14088 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14089 SDLoc DL0(N0);
14090 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14091 DAG.getConstant(Mask, DL0, VT));
14092
14093 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14094 CombineTo(N, And);
14095 if (SDValue(Load, 0).hasOneUse()) {
14096 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14097 } else {
14098 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14099 Load->getValueType(0), ExtLoad);
14100 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14101 }
14102
14103 // N0 is dead at this point.
14104 recursivelyDeleteUnusedNodes(N0.getNode());
14105
14106 return SDValue(N,0); // Return N so it doesn't get rechecked!
14107}
14108
14109/// If we're narrowing or widening the result of a vector select and the final
14110/// size is the same size as a setcc (compare) feeding the select, then try to
14111/// apply the cast operation to the select's operands because matching vector
14112/// sizes for a select condition and other operands should be more efficient.
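/// For example, (v4i32 (trunc (vselect (setcc v4i32 X, Y), v4i64 A, v4i64 B)))
/// can become (vselect (setcc v4i32 X, Y), (trunc A), (trunc B)), so the
/// select operates at the same width as its condition.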
14113SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14114 unsigned CastOpcode = Cast->getOpcode();
14115 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14116 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14117 CastOpcode == ISD::FP_ROUND) &&
14118 "Unexpected opcode for vector select narrowing/widening");
14119
14120 // We only do this transform before legal ops because the pattern may be
14121 // obfuscated by target-specific operations after legalization. Do not create
14122 // an illegal select op, however, because that may be difficult to lower.
14123 EVT VT = Cast->getValueType(0);
14124 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14125 return SDValue();
14126
14127 SDValue VSel = Cast->getOperand(0);
14128 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14129 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14130 return SDValue();
14131
14132 // Does the setcc have the same vector size as the casted select?
14133 SDValue SetCC = VSel.getOperand(0);
14134 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14135 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14136 return SDValue();
14137
14138 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14139 SDValue A = VSel.getOperand(1);
14140 SDValue B = VSel.getOperand(2);
14141 SDValue CastA, CastB;
14142 SDLoc DL(Cast);
14143 if (CastOpcode == ISD::FP_ROUND) {
14144 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14145 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14146 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14147 } else {
14148 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14149 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14150 }
14151 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14152}
14153
14154// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14155// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14156static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14157 const TargetLowering &TLI, EVT VT,
14158 bool LegalOperations, SDNode *N,
14159 SDValue N0, ISD::LoadExtType ExtLoadType) {
14160 SDNode *N0Node = N0.getNode();
14161 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14162 : ISD::isZEXTLoad(N0Node);
14163 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14164 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14165 return SDValue();
14166
14167 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14168 EVT MemVT = LN0->getMemoryVT();
14169 if ((LegalOperations || !LN0->isSimple() ||
14170 VT.isVector()) &&
14171 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14172 return SDValue();
14173
14174 SDValue ExtLoad =
14175 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14176 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14177 Combiner.CombineTo(N, ExtLoad);
14178 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14179 if (LN0->use_empty())
14180 Combiner.recursivelyDeleteUnusedNodes(LN0);
14181 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14182}
14183
14184// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14185// Only generate vector extloads when 1) they're legal, and 2) they are
14186// deemed desirable by the target. NonNegZExt may be set to true when a zero
14187// extend carries the nonneg flag, so a sextload can be used if profitable.
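// A zero extend with the nonneg flag guarantees the extended value is
// non-negative, so sign- and zero-extension produce the same result; a
// sextload is preferred when one of the load's users is a signed comparison.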
14188static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14189 const TargetLowering &TLI, EVT VT,
14190 bool LegalOperations, SDNode *N, SDValue N0,
14191 ISD::LoadExtType ExtLoadType,
14192 ISD::NodeType ExtOpc,
14193 bool NonNegZExt = false) {
14194 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14195 return {};
14196
14197 // If this is zext nneg, see if it would make sense to treat it as a sext.
14198 if (NonNegZExt) {
14199 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14200 "Unexpected load type or opcode");
14201 for (SDNode *User : N0->users()) {
14202 if (User->getOpcode() == ISD::SETCC) {
14203 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14204 if (ISD::isSignedIntSetCC(CC)) {
14205 ExtLoadType = ISD::SEXTLOAD;
14206 ExtOpc = ISD::SIGN_EXTEND;
14207 break;
14208 }
14209 }
14210 }
14211 }
14212
14213 // TODO: isFixedLengthVector() should be removed, with any negative effects
14214 // on code generation left to the target's implementation of
14215 // isVectorLoadExtDesirable().
14216 if ((LegalOperations || VT.isFixedLengthVector() ||
14217 !cast<LoadSDNode>(N0)->isSimple()) &&
14218 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14219 return {};
14220
14221 bool DoXform = true;
14222 SmallVector<SDNode *, 4> SetCCs;
14223 if (!N0.hasOneUse())
14224 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14225 if (VT.isVector())
14226 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14227 if (!DoXform)
14228 return {};
14229
14230 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14231 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14232 LN0->getBasePtr(), N0.getValueType(),
14233 LN0->getMemOperand());
14234 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14235 // If the load value is used only by N, replace it via CombineTo N.
14236 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14237 Combiner.CombineTo(N, ExtLoad);
14238 if (NoReplaceTrunc) {
14239 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14240 Combiner.recursivelyDeleteUnusedNodes(LN0);
14241 } else {
14242 SDValue Trunc =
14243 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14244 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14245 }
14246 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14247}
14248
14249static SDValue
14250tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14251 bool LegalOperations, SDNode *N, SDValue N0,
14252 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14253 if (!N0.hasOneUse())
14254 return SDValue();
14255
14256 auto *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14257 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14258 return SDValue();
14259
14260 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14261 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14262 return SDValue();
14263
14264 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14265 return SDValue();
14266
14267 SDLoc dl(Ld);
14268 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14269 SDValue NewLoad = DAG.getMaskedLoad(
14270 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14271 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14272 ExtLoadType, Ld->isExpandingLoad());
14273 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14274 return NewLoad;
14275}
14276
14277// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14278static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14279 const TargetLowering &TLI, EVT VT,
14280 SDValue N0,
14281 ISD::LoadExtType ExtLoadType) {
14282 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14283 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14284 return {};
14285 EVT MemoryVT = ALoad->getMemoryVT();
14286 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14287 return {};
14288 // Can't fold into ALoad if it is already extending differently.
14289 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14290 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14291 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14292 return {};
14293
14294 EVT OrigVT = ALoad->getValueType(0);
14295 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14296 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14297 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14298 ALoad->getBasePtr(), ALoad->getMemOperand()));
14299 DAG.ReplaceAllUsesOfValueWith(
14300 SDValue(ALoad, 0),
14301 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14302 // Update the chain uses.
14303 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14304 return SDValue(NewALoad, 0);
14305}
14306
14307static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14308 bool LegalOperations) {
14309 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14310 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14311
14312 SDValue SetCC = N->getOperand(0);
14313 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14314 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14315 return SDValue();
14316
14317 SDValue X = SetCC.getOperand(0);
14318 SDValue Ones = SetCC.getOperand(1);
14319 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14320 EVT VT = N->getValueType(0);
14321 EVT XVT = X.getValueType();
14322 // setge X, C is canonicalized to setgt, so we do not need to match that
14323 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14324 // not require the 'not' op.
14325 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14326 // Invert and smear/shift the sign bit:
14327 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14328 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
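 // E.g. for i32 and X = 5: (setgt 5, -1) is true, so the extended result
 // should be -1 (sext) or 1 (zext); (not 5) = 0xfffffffa, and sra/srl by 31
 // produces exactly -1 or 1.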
14329 SDLoc DL(N);
14330 unsigned ShCt = VT.getSizeInBits() - 1;
14331 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14332 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14333 SDValue NotX = DAG.getNOT(DL, X, VT);
14334 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14335 auto ShiftOpcode =
14336 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14337 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14338 }
14339 }
14340 return SDValue();
14341}
14342
14343SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14344 SDValue N0 = N->getOperand(0);
14345 if (N0.getOpcode() != ISD::SETCC)
14346 return SDValue();
14347
14348 SDValue N00 = N0.getOperand(0);
14349 SDValue N01 = N0.getOperand(1);
14350 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14351 EVT VT = N->getValueType(0);
14352 EVT N00VT = N00.getValueType();
14353 SDLoc DL(N);
14354
14355 // Propagate fast-math-flags.
14356 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14357
14358 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14359 // the same size as the compared operands. Try to optimize sext(setcc())
14360 // if this is the case.
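 // E.g. a v4i32 compare on such targets produces a v4i32 mask, so
 // (sext (setcc v4i32 a, b, cc)) to v4i32 can be replaced by a setcc that
 // produces the wider mask type directly.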
14361 if (VT.isVector() && !LegalOperations &&
14362 TLI.getBooleanContents(N00VT) ==
14363 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14364 EVT SVT = getSetCCResultType(N00VT);
14365
14366 // If we already have the desired type, don't change it.
14367 if (SVT != N0.getValueType()) {
14368 // We know that the # elements of the results is the same as the
14369 // # elements of the compare (and the # elements of the compare result
14370 // for that matter). Check to see that they are the same size. If so,
14371 // we know that the element size of the sext'd result matches the
14372 // element size of the compare operands.
14373 if (VT.getSizeInBits() == SVT.getSizeInBits())
14374 return DAG.getSetCC(DL, VT, N00, N01, CC);
14375
14376 // If the desired elements are smaller or larger than the source
14377 // elements, we can use a matching integer vector type and then
14378 // truncate/sign extend.
14379 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14380 if (SVT == MatchingVecType) {
14381 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14382 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14383 }
14384 }
14385
14386 // Try to eliminate the sext of a setcc by zexting the compare operands.
14387 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14388 !TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {
14389 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14390 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14391 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14392
14393 // We have an unsupported narrow vector compare op that would be legal
14394 // if extended to the destination type. See if the compare operands
14395 // can be freely extended to the destination type.
14396 auto IsFreeToExtend = [&](SDValue V) {
14397 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14398 return true;
14399 // Match a simple, non-extended load that can be converted to a
14400 // legal {z/s}ext-load.
14401 // TODO: Allow widening of an existing {z/s}ext-load?
14402 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14403 ISD::isUNINDEXEDLoad(V.getNode()) &&
14404 cast<LoadSDNode>(V)->isSimple() &&
14405 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14406 return false;
14407
14408 // Non-chain users of this value must either be the setcc in this
14409 // sequence or extends that can be folded into the new {z/s}ext-load.
14410 for (SDUse &Use : V->uses()) {
14411 // Skip uses of the chain and the setcc.
14412 SDNode *User = Use.getUser();
14413 if (Use.getResNo() != 0 || User == N0.getNode())
14414 continue;
14415 // Extra users must have exactly the same cast we are about to create.
14416 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14417 // is enhanced similarly.
14418 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14419 return false;
14420 }
14421 return true;
14422 };
14423
14424 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14425 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14426 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14427 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14428 }
14429 }
14430 }
14431
14432 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14433 // Here, T can be 1 or -1, depending on the type of the setcc and
14434 // getBooleanContents().
14435 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14436
14437 // To determine the "true" side of the select, we need to know the high bit
14438 // of the value returned by the setcc if it evaluates to true.
14439 // If the type of the setcc is i1, then the true case of the select is just
14440 // sext(i1 1), that is, -1.
14441 // If the type of the setcc is larger (say, i8) then the value of the high
14442 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14443 // of the appropriate width.
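 // E.g. (sext (setcc i32 a, b, cc)) to i32 becomes
 // (select (setcc a, b, cc), -1, 0).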
14444 SDValue ExtTrueVal = (SetCCWidth == 1)
14445 ? DAG.getAllOnesConstant(DL, VT)
14446 : DAG.getBoolConstant(true, DL, VT, N00VT);
14447 SDValue Zero = DAG.getConstant(0, DL, VT);
14448 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14449 return SCC;
14450
14451 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14452 EVT SetCCVT = getSetCCResultType(N00VT);
14453 // Don't do this transform for i1 because there's a select transform
14454 // that would reverse it.
14455 // TODO: We should not do this transform at all without a target hook
14456 // because a sext is likely cheaper than a select?
14457 if (SetCCVT.getScalarSizeInBits() != 1 &&
14458 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14459 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14460 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14461 }
14462 }
14463
14464 return SDValue();
14465}
14466
14467SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14468 SDValue N0 = N->getOperand(0);
14469 EVT VT = N->getValueType(0);
14470 SDLoc DL(N);
14471
14472 if (VT.isVector())
14473 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14474 return FoldedVOp;
14475
14476 // sext(undef) = 0 because the top bits will all be the same.
14477 if (N0.isUndef())
14478 return DAG.getConstant(0, DL, VT);
14479
14480 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14481 return Res;
14482
14483 // fold (sext (sext x)) -> (sext x)
14484 // fold (sext (aext x)) -> (sext x)
14485 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14486 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14487
14488 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14489 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14490 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14491 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14492 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14493 N0.getOperand(0));
14494
14495 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14496 SDValue N00 = N0.getOperand(0);
14497 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14498 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14499 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14500 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14501 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14502 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14503 }
14504
14505 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14506 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14507 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14508 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14509 N0->getOperand(1));
14510 }
14511 }
14512 }
14513
14514 if (N0.getOpcode() == ISD::TRUNCATE) {
14515 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14516 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14517 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14518 SDNode *oye = N0.getOperand(0).getNode();
14519 if (NarrowLoad.getNode() != N0.getNode()) {
14520 CombineTo(N0.getNode(), NarrowLoad);
14521 // CombineTo deleted the truncate, if needed, but not what's under it.
14522 AddToWorklist(oye);
14523 }
14524 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14525 }
14526
14527 // See if the value being truncated is already sign extended. If so, just
14528 // eliminate the trunc/sext pair.
14529 SDValue Op = N0.getOperand(0);
14530 unsigned OpBits = Op.getScalarValueSizeInBits();
14531 unsigned MidBits = N0.getScalarValueSizeInBits();
14532 unsigned DestBits = VT.getScalarSizeInBits();
14533
14534 if (N0->getFlags().hasNoSignedWrap() ||
14535 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14536 if (OpBits == DestBits) {
14537 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14538 // bits, it is already ready.
14539 return Op;
14540 }
14541
14542 if (OpBits < DestBits) {
14543 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14544 // bits, just sext from i32.
14545 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14546 }
14547
14548 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14549 // bits, just truncate to i32.
14550 SDNodeFlags Flags;
14551 Flags.setNoSignedWrap(true);
14552 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14553 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14554 }
14555
14556 // fold (sext (truncate x)) -> (sextinreg x).
14557 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14558 N0.getValueType())) {
14559 if (OpBits < DestBits)
14560 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14561 else if (OpBits > DestBits)
14562 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14563 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14564 DAG.getValueType(N0.getValueType()));
14565 }
14566 }
14567
14568 // Try to simplify (sext (load x)).
14569 if (SDValue foldedExt =
14570 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14571 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14572 return foldedExt;
14573
14574 if (SDValue foldedExt =
14575 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14576 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14577 return foldedExt;
14578
14579 // fold (sext (load x)) to multiple smaller sextloads.
14580 // Only on illegal but splittable vectors.
14581 if (SDValue ExtLoad = CombineExtLoad(N))
14582 return ExtLoad;
14583
14584 // Try to simplify (sext (sextload x)).
14585 if (SDValue foldedExt = tryToFoldExtOfExtload(
14586 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14587 return foldedExt;
14588
14589 // Try to simplify (sext (atomic_load x)).
14590 if (SDValue foldedExt =
14591 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14592 return foldedExt;
14593
14594 // fold (sext (and/or/xor (load x), cst)) ->
14595 // (and/or/xor (sextload x), (sext cst))
14596 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14597 isa<LoadSDNode>(N0.getOperand(0)) &&
14598 N0.getOperand(1).getOpcode() == ISD::Constant &&
14599 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14600 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14601 EVT MemVT = LN00->getMemoryVT();
14602 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14603 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14604 SmallVector<SDNode *, 4> SetCCs;
14605 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14606 ISD::SIGN_EXTEND, SetCCs, TLI);
14607 if (DoXform) {
14608 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14609 LN00->getChain(), LN00->getBasePtr(),
14610 LN00->getMemoryVT(),
14611 LN00->getMemOperand());
14612 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14613 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14614 ExtLoad, DAG.getConstant(Mask, DL, VT));
14615 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14616 bool NoReplaceTruncAnd = !N0.hasOneUse();
14617 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14618 CombineTo(N, And);
14619 // If N0 has multiple uses, change other uses as well.
14620 if (NoReplaceTruncAnd) {
14621 SDValue TruncAnd =
14622 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14623 CombineTo(N0.getNode(), TruncAnd);
14624 }
14625 if (NoReplaceTrunc) {
14626 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14627 } else {
14628 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14629 LN00->getValueType(0), ExtLoad);
14630 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14631 }
14632 return SDValue(N,0); // Return N so it doesn't get rechecked!
14633 }
14634 }
14635 }
14636
14637 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14638 return V;
14639
14640 if (SDValue V = foldSextSetcc(N))
14641 return V;
14642
14643 // fold (sext x) -> (zext x) if the sign bit is known zero.
14644 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14645 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14646 DAG.SignBitIsZero(N0))
14647 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14648
14649 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14650 return NewVSel;
14651
14652 // Eliminate this sign extend by doing a negation in the destination type:
14653 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14654 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14658 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14659 return DAG.getNegative(Zext, DL, VT);
14660 }
14661 // Eliminate this sign extend by doing a decrement in the destination type:
14662 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14663 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14667 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14668 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14669 }
14670
14671 // fold sext (not i1 X) -> add (zext i1 X), -1
14672 // TODO: This could be extended to handle bool vectors.
14673 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14674 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14675 TLI.isOperationLegal(ISD::ADD, VT)))) {
14676 // If we can eliminate the 'not', the sext form should be better
14677 if (SDValue NewXor = visitXOR(N0.getNode())) {
14678 // Returning N0 is a form of in-visit replacement that may have
14679 // invalidated N0.
14680 if (NewXor.getNode() == N0.getNode()) {
14681 // Return SDValue here as the xor should have already been replaced in
14682 // this sext.
14683 return SDValue();
14684 }
14685
14686 // Return a new sext with the new xor.
14687 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14688 }
14689
14690 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14691 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14692 }
14693
14694 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14695 return Res;
14696
14697 return SDValue();
14698}
14699
14700/// Given an extending node with a pop-count operand, if the target does not
14701/// support a pop-count in the narrow source type but does support it in the
14702/// destination type, widen the pop-count to the destination type.
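/// This is sound because zero extension only introduces zero bits, so the
/// population count of the value is unchanged, e.g.
/// (i32 (zext (ctpop i16 X))) == (ctpop (i32 (zext i16 X))).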
14703static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14704 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14705 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14706 "Expected extend op");
14707
14708 SDValue CtPop = Extend->getOperand(0);
14709 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14710 return SDValue();
14711
14712 EVT VT = Extend->getValueType(0);
14713 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14714 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14715 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14716 return SDValue();
14717
14718 // zext (ctpop X) --> ctpop (zext X)
14719 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14720 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14721}
14722
14723// If we have (zext (abs X)) where X is a type that will be promoted by type
14724// legalization, convert to (abs (sext X)). But don't extend past a legal type.
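// Sign extension is used so that ABS still sees the original sign, e.g. for
// i8 X = -5 promoted to i16: (zext (abs X)) = 5 and (abs (sext X)) = 5,
// whereas zero-extending X first would lose the sign.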
14725static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14726 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14727
14728 EVT VT = Extend->getValueType(0);
14729 if (VT.isVector())
14730 return SDValue();
14731
14732 SDValue Abs = Extend->getOperand(0);
14733 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14734 return SDValue();
14735
14736 EVT AbsVT = Abs.getValueType();
14737 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14738 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14739 TargetLowering::TypePromoteInteger)
14740 return SDValue();
14741
14742 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14743
14744 SDValue SExt =
14745 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14746 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14747 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14748}
14749
14750SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14751 SDValue N0 = N->getOperand(0);
14752 EVT VT = N->getValueType(0);
14753 SDLoc DL(N);
14754
14755 if (VT.isVector())
14756 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14757 return FoldedVOp;
14758
14759 // zext(undef) = 0
14760 if (N0.isUndef())
14761 return DAG.getConstant(0, DL, VT);
14762
14763 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14764 return Res;
14765
14766 // fold (zext (zext x)) -> (zext x)
14767 // fold (zext (aext x)) -> (zext x)
14768 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14769 SDNodeFlags Flags;
14770 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14771 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14772 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14773 }
14774
14775 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14776 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14779 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14780
14781 // fold (zext (truncate x)) -> (zext x) or
14782 // (zext (truncate x)) -> (truncate x)
14783 // This is valid when the truncated bits of x are already zero.
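 // E.g. if the upper 24 bits of an i32 x are known zero, then
 // (zext i64 (trunc i8 x)) yields the same value as (zext i64 x), so the
 // truncate can be bypassed.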
14784 SDValue Op;
14785 KnownBits Known;
14786 if (isTruncateOf(DAG, N0, Op, Known)) {
14787 APInt TruncatedBits =
14788 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14789 APInt(Op.getScalarValueSizeInBits(), 0) :
14790 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14791 N0.getScalarValueSizeInBits(),
14792 std::min(Op.getScalarValueSizeInBits(),
14793 VT.getScalarSizeInBits()));
14794 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14795 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14796 DAG.salvageDebugInfo(*N0.getNode());
14797
14798 return ZExtOrTrunc;
14799 }
14800 }
14801
14802 // fold (zext (truncate x)) -> (and x, mask)
14803 if (N0.getOpcode() == ISD::TRUNCATE) {
14804 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14805 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14806 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14807 SDNode *oye = N0.getOperand(0).getNode();
14808 if (NarrowLoad.getNode() != N0.getNode()) {
14809 CombineTo(N0.getNode(), NarrowLoad);
14810 // CombineTo deleted the truncate, if needed, but not what's under it.
14811 AddToWorklist(oye);
14812 }
14813 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14814 }
14815
14816 EVT SrcVT = N0.getOperand(0).getValueType();
14817 EVT MinVT = N0.getValueType();
14818
14819 if (N->getFlags().hasNonNeg()) {
14820 SDValue Op = N0.getOperand(0);
14821 unsigned OpBits = SrcVT.getScalarSizeInBits();
14822 unsigned MidBits = MinVT.getScalarSizeInBits();
14823 unsigned DestBits = VT.getScalarSizeInBits();
14824
14825 if (N0->getFlags().hasNoSignedWrap() ||
14826 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14827 if (OpBits == DestBits) {
14828 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14829 // bits, it is already ready.
14830 return Op;
14831 }
14832
14833 if (OpBits < DestBits) {
14834 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14835 // bits, just sext from i32.
14836 // FIXME: This can probably be ZERO_EXTEND nneg?
14837 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14838 }
14839
14840 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14841 // bits, just truncate to i32.
14842 SDNodeFlags Flags;
14843 Flags.setNoSignedWrap(true);
14844 Flags.setNoUnsignedWrap(true);
14845 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14846 }
14847 }
14848
14849 // Try to mask before the extension to avoid having to generate a larger mask,
14850 // possibly over several sub-vectors.
14851 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14852 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14854 SDValue Op = N0.getOperand(0);
14855 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14856 AddToWorklist(Op.getNode());
14857 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14858 // Transfer the debug info; the new node is equivalent to N0.
14859 DAG.transferDbgValues(N0, ZExtOrTrunc);
14860 return ZExtOrTrunc;
14861 }
14862 }
14863
14864 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14865 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14866 AddToWorklist(Op.getNode());
14867 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14868 // We may safely transfer the debug info describing the truncate node over
14869 // to the equivalent and operation.
14870 DAG.transferDbgValues(N0, And);
14871 return And;
14872 }
14873 }
14874
14875 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14876 // if either of the casts is not free.
14877 if (N0.getOpcode() == ISD::AND &&
14878 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14879 N0.getOperand(1).getOpcode() == ISD::Constant &&
14880 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14881 !TLI.isZExtFree(N0.getValueType(), VT))) {
14882 SDValue X = N0.getOperand(0).getOperand(0);
14883 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14884 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14885 return DAG.getNode(ISD::AND, DL, VT,
14886 X, DAG.getConstant(Mask, DL, VT));
14887 }
14888
14889 // Try to simplify (zext (load x)).
14890 if (SDValue foldedExt = tryToFoldExtOfLoad(
14891 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14892 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14893 return foldedExt;
14894
14895 if (SDValue foldedExt =
14896 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14897 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14898 return foldedExt;
14899
14900 // fold (zext (load x)) to multiple smaller zextloads.
14901 // Only on illegal but splittable vectors.
14902 if (SDValue ExtLoad = CombineExtLoad(N))
14903 return ExtLoad;
14904
14905 // Try to simplify (zext (atomic_load x)).
14906 if (SDValue foldedExt =
14907 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14908 return foldedExt;
14909
14910 // fold (zext (and/or/xor (load x), cst)) ->
14911 // (and/or/xor (zextload x), (zext cst))
14912 // Unless (and (load x) cst) will match as a zextload already and has
14913 // additional users, or the zext is already free.
14914 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
14915 isa<LoadSDNode>(N0.getOperand(0)) &&
14916 N0.getOperand(1).getOpcode() == ISD::Constant &&
14917 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14918 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14919 EVT MemVT = LN00->getMemoryVT();
14920 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
14921 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14922 bool DoXform = true;
14923 SmallVector<SDNode *, 4> SetCCs;
14924 if (!N0.hasOneUse()) {
14925 if (N0.getOpcode() == ISD::AND) {
14926 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14927 EVT LoadResultTy = AndC->getValueType(0);
14928 EVT ExtVT;
14929 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14930 DoXform = false;
14931 }
14932 }
14933 if (DoXform)
14934 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14935 ISD::ZERO_EXTEND, SetCCs, TLI);
14936 if (DoXform) {
14937 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14938 LN00->getChain(), LN00->getBasePtr(),
14939 LN00->getMemoryVT(),
14940 LN00->getMemOperand());
14941 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14942 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14943 ExtLoad, DAG.getConstant(Mask, DL, VT));
14944 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14945 bool NoReplaceTruncAnd = !N0.hasOneUse();
14946 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14947 CombineTo(N, And);
14948 // If N0 has multiple uses, change other uses as well.
14949 if (NoReplaceTruncAnd) {
14950 SDValue TruncAnd =
14951 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14952 CombineTo(N0.getNode(), TruncAnd);
14953 }
14954 if (NoReplaceTrunc) {
14955 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14956 } else {
14957 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14958 LN00->getValueType(0), ExtLoad);
14959 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14960 }
14961 return SDValue(N,0); // Return N so it doesn't get rechecked!
14962 }
14963 }
14964 }
14965
14966 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14967 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14968 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14969 return ZExtLoad;
14970
14971 // Try to simplify (zext (zextload x)).
14972 if (SDValue foldedExt = tryToFoldExtOfExtload(
14973 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14974 return foldedExt;
14975
14976 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14977 return V;
14978
14979 if (N0.getOpcode() == ISD::SETCC) {
14980 // Propagate fast-math-flags.
14981 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14982
14983 // Only do this before legalize for now.
14984 if (!LegalOperations && VT.isVector() &&
14985 N0.getValueType().getVectorElementType() == MVT::i1) {
14986 EVT N00VT = N0.getOperand(0).getValueType();
14987 if (getSetCCResultType(N00VT) == N0.getValueType())
14988 return SDValue();
14989
14990 // We know that the # elements of the results is the same as the #
14991 // elements of the compare (and the # elements of the compare result for
14992 // that matter). Check to see that they are the same size. If so, we know
14993 // that the element size of the sext'd result matches the element size of
14994 // the compare operands.
14995 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14996 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14997 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14998 N0.getOperand(1), N0.getOperand(2));
14999 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15000 }
15001
15002 // If the desired elements are smaller or larger than the source
15003 // elements we can use a matching integer vector type and then
15004 // truncate/any extend followed by zext_in_reg.
15005 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15006 SDValue VsetCC =
15007 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15008 N0.getOperand(1), N0.getOperand(2));
15009 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15010 N0.getValueType());
15011 }
15012
15013 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15014 EVT N0VT = N0.getValueType();
15015 EVT N00VT = N0.getOperand(0).getValueType();
15016 if (SDValue SCC = SimplifySelectCC(
15017 DL, N0.getOperand(0), N0.getOperand(1),
15018 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15019 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15020 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15021 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15022 }
15023
15024 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15025 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15026 !TLI.isZExtFree(N0, VT)) {
15027 SDValue ShVal = N0.getOperand(0);
15028 SDValue ShAmt = N0.getOperand(1);
15029 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15030 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15031 if (N0.getOpcode() == ISD::SHL) {
15032 // If the original shl may be shifting out bits, do not perform this
15033 // transformation.
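 // E.g. for (zext i64 (shl (zext i8 X to i32), 28)) the narrow shl
 // discards the top 4 bits of X, but a 64-bit shl would keep them, so
 // the fold is only safe when those bits are known zero.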
15034 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15035 ShVal.getOperand(0).getValueSizeInBits();
15036 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15037 // If the shift is too large, then see if we can deduce that the
15038 // shift is safe anyway.
15039
15040 // Check if the bits being shifted out are known to be zero.
15041 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15042 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15043 return SDValue();
15044 }
15045 }
15046
15047 // Ensure that the shift amount is wide enough for the shifted value.
15048 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15049 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15050
15051 return DAG.getNode(N0.getOpcode(), DL, VT,
15052 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15053 }
15054 }
15055 }
15056
15057 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15058 return NewVSel;
15059
15060 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15061 return NewCtPop;
15062
15063 if (SDValue V = widenAbs(N, DAG))
15064 return V;
15065
15066 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15067 return Res;
15068
15069 // CSE zext nneg with sext if the zext is not free.
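 // With the nneg flag the operand is known non-negative, so sign and zero
 // extension produce identical bits and an existing sign_extend of the same
 // operand can be reused.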
15070 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15071 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15072 if (CSENode)
15073 return SDValue(CSENode, 0);
15074 }
15075
15076 return SDValue();
15077}
15078
15079SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15080 SDValue N0 = N->getOperand(0);
15081 EVT VT = N->getValueType(0);
15082 SDLoc DL(N);
15083
15084 // aext(undef) = undef
15085 if (N0.isUndef())
15086 return DAG.getUNDEF(VT);
15087
15088 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15089 return Res;
15090
15091 // fold (aext (aext x)) -> (aext x)
15092 // fold (aext (zext x)) -> (zext x)
15093 // fold (aext (sext x)) -> (sext x)
15094 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15095 N0.getOpcode() == ISD::SIGN_EXTEND) {
15096 SDNodeFlags Flags;
15097 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15098 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15099 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15100 }
15101
15102 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15103 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15104 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15105 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15106 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15107 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
15108 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15109
15110 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15111 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15112 if (N0.getOpcode() == ISD::TRUNCATE) {
15113 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15114 SDNode *oye = N0.getOperand(0).getNode();
15115 if (NarrowLoad.getNode() != N0.getNode()) {
15116 CombineTo(N0.getNode(), NarrowLoad);
15117 // CombineTo deleted the truncate, if needed, but not what's under it.
15118 AddToWorklist(oye);
15119 }
15120 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15121 }
15122 }
15123
15124 // fold (aext (truncate x))
15125 if (N0.getOpcode() == ISD::TRUNCATE)
15126 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15127
15128 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15129 // if the trunc is not free.
15130 if (N0.getOpcode() == ISD::AND &&
15131 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15132 N0.getOperand(1).getOpcode() == ISD::Constant &&
15133 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15134 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15135 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15136 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15137 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15138 }
15139
15140 // fold (aext (load x)) -> (aext (truncate (extload x)))
15141 // None of the supported targets knows how to perform load and any_ext
15142 // on vectors in one instruction, so attempt to fold to zext instead.
15143 if (VT.isVector()) {
15144 // Try to simplify (zext (load x)).
15145 if (SDValue foldedExt =
15146 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15147 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15148 return foldedExt;
15149 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15150 ISD::isUNINDEXEDLoad(N0.getNode()) &&
15151 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
15152 bool DoXform = true;
15153 SmallVector<SDNode *, 4> SetCCs;
15154 if (!N0.hasOneUse())
15155 DoXform =
15156 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15157 if (DoXform) {
15158 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15159 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15160 LN0->getBasePtr(), N0.getValueType(),
15161 LN0->getMemOperand());
15162 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15163 // If the load value is used only by N, replace it via CombineTo N.
15164 bool NoReplaceTrunc = N0.hasOneUse();
15165 CombineTo(N, ExtLoad);
15166 if (NoReplaceTrunc) {
15167 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15168 recursivelyDeleteUnusedNodes(LN0);
15169 } else {
15170 SDValue Trunc =
15171 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15172 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15173 }
15174 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15175 }
15176 }
15177
15178 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15179 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15180 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15181 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15182 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15183 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15184 ISD::LoadExtType ExtType = LN0->getExtensionType();
15185 EVT MemVT = LN0->getMemoryVT();
15186 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15187 SDValue ExtLoad =
15188 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15189 MemVT, LN0->getMemOperand());
15190 CombineTo(N, ExtLoad);
15191 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15192 recursivelyDeleteUnusedNodes(LN0);
15193 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15194 }
15195 }
15196
15197 if (N0.getOpcode() == ISD::SETCC) {
15198 // Propagate fast-math-flags.
15199 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15200
15201 // For vectors:
15202 // aext(setcc) -> vsetcc
15203 // aext(setcc) -> truncate(vsetcc)
15204 // aext(setcc) -> aext(vsetcc)
15205 // Only do this before legalize for now.
15206 if (VT.isVector() && !LegalOperations) {
15207 EVT N00VT = N0.getOperand(0).getValueType();
15208 if (getSetCCResultType(N00VT) == N0.getValueType())
15209 return SDValue();
15210
15211 // We know that the # elements of the results is the same as the
15212 // # elements of the compare (and the # elements of the compare result
15213 // for that matter). Check to see that they are the same size. If so,
15214 // we know that the element size of the sext'd result matches the
15215 // element size of the compare operands.
15216 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15217 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15218 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15219
15220 // If the desired elements are smaller or larger than the source
15221 // elements we can use a matching integer vector type and then
15222 // truncate/any extend
15223 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15224 SDValue VsetCC = DAG.getSetCC(
15225 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15226 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15227 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15228 }
15229
15230 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15231 if (SDValue SCC = SimplifySelectCC(
15232 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15233 DAG.getConstant(0, DL, VT),
15234 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15235 return SCC;
15236 }
15237
15238 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15239 return NewCtPop;
15240
15241 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15242 return Res;
15243
15244 return SDValue();
15245}
15246
15247SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15248 unsigned Opcode = N->getOpcode();
15249 SDValue N0 = N->getOperand(0);
15250 SDValue N1 = N->getOperand(1);
15251 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15252
15253 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15254 if (N0.getOpcode() == Opcode &&
15255 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15256 return N0;
15257
15258 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15259 N0.getOperand(0).getOpcode() == Opcode) {
15260 // We have an assert, truncate, assert sandwich. Make one stronger assert
15261 // by asserting on the smallest asserted type to the larger source type.
15262 // This eliminates the later assert:
15263 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15264 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15265 SDLoc DL(N);
15266 SDValue BigA = N0.getOperand(0);
15267 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15268 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15269 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15270 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15271 BigA.getOperand(0), MinAssertVTVal);
15272 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15273 }
15274
15275 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15276 // than X, just move the AssertZext in front of the truncate and drop the
15277 // AssertSExt.
15278 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15279 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
15280 Opcode == ISD::AssertZext) {
15281 SDValue BigA = N0.getOperand(0);
15282 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15283 if (AssertVT.bitsLT(BigA_AssertVT)) {
15284 SDLoc DL(N);
15285 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15286 BigA.getOperand(0), N1);
15287 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15288 }
15289 }
15290
15291 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15292 isa<ConstantSDNode>(N0.getOperand(1))) {
15293 const APInt &Mask = N0.getConstantOperandAPInt(1);
15294
15295 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15296 // than X, and the And doesn't change the lower iX bits, we can move the
15297 // AssertZext in front of the And and drop the AssertSext.
15298 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15299 SDValue BigA = N0.getOperand(0);
15300 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15301 if (AssertVT.bitsLT(BigA_AssertVT) &&
15302 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15303 SDLoc DL(N);
15304 SDValue NewAssert =
15305 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15306 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15307 N0.getOperand(1));
15308 }
15309 }
15310
15311 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15312 // fail.
15313 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
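// Illustrative example (added): (AssertZext (and X, 0xFF), i8) can simply
// return the AND, because the mask already clears every bit above bit 7,
// so the zero-extension assertion can never fail.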
15314 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15315 return N0;
15316 }
15317
15318 return SDValue();
15319}
15320
15321SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15322 SDLoc DL(N);
15323
15324 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15325 SDValue N0 = N->getOperand(0);
15326
15327 // Fold (assertalign (assertalign x, AL0), AL1) ->
15328 // (assertalign x, max(AL0, AL1))
15329 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15330 return DAG.getAssertAlign(DL, N0.getOperand(0),
15331 std::max(AL, AAN->getAlign()));
15332
15333 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15334 // this assert down to source operands so that those arithmetic ops could be
15335 // exposed to the DAG combining.
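// Illustrative example (added): (assertalign (add X, 32), align 16) becomes
// (add (assertalign X, align 16), 32); the constant 32 already satisfies the
// 16-byte alignment, so the alignment fact must come from X, and attaching
// the assert to X exposes the add to further combines.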
15336 switch (N0.getOpcode()) {
15337 default:
15338 break;
15339 case ISD::ADD:
15340 case ISD::PTRADD:
15341 case ISD::SUB: {
15342 unsigned AlignShift = Log2(AL);
15343 SDValue LHS = N0.getOperand(0);
15344 SDValue RHS = N0.getOperand(1);
15345 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15346 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15347 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15348 if (LHSAlignShift < AlignShift)
15349 LHS = DAG.getAssertAlign(DL, LHS, AL);
15350 if (RHSAlignShift < AlignShift)
15351 RHS = DAG.getAssertAlign(DL, RHS, AL);
15352 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15353 }
15354 break;
15355 }
15356 }
15357
15358 return SDValue();
15359}
15360
15361/// If the result of a load is shifted/masked/truncated to an effectively
15362/// narrower type, try to transform the load to a narrower type and/or
15363/// use an extending load.
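/// Illustrative example (added): on a little-endian target,
/// (i32 (truncate (srl (i64 (load p)), 32))) only uses the upper half of the
/// loaded value, so it can become (i32 (load p+4)).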
15364SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15365 unsigned Opc = N->getOpcode();
15366
15367 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15368 SDValue N0 = N->getOperand(0);
15369 EVT VT = N->getValueType(0);
15370 EVT ExtVT = VT;
15371
15372 // This transformation isn't valid for vector loads.
15373 if (VT.isVector())
15374 return SDValue();
15375
15376 // The ShAmt variable is used to indicate that we've consumed a right
15377 // shift. I.e. we want to narrow the width of the load by skipping the
15378 // ShAmt least significant bits.
15379 unsigned ShAmt = 0;
15380 // A special case is when the least significant bits from the load are masked
15381 // away, but using an AND rather than a right shift. ShiftedOffset is used
15382 // to indicate that the narrowed load should be left-shifted ShiftedOffset
15383 // bits to get the result.
15384 unsigned ShiftedOffset = 0;
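// Illustrative example (added): (and (i32 (load p)), 0xFF00) keeps only byte 1
// of the loaded value; it can be narrowed to a zero-extending i8 load of that
// byte followed by a left shift of 8, recorded here as ShiftedOffset = 8.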
15385 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15386 // extended to VT.
15387 if (Opc == ISD::SIGN_EXTEND_INREG) {
15388 ExtType = ISD::SEXTLOAD;
15389 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15390 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15391 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15392 // value, or it may be shifting a higher subword, half or byte into the
15393 // lowest bits.
15394
15395 // Only handle shift with constant shift amount, and the shiftee must be a
15396 // load.
15397 auto *LN = dyn_cast<LoadSDNode>(N0);
15398 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15399 if (!N1C || !LN)
15400 return SDValue();
15401 // If the shift amount is larger than the memory type then we're not
15402 // accessing any of the loaded bytes.
15403 ShAmt = N1C->getZExtValue();
15404 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15405 if (MemoryWidth <= ShAmt)
15406 return SDValue();
15407 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15408 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15409 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15410 // If original load is a SEXTLOAD then we can't simply replace it by a
15411 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
15412 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15413 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15414 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15415 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15416 LN->getExtensionType() != ExtType)
15417 return SDValue();
15418 } else if (Opc == ISD::AND) {
15419 // An AND with a constant mask is the same as a truncate + zero-extend.
15420 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15421 if (!AndC)
15422 return SDValue();
15423
15424 const APInt &Mask = AndC->getAPIntValue();
15425 unsigned ActiveBits = 0;
15426 if (Mask.isMask()) {
15427 ActiveBits = Mask.countr_one();
15428 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15429 ShiftedOffset = ShAmt;
15430 } else {
15431 return SDValue();
15432 }
15433
15434 ExtType = ISD::ZEXTLOAD;
15435 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15436 }
15437
15438 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15439 // a right shift. Here we redo some of those checks, to possibly adjust the
15440 // ExtVT even further based on "a masking AND". We could also end up here for
15441 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15442 // need to be done here as well.
15443 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15444 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15445 // Bail out when the SRL has more than one use. This is done for historical
15446 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15447 // check below? And maybe it could be non-profitable to do the transform in
15448 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15449 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
15450 if (!SRL.hasOneUse())
15451 return SDValue();
15452
15453 // Only handle shift with constant shift amount, and the shiftee must be a
15454 // load.
15455 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15456 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15457 if (!SRL1C || !LN)
15458 return SDValue();
15459
15460 // If the shift amount is larger than the input type then we're not
15461 // accessing any of the loaded bytes. If the load was a zextload/extload
15462 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15463 ShAmt = SRL1C->getZExtValue();
15464 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15465 if (ShAmt >= MemoryWidth)
15466 return SDValue();
15467
15468 // Because a SRL must be assumed to *need* to zero-extend the high bits
15469 // (as opposed to anyext the high bits), we can't combine the zextload
15470 // lowering of SRL and an sextload.
15471 if (LN->getExtensionType() == ISD::SEXTLOAD)
15472 return SDValue();
15473
15474 // Avoid reading outside the memory accessed by the original load (could
15475 // happen if we only adjust the load base pointer by ShAmt). Instead we
15476 // try to narrow the load even further. The typical scenario here is:
15477 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15478 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15479 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15480 // Don't replace sextload by zextload.
15481 if (ExtType == ISD::SEXTLOAD)
15482 return SDValue();
15483 // Narrow the load.
15484 ExtType = ISD::ZEXTLOAD;
15485 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15486 }
15487
15488 // If the SRL is only used by a masking AND, we may be able to adjust
15489 // the ExtVT to make the AND redundant.
15490 SDNode *Mask = *(SRL->user_begin());
15491 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15492 isa<ConstantSDNode>(Mask->getOperand(1))) {
15493 unsigned Offset, ActiveBits;
15494 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15495 if (ShiftMask.isMask()) {
15496 EVT MaskedVT =
15497 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15498 // If the mask is smaller, recompute the type.
15499 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15500 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15501 ExtVT = MaskedVT;
15502 } else if (ExtType == ISD::ZEXTLOAD &&
15503 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15504 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15505 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15506 // If the mask is shifted we can use a narrower load and a shl to insert
15507 // the trailing zeros.
15508 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15509 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15510 ExtVT = MaskedVT;
15511 ShAmt = Offset + ShAmt;
15512 ShiftedOffset = Offset;
15513 }
15514 }
15515 }
15516
15517 N0 = SRL.getOperand(0);
15518 }
15519
15520 // If the load is shifted left (and the result isn't shifted back right), we
15521 // can fold a truncate through the shift. The typical scenario is that N
15522 // points at a TRUNCATE here so the attempted fold is:
15523 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
15524 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15525 unsigned ShLeftAmt = 0;
15526 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15527 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15528 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15529 ShLeftAmt = N01->getZExtValue();
15530 N0 = N0.getOperand(0);
15531 }
15532 }
15533
15534 // If we haven't found a load, we can't narrow it.
15535 if (!isa<LoadSDNode>(N0))
15536 return SDValue();
15537
15538 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15539 // Reducing the width of a volatile load is illegal. For atomics, we may be
15540 // able to reduce the width provided we never widen again. (see D66309)
15541 if (!LN0->isSimple() ||
15542 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15543 return SDValue();
15544
15545 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15546 unsigned LVTStoreBits =
15547 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15548 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15549 return LVTStoreBits - EVTStoreBits - ShAmt;
15550 };
15551
15552 // We need to adjust the pointer to the load by ShAmt bits in order to load
15553 // the correct bytes.
15554 unsigned PtrAdjustmentInBits =
15555 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15556
15557 uint64_t PtrOff = PtrAdjustmentInBits / 8;
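// Worked example (added): narrowing an i64 load so that ExtVT holds bits
// [16,32) gives ShAmt = 16. On little-endian the byte offset is 16/8 = 2;
// on big-endian AdjustBigEndianShift returns 64 - 16 - 16 = 32, i.e. a byte
// offset of 4, which is where those bits live in memory.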
15558 SDLoc DL(LN0);
15559 // The original load itself didn't wrap, so an offset within it doesn't.
15560 SDValue NewPtr =
15561 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
15562 DL, SDNodeFlags::NoUnsignedWrap);
15563 AddToWorklist(NewPtr.getNode());
15564
15565 SDValue Load;
15566 if (ExtType == ISD::NON_EXTLOAD) {
15567 const MDNode *OldRanges = LN0->getRanges();
15568 const MDNode *NewRanges = nullptr;
15569 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15570 // metadata is not the full-set for the new width then create a NewRanges
15571 // metadata for the truncated load
15572 if (ShAmt == 0 && OldRanges) {
15573 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15574 unsigned BitSize = VT.getScalarSizeInBits();
15575
15576 // It is possible for an 8-bit extending load with 8-bit range
15577 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15578 // ensure that truncation is strictly smaller.
15579 if (CR.getBitWidth() > BitSize) {
15580 ConstantRange TruncatedCR = CR.truncate(BitSize);
15581 if (!TruncatedCR.isFullSet()) {
15582 Metadata *Bounds[2] = {
15583 ConstantAsMetadata::get(
15584 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15585 ConstantAsMetadata::get(
15586 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15587 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15588 }
15589 } else if (CR.getBitWidth() == BitSize)
15590 NewRanges = OldRanges;
15591 }
15592 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15593 LN0->getPointerInfo().getWithOffset(PtrOff),
15594 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15595 LN0->getAAInfo(), NewRanges);
15596 } else
15597 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15598 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15599 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15600 LN0->getAAInfo());
15601
15602 // Replace the old load's chain with the new load's chain.
15603 WorklistRemover DeadNodes(*this);
15604 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15605
15606 // Shift the result left, if we've swallowed a left shift.
15607 SDValue Result = Load;
15608 if (ShLeftAmt != 0) {
15609 // If the shift amount is as large as the result size (but, presumably,
15610 // no larger than the source) then the useful bits of the result are
15611 // zero; we can't simply return the shortened shift, because the result
15612 // of that operation is undefined.
15613 if (ShLeftAmt >= VT.getScalarSizeInBits())
15614 Result = DAG.getConstant(0, DL, VT);
15615 else
15616 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15617 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15618 }
15619
15620 if (ShiftedOffset != 0) {
15621 // We're using a shifted mask, so the load now has an offset. This means
15622 // that the data has been loaded into lower bytes than it would have been
15623 // before, so we need to shl the loaded data into the correct position in the
15624 // register.
15625 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15626 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15627 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15628 }
15629
15630 // Return the new loaded value.
15631 return Result;
15632}
15633
15634SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15635 SDValue N0 = N->getOperand(0);
15636 SDValue N1 = N->getOperand(1);
15637 EVT VT = N->getValueType(0);
15638 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15639 unsigned VTBits = VT.getScalarSizeInBits();
15640 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15641 SDLoc DL(N);
15642
15643 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
15644 if (N0.isUndef())
15645 return DAG.getConstant(0, DL, VT);
15646
15647 // fold (sext_in_reg c1) -> c1
15648 if (SDValue C =
15649 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15650 return C;
15651
15652 // If the input is already sign extended, just drop the extension.
15653 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15654 return N0;
15655
15656 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15657 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15658 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15659 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15660
15661 // fold (sext_in_reg (sext x)) -> (sext x)
15662 // fold (sext_in_reg (aext x)) -> (sext x)
15663 // if x is small enough or if we know that x has more than 1 sign bit and the
15664 // sign_extend_inreg is extending from one of them.
15665 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15666 SDValue N00 = N0.getOperand(0);
15667 unsigned N00Bits = N00.getScalarValueSizeInBits();
15668 if ((N00Bits <= ExtVTBits ||
15669 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15670 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15671 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15672 }
15673
15674 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15675 // if x is small enough or if we know that x has more than 1 sign bit and the
15676 // sign_extend_inreg is extending from one of them.
15677 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15678 SDValue N00 = N0.getOperand(0);
15679 unsigned N00Bits = N00.getScalarValueSizeInBits();
15680 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15681 if ((N00Bits == ExtVTBits ||
15682 (!IsZext && (N00Bits < ExtVTBits ||
15683 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15684 (!LegalOperations ||
15685 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15686 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15687 }
15688
15689 // fold (sext_in_reg (zext x)) -> (sext x)
15690 // iff we are extending the source sign bit.
15691 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15692 SDValue N00 = N0.getOperand(0);
15693 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15694 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15695 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15696 }
15697
15698 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15699 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15700 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15701
15702 // fold operands of sext_in_reg based on knowledge that the top bits are not
15703 // demanded.
15704 if (SimplifyDemandedBits(SDValue(N, 0)))
15705 return SDValue(N, 0);
15706
15707 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15708 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15709 if (SDValue NarrowLoad = reduceLoadWidth(N))
15710 return NarrowLoad;
15711
15712 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15713 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15714 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15715 if (N0.getOpcode() == ISD::SRL) {
15716 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15717 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15718 // We can turn this into an SRA iff the input to the SRL is already sign
15719 // extended enough.
15720 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15721 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15722 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15723 N0.getOperand(1));
15724 }
15725 }
15726
15727 // fold (sext_inreg (extload x)) -> (sextload x)
15728 // If sextload is not supported by target, we can only do the combine when
15729 // load has one use. Doing otherwise can block folding the extload with other
15730 // extends that the target does support.
15731 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15732 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15733 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15734 N0.hasOneUse()) ||
15735 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15736 auto *LN0 = cast<LoadSDNode>(N0);
15737 SDValue ExtLoad =
15738 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15739 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15740 CombineTo(N, ExtLoad);
15741 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15742 AddToWorklist(ExtLoad.getNode());
15743 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15744 }
15745
15746 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15747 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15748 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15749 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15750 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15751 auto *LN0 = cast<LoadSDNode>(N0);
15752 SDValue ExtLoad =
15753 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15754 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15755 CombineTo(N, ExtLoad);
15756 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15757 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15758 }
15759
15760 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15761 // ignore it if the masked load is already sign extended
15762 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15763 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15764 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15765 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15766 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15767 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15768 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15769 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15770 CombineTo(N, ExtMaskedLoad);
15771 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15772 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15773 }
15774 }
15775
15776 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15777 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15778 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15779 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
15780 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15781 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15782
15783 SDValue ExtLoad = DAG.getMaskedGather(
15784 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15785 GN0->getIndexType(), ISD::SEXTLOAD);
15786
15787 CombineTo(N, ExtLoad);
15788 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15789 AddToWorklist(ExtLoad.getNode());
15790 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15791 }
15792 }
15793
15794 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15795 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15796 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15797 N0.getOperand(1), false))
15798 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15799 }
15800
15801 // Fold (iM_signext_inreg
15802 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15803 // from iN)
15804 // -> (extract_subvector (signext iN_v to iM))
15805 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15806 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15807 SDValue InnerExt = N0.getOperand(0);
15808 EVT InnerExtVT = InnerExt->getValueType(0);
15809 SDValue Extendee = InnerExt->getOperand(0);
15810
15811 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15812 (!LegalOperations ||
15813 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15814 SDValue SignExtExtendee =
15815 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15816 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15817 N0.getOperand(1));
15818 }
15819 }
15820
15821 return SDValue();
15822}
15823
15824 static SDValue foldExtendVectorInregToExtendOfSubvector(
15825 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15826 bool LegalOperations) {
15827 unsigned InregOpcode = N->getOpcode();
15828 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15829
15830 SDValue Src = N->getOperand(0);
15831 EVT VT = N->getValueType(0);
15832 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15833 Src.getValueType().getVectorElementType(),
15834 VT.getVectorElementCount());
15835
15836 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15837 "Expected EXTEND_VECTOR_INREG dag node in input!");
15838
15839 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15840 // FIXME: one-use check may be overly restrictive
15841 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15842 return SDValue();
15843
15844 // Profitability check: we must be extending exactly one of its operands.
15845 // FIXME: this is probably overly restrictive.
15846 Src = Src.getOperand(0);
15847 if (Src.getValueType() != SrcVT)
15848 return SDValue();
15849
15850 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15851 return SDValue();
15852
15853 return DAG.getNode(Opcode, DL, VT, Src);
15854}
15855
15856SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15857 SDValue N0 = N->getOperand(0);
15858 EVT VT = N->getValueType(0);
15859 SDLoc DL(N);
15860
15861 if (N0.isUndef()) {
15862 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15863 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15864 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15865 ? DAG.getUNDEF(VT)
15866 : DAG.getConstant(0, DL, VT);
15867 }
15868
15869 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15870 return Res;
15871
15872 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15873 return SDValue(N, 0);
15874
15875 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15876 LegalOperations))
15877 return R;
15878
15879 return SDValue();
15880}
15881
15882SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15883 EVT VT = N->getValueType(0);
15884 SDValue N0 = N->getOperand(0);
15885
15886 SDValue FPVal;
15887 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15888 DAG.getTargetLoweringInfo().shouldConvertFpToSat(
15889 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15890 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15891 DAG.getValueType(VT.getScalarType()));
15892
15893 return SDValue();
15894}
15895
15896/// Detect patterns of truncation with unsigned saturation:
15897///
15898/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15899/// Return the source value x to be truncated or SDValue() if the pattern was
15900/// not matched.
15901///
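/// Illustrative example (added): for an i16 -> i8 truncation this matches
/// (truncate (umin X, 255)) and returns X.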
15902 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15903 unsigned NumDstBits = VT.getScalarSizeInBits();
15904 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15905 // Saturation with truncation. We truncate from InVT to VT.
15906 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15907
15908 SDValue Min;
15909 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15910 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15911 return Min;
15912
15913 return SDValue();
15914}
15915
15916/// Detect patterns of truncation with signed saturation:
15917/// (truncate (smin (smax (x, signed_min_of_dest_type),
15918/// signed_max_of_dest_type)) to dest_type)
15919/// or:
15920/// (truncate (smax (smin (x, signed_max_of_dest_type),
15921/// signed_min_of_dest_type)) to dest_type).
15922///
15923/// Return the source value to be truncated or SDValue() if the pattern was not
15924/// matched.
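/// Illustrative example (added): for an i16 -> i8 truncation this matches
/// (truncate (smin (smax X, -128), 127)), or the smax-of-smin form, and
/// returns X.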
15925 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
15926 unsigned NumDstBits = VT.getScalarSizeInBits();
15927 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15928 // Saturation with truncation. We truncate from InVT to VT.
15929 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15930
15931 SDValue Val;
15932 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
15933 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
15934
15935 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
15936 m_SpecificInt(SignedMax))))
15937 return Val;
15938
15939 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
15940 m_SpecificInt(SignedMin))))
15941 return Val;
15942
15943 return SDValue();
15944}
15945
15946/// Detect patterns of truncation with unsigned saturation:
15947 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
15948 const SDLoc &DL) {
15949 unsigned NumDstBits = VT.getScalarSizeInBits();
15950 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15951 // Saturation with truncation. We truncate from InVT to VT.
15952 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15953
15954 SDValue Val;
15955 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15956 // Min == 0, Max is unsigned max of destination type.
15957 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
15958 m_Zero())))
15959 return Val;
15960
15961 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
15962 m_SpecificInt(UnsignedMax))))
15963 return Val;
15964
15965 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
15966 m_SpecificInt(UnsignedMax))))
15967 return Val;
15968
15969 return SDValue();
15970}
15971
15972static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
15973 SDLoc &DL, const TargetLowering &TLI,
15974 SelectionDAG &DAG) {
15975 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
15976 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
15977 TLI.isTypeDesirableForOp(Opc, VT));
15978 };
15979
15980 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
15981 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
15982 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
15983 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
15984 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15985 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15986 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15987 } else if (Src.getOpcode() == ISD::UMIN) {
15988 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
15989 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
15990 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
15991 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
15992 if (SDValue USatVal = detectUSatUPattern(Src, VT))
15993 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
15994 }
15995
15996 return SDValue();
15997}
15998
15999SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16000 SDValue N0 = N->getOperand(0);
16001 EVT VT = N->getValueType(0);
16002 EVT SrcVT = N0.getValueType();
16003 bool isLE = DAG.getDataLayout().isLittleEndian();
16004 SDLoc DL(N);
16005
16006 // trunc(undef) = undef
16007 if (N0.isUndef())
16008 return DAG.getUNDEF(VT);
16009
16010 // fold (truncate (truncate x)) -> (truncate x)
16011 if (N0.getOpcode() == ISD::TRUNCATE)
16012 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16013
16014 // fold saturated truncate
16015 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16016 return SaturatedTR;
16017
16018 // fold (truncate c1) -> c1
16019 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16020 return C;
16021
16022 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16023 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16024 N0.getOpcode() == ISD::SIGN_EXTEND ||
16025 N0.getOpcode() == ISD::ANY_EXTEND) {
16026 // if the source is smaller than the dest, we still need an extend.
16027 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16028 SDNodeFlags Flags;
16029 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16030 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16031 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16032 }
16033 // if the source is larger than the dest, then we just need the truncate.
16034 if (N0.getOperand(0).getValueType().bitsGT(VT))
16035 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16036 // if the source and dest are the same type, we can drop both the extend
16037 // and the truncate.
16038 return N0.getOperand(0);
16039 }
16040
16041 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16042 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16043 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16044 N0.hasOneUse()) {
16045 SDValue X = N0.getOperand(0);
16046 SDValue ExtVal = N0.getOperand(1);
16047 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16048 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16049 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16050 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16051 }
16052 }
16053
16054 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16055 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16056 return SDValue();
16057
16058 // Fold extract-and-trunc into a narrow extract. For example:
16059 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16060 // i32 y = TRUNCATE(i64 x)
16061 // -- becomes --
16062 // v16i8 b = BITCAST (v2i64 val)
16063 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16064 //
16065 // Note: We only run this optimization after type legalization (which often
16066 // creates this pattern) and before operation legalization after which
16067 // we need to be more careful about the vector instructions that we generate.
16068 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16069 N0->hasOneUse()) {
16070 EVT TrTy = N->getValueType(0);
16071 SDValue Src = N0;
16072
16073 // Check for cases where we shift down an upper element before truncation.
16074 int EltOffset = 0;
16075 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16076 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16077 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16078 Src = Src.getOperand(0);
16079 EltOffset = *ShAmt / TrTy.getSizeInBits();
16080 }
16081 }
16082 }
16083
16084 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16085 EVT VecTy = Src.getOperand(0).getValueType();
16086 EVT ExTy = Src.getValueType();
16087
16088 auto EltCnt = VecTy.getVectorElementCount();
16089 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16090 auto NewEltCnt = EltCnt * SizeRatio;
16091
16092 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16093 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16094
16095 SDValue EltNo = Src->getOperand(1);
16096 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16097 int Elt = EltNo->getAsZExtVal();
16098 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16099 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16100 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16101 DAG.getBitcast(NVT, Src.getOperand(0)),
16102 DAG.getVectorIdxConstant(Index, DL));
16103 }
16104 }
16105 }
16106
16107 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16108 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16109 TLI.isTruncateFree(SrcVT, VT)) {
16110 if (!LegalOperations ||
16111 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16112 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16113 SDLoc SL(N0);
16114 SDValue Cond = N0.getOperand(0);
16115 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16116 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16117 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16118 }
16119 }
16120
16121 // trunc (shl x, K) -> shl (trunc x), K, if K < VT.getScalarSizeInBits()
16122 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16123 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16124 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16125 SDValue Amt = N0.getOperand(1);
16126 KnownBits Known = DAG.computeKnownBits(Amt);
16127 unsigned Size = VT.getScalarSizeInBits();
16128 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16129 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16130 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16131 if (AmtVT != Amt.getValueType()) {
16132 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16133 AddToWorklist(Amt.getNode());
16134 }
16135 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16136 }
16137 }
16138
16139 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16140 return V;
16141
16142 if (SDValue ABD = foldABSToABD(N, DL))
16143 return ABD;
16144
16145 // Attempt to pre-truncate BUILD_VECTOR sources.
16146 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16147 N0.hasOneUse() &&
16148 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16149 // Avoid creating illegal types if running after type legalizer.
16150 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16151 EVT SVT = VT.getScalarType();
16152 SmallVector<SDValue, 8> TruncOps;
16153 for (const SDValue &Op : N0->op_values()) {
16154 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16155 TruncOps.push_back(TruncOp);
16156 }
16157 return DAG.getBuildVector(VT, DL, TruncOps);
16158 }
16159
16160 // trunc (splat_vector x) -> splat_vector (trunc x)
16161 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16162 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16163 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16164 EVT SVT = VT.getScalarType();
16165 return DAG.getSplatVector(
16166 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16167 }
16168
16169 // Fold a series of buildvector, bitcast, and truncate if possible.
16170 // For example fold
16171 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16172 // (2xi32 (buildvector x, y)).
16173 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16174 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16175 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
16176 N0.getOperand(0).hasOneUse()) {
16177 SDValue BuildVect = N0.getOperand(0);
16178 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16179 EVT TruncVecEltTy = VT.getVectorElementType();
16180
16181 // Check that the element types match.
16182 if (BuildVectEltTy == TruncVecEltTy) {
16183 // Now we only need to compute the offset of the truncated elements.
16184 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16185 unsigned TruncVecNumElts = VT.getVectorNumElements();
16186 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16187 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16188
16189 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16190 "Invalid number of elements");
16191
16192 SmallVector<SDValue, 8> Opnds;
16193 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16194 i += TruncEltOffset)
16195 Opnds.push_back(BuildVect.getOperand(i));
16196
16197 return DAG.getBuildVector(VT, DL, Opnds);
16198 }
16199 }
16200
16201 // fold (truncate (load x)) -> (smaller load x)
16202 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16203 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16204 if (SDValue Reduced = reduceLoadWidth(N))
16205 return Reduced;
16206
16207 // Handle the case where the truncated result is at least as wide as the
16208 // loaded type.
16209 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16210 auto *LN0 = cast<LoadSDNode>(N0);
16211 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16212 SDValue NewLoad = DAG.getExtLoad(
16213 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16214 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16215 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16216 return NewLoad;
16217 }
16218 }
16219 }
16220
16221 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16222 // where ... are all 'undef'.
16223 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16224 SmallVector<EVT, 8> VTs;
16225 SDValue V;
16226 unsigned Idx = 0;
16227 unsigned NumDefs = 0;
16228
16229 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16230 SDValue X = N0.getOperand(i);
16231 if (!X.isUndef()) {
16232 V = X;
16233 Idx = i;
16234 NumDefs++;
16235 }
16236 // Stop if more than one member is non-undef.
16237 if (NumDefs > 1)
16238 break;
16239
16240 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
16241 VT.getVectorElementType(),
16242 X.getValueType().getVectorElementCount()));
16243 }
16244
16245 if (NumDefs == 0)
16246 return DAG.getUNDEF(VT);
16247
16248 if (NumDefs == 1) {
16249 assert(V.getNode() && "The single defined operand is empty!");
16250 SmallVector<SDValue, 8> Opnds;
16251 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16252 if (i != Idx) {
16253 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16254 continue;
16255 }
16256 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16257 AddToWorklist(NV.getNode());
16258 Opnds.push_back(NV);
16259 }
16260 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16261 }
16262 }
16263
16264 // Fold truncate of a bitcast of a vector to an extract of the low vector
16265 // element.
16266 //
16267 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16268 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16269 SDValue VecSrc = N0.getOperand(0);
16270 EVT VecSrcVT = VecSrc.getValueType();
16271 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16272 (!LegalOperations ||
16273 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16274 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16275 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16276 DAG.getVectorIdxConstant(Idx, DL));
16277 }
16278 }
16279
16280 // Simplify the operands using demanded-bits information.
16281 if (SimplifyDemandedBits(SDValue(N, 0)))
16282 return SDValue(N, 0);
16283
16284 // fold (truncate (extract_subvector(ext x))) ->
16285 // (extract_subvector x)
16286 // TODO: This can be generalized to cover cases where the truncate and extract
16287 // do not fully cancel each other out.
16288 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16289 SDValue N00 = N0.getOperand(0);
16290 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16291 N00.getOpcode() == ISD::ZERO_EXTEND ||
16292 N00.getOpcode() == ISD::ANY_EXTEND) {
16293 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16294 VT.getVectorElementType())
16295 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16296 N00.getOperand(0), N0.getOperand(1));
16297 }
16298 }
16299
16300 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16301 return NewVSel;
16302
16303 // Narrow a suitable binary operation with a non-opaque constant operand by
16304 // moving it ahead of the truncate. This is limited to pre-legalization
16305 // because targets may prefer a wider type during later combines and invert
16306 // this transform.
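// Illustrative example (added): (i8 (truncate (add X:i32, 300))) becomes
// (add (i8 (truncate X)), 44), since only the low 8 bits of the sum are
// demanded and the constant 300 truncates to 44.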
16307 switch (N0.getOpcode()) {
16308 case ISD::ADD:
16309 case ISD::SUB:
16310 case ISD::MUL:
16311 case ISD::AND:
16312 case ISD::OR:
16313 case ISD::XOR:
16314 if (!LegalOperations && N0.hasOneUse() &&
16315 (isConstantOrConstantVector(N0.getOperand(0), true) ||
16316 isConstantOrConstantVector(N0.getOperand(1), true))) {
16317 // TODO: We already restricted this to pre-legalization, but for vectors
16318 // we are extra cautious to not create an unsupported operation.
16319 // Target-specific changes are likely needed to avoid regressions here.
16320 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16321 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16322 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16323 SDNodeFlags Flags;
16324 // Propagate nuw for sub.
16325 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16326 DAG.MaskedValueIsZero(
16327 N0->getOperand(0),
16328 APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
16329 VT.getScalarSizeInBits())))
16330 Flags.setNoUnsignedWrap(true);
16331 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16332 }
16333 }
16334 break;
16335 case ISD::ADDE:
16336 case ISD::UADDO_CARRY:
16337 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16338 // (trunc uaddo_carry(X, Y, Carry)) ->
16339 // (uaddo_carry trunc(X), trunc(Y), Carry)
16340 // When the adde's carry is not used.
16341 // We only do for uaddo_carry before legalize operation
16342 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16343 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16344 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16345 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16346 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16347 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16348 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16349 }
16350 break;
16351 case ISD::USUBSAT:
16352 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16353 // enough to know that the upper bits are zero, we must ensure that we don't
16354 // introduce an extra truncate.
16355 if (!LegalOperations && N0.hasOneUse() &&
16356 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
16357 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
16358 VT.getScalarSizeInBits() &&
16359 hasOperation(N0.getOpcode(), VT)) {
16360 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16361 DAG, DL);
16362 }
16363 break;
16364 case ISD::AVGFLOORS:
16365 case ISD::AVGFLOORU:
16366 case ISD::AVGCEILS:
16367 case ISD::AVGCEILU:
16368 case ISD::ABDS:
16369 case ISD::ABDU:
16370 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16371 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16372 if (!LegalOperations && N0.hasOneUse() &&
16373 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16374 EVT TruncVT = VT;
16375 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16376 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16377
16378 SDValue A = N0.getOperand(0);
16379 SDValue B = N0.getOperand(1);
16380 bool CanFold = false;
16381
16382 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16383 N0.getOpcode() == ISD::ABDU) {
16384 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16385 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16386 DAG.MaskedValueIsZero(A, UpperBits);
16387 } else {
16388 unsigned NeededBits = SrcBits - TruncBits;
16389 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16390 DAG.ComputeNumSignBits(A) > NeededBits;
16391 }
16392
16393 if (CanFold) {
16394 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16395 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16396 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16397 }
16398 }
16399 break;
16400 }
16401
16402 return SDValue();
16403}
16404
16405static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16406 SDValue Elt = N->getOperand(i);
16407 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16408 return Elt.getNode();
16409 return Elt.getOperand(Elt.getResNo()).getNode();
16410}
16411
16412/// build_pair (load, load) -> load
16413/// if load locations are consecutive.
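/// Illustrative example (added): on a little-endian target,
/// (build_pair (i32 (load p)), (i32 (load p+4))) becomes a single
/// (i64 (load p)) when the loads are consecutive and the wide load is legal
/// and fast enough.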
16414SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16415 assert(N->getOpcode() == ISD::BUILD_PAIR);
16416
16417 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16418 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16419
16420 // A BUILD_PAIR always has the least significant part in elt 0 and the
16421 // most significant part in elt 1. So when combining into one large load, we
16422 // need to consider the endianness.
16423 if (DAG.getDataLayout().isBigEndian())
16424 std::swap(LD1, LD2);
16425
16426 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16427 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16428 LD1->getAddressSpace() != LD2->getAddressSpace())
16429 return SDValue();
16430
16431 unsigned LD1Fast = 0;
16432 EVT LD1VT = LD1->getValueType(0);
16433 unsigned LD1Bytes = LD1VT.getStoreSize();
16434 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16435 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16436 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16437 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16438 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16439 LD1->getPointerInfo(), LD1->getAlign());
16440
16441 return SDValue();
16442}
16443
16444static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16445 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16446 // and Lo parts; on big-endian machines it doesn't.
16447 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16448}
16449
16450SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16451 const TargetLowering &TLI) {
16452 // If this is not a bitcast to an FP type or if the target doesn't have
16453 // IEEE754-compliant FP logic, we're done.
16454 EVT VT = N->getValueType(0);
16455 SDValue N0 = N->getOperand(0);
16456 EVT SourceVT = N0.getValueType();
16457
16458 if (!VT.isFloatingPoint())
16459 return SDValue();
16460
16461 // TODO: Handle cases where the integer constant is a different scalar
16462 // bitwidth to the FP.
16463 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16464 return SDValue();
16465
16466 unsigned FPOpcode;
16467 APInt SignMask;
16468 switch (N0.getOpcode()) {
16469 case ISD::AND:
16470 FPOpcode = ISD::FABS;
16471 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16472 break;
16473 case ISD::XOR:
16474 FPOpcode = ISD::FNEG;
16475 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16476 break;
16477 case ISD::OR:
16478 FPOpcode = ISD::FABS;
16479 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16480 break;
16481 default:
16482 return SDValue();
16483 }
16484
16485 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16486 return SDValue();
16487
16488 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16489 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16490 // removing this would require more changes.
16491 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16492 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16493 return true;
16494
16495 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16496 };
16497
16498 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16499 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16500 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16501 // fneg (fabs X)
16502 SDValue LogicOp0 = N0.getOperand(0);
16503 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16504 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16505 IsBitCastOrFree(LogicOp0, VT)) {
16506 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16507 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16508 NumFPLogicOpsConv++;
16509 if (N0.getOpcode() == ISD::OR)
16510 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16511 return FPOp;
16512 }
16513
16514 return SDValue();
16515}
16516
16517SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16518 SDValue N0 = N->getOperand(0);
16519 EVT VT = N->getValueType(0);
16520
16521 if (N0.isUndef())
16522 return DAG.getUNDEF(VT);
16523
16524 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16525 // Only do this before legalize types, unless both types are integer and the
16526 // scalar type is legal. Only do this before legalize ops, since the target
16527 // may be depending on the bitcast.
16528 // First check to see if this is all constant.
16529 // TODO: Support FP bitcasts after legalize types.
16530 if (VT.isVector() &&
16531 (!LegalTypes ||
16532 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16533 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16534 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16535 cast<BuildVectorSDNode>(N0)->isConstant())
16536 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16537 VT.getVectorElementType());
16538
16539 // If the input is a constant, let getNode fold it.
16540 if (isIntOrFPConstant(N0)) {
16541 // If we can't allow illegal operations, we need to check that this is just
16542 // an fp -> int or int -> fp conversion and that the resulting operation will
16543 // be legal.
16544 if (!LegalOperations ||
16545 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16546 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16547 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16548 TLI.isOperationLegal(ISD::Constant, VT))) {
16549 SDValue C = DAG.getBitcast(VT, N0);
16550 if (C.getNode() != N)
16551 return C;
16552 }
16553 }
16554
16555 // (conv (conv x, t1), t2) -> (conv x, t2)
16556 if (N0.getOpcode() == ISD::BITCAST)
16557 return DAG.getBitcast(VT, N0.getOperand(0));
16558
16559 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16560 // iff the current bitwise logicop type isn't legal
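// For example (a sketch; profitability hinges on the inner type, here v2f32,
// not being legal for the target while the outer integer type is):
//   (i64 (bitcast (xor (v2f32 (bitcast i64:x)), v2f32:c)))
//     -> (xor i64:x, (i64 (bitcast v2f32:c)))
// when both logicop operands are free to bitcast back to i64.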
16561 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16562 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16563 auto IsFreeBitcast = [VT](SDValue V) {
16564 return (V.getOpcode() == ISD::BITCAST &&
16565 V.getOperand(0).getValueType() == VT) ||
16566 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16567 V->hasOneUse());
16568 };
16569 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16570 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16571 DAG.getBitcast(VT, N0.getOperand(0)),
16572 DAG.getBitcast(VT, N0.getOperand(1)));
16573 }
16574
16575 // fold (conv (load x)) -> (load (conv*)x)
16576 // If the resultant load doesn't need a higher alignment than the original!
16577 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16578 // Do not remove the cast if the types differ in endian layout.
16579 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
16580 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
16581 // If the load is volatile, we only want to change the load type if the
16582 // resulting load is legal. Otherwise we might increase the number of
16583 // memory accesses. We don't care if the original type was legal or not
16584 // as we assume software couldn't rely on the number of accesses of an
16585 // illegal type.
16586 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
16587 TLI.isOperationLegal(ISD::LOAD, VT))) {
16588 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16589
16590 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16591 *LN0->getMemOperand())) {
16592 // If the range metadata type does not match the new memory
16593 // operation type, remove the range metadata.
16594 if (const MDNode *MD = LN0->getRanges()) {
16595 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16596 if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
16597 !VT.isInteger()) {
16598 LN0->getMemOperand()->clearRanges();
16599 }
16600 }
16601 SDValue Load =
16602 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16603 LN0->getMemOperand());
16604 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16605 return Load;
16606 }
16607 }
16608
16609 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16610 return V;
16611
16612 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16613 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16614 //
16615 // For ppc_fp128:
16616 // fold (bitcast (fneg x)) ->
16617 // flipbit = signbit
16618 // (xor (bitcast x) (build_pair flipbit, flipbit))
16619 //
16620 // fold (bitcast (fabs x)) ->
16621 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16622 // (xor (bitcast x) (build_pair flipbit, flipbit))
16623 // This often reduces constant pool loads.
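// For example, on a target where f64 fneg/fabs are not free:
//   (i64 (bitcast (fneg f64:x))) -> (xor (i64 (bitcast x)), 0x8000000000000000)
//   (i64 (bitcast (fabs f64:x))) -> (and (i64 (bitcast x)), 0x7fffffffffffffff)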
16624 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16625 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16626 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16627 !N0.getValueType().isVector()) {
16628 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16629 AddToWorklist(NewConv.getNode());
16630
16631 SDLoc DL(N);
16632 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16633 assert(VT.getSizeInBits() == 128);
16634 SDValue SignBit = DAG.getConstant(
16635 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16636 SDValue FlipBit;
16637 if (N0.getOpcode() == ISD::FNEG) {
16638 FlipBit = SignBit;
16639 AddToWorklist(FlipBit.getNode());
16640 } else {
16641 assert(N0.getOpcode() == ISD::FABS);
16642 SDValue Hi =
16643 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16644 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16645 SDLoc(NewConv)));
16646 AddToWorklist(Hi.getNode());
16647 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16648 AddToWorklist(FlipBit.getNode());
16649 }
16650 SDValue FlipBits =
16651 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16652 AddToWorklist(FlipBits.getNode());
16653 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16654 }
16655 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16656 if (N0.getOpcode() == ISD::FNEG)
16657 return DAG.getNode(ISD::XOR, DL, VT,
16658 NewConv, DAG.getConstant(SignBit, DL, VT));
16659 assert(N0.getOpcode() == ISD::FABS);
16660 return DAG.getNode(ISD::AND, DL, VT,
16661 NewConv, DAG.getConstant(~SignBit, DL, VT));
16662 }
16663
16664 // fold (bitconvert (fcopysign cst, x)) ->
16665 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16666 // Note that we don't handle (copysign x, cst) because this can always be
16667 // folded to an fneg or fabs.
16668 //
16669 // For ppc_fp128:
16670 // fold (bitcast (fcopysign cst, x)) ->
16671 // flipbit = (and (extract_element
16672 // (xor (bitcast cst), (bitcast x)), 0),
16673 // signbit)
16674 // (xor (bitcast cst) (build_pair flipbit, flipbit))
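// For example, with an i64 result and an f32 sign operand x, the code below
// sign-extends (i32 (bitcast x)) to i64 so the original sign bit lands in bit
// 63 before it is ANDed with the sign mask.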
16675 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16676 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16677 !VT.isVector()) {
16678 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16679 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16680 if (isTypeLegal(IntXVT)) {
16681 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16682 AddToWorklist(X.getNode());
16683
16684 // If X has a different width than the result/lhs, sext it or truncate it.
16685 unsigned VTWidth = VT.getSizeInBits();
16686 if (OrigXWidth < VTWidth) {
16687 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16688 AddToWorklist(X.getNode());
16689 } else if (OrigXWidth > VTWidth) {
16690 // To get the sign bit in the right place, we have to shift it right
16691 // before truncating.
16692 SDLoc DL(X);
16693 X = DAG.getNode(ISD::SRL, DL,
16694 X.getValueType(), X,
16695 DAG.getConstant(OrigXWidth-VTWidth, DL,
16696 X.getValueType()));
16697 AddToWorklist(X.getNode());
16698 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16699 AddToWorklist(X.getNode());
16700 }
16701
16702 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16703 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16704 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16705 AddToWorklist(Cst.getNode());
16706 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16707 AddToWorklist(X.getNode());
16708 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16709 AddToWorklist(XorResult.getNode());
16710 SDValue XorResult64 = DAG.getNode(
16711 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16712 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16713 SDLoc(XorResult)));
16714 AddToWorklist(XorResult64.getNode());
16715 SDValue FlipBit =
16716 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16717 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16718 AddToWorklist(FlipBit.getNode());
16719 SDValue FlipBits =
16720 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16721 AddToWorklist(FlipBits.getNode());
16722 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16723 }
16724 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16725 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16726 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16727 AddToWorklist(X.getNode());
16728
16729 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16730 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16731 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16732 AddToWorklist(Cst.getNode());
16733
16734 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16735 }
16736 }
16737
16738 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
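// For example (a little-endian sketch; CombineConsecutiveLoads performs the
// actual adjacency, legality and alignment checks):
//   (f64 (bitcast (build_pair (i32 load [p]), (i32 load [p+4])))) -> (f64 load [p])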
16739 if (N0.getOpcode() == ISD::BUILD_PAIR)
16740 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16741 return CombineLD;
16742
16743 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16744 // => int_vt (any_extend elt_vt:x)
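// For example:
//   (i64 (bitcast (v2i32 (scalar_to_vector i32:x)))) -> (i64 (any_extend x))
// Only the lane holding x carries defined bits, so an any_extend (which leaves
// the widened bits undefined) preserves everything that must be preserved.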
16745 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16746 SDValue SrcScalar = N0.getOperand(0);
16747 if (SrcScalar.getValueType().isScalarInteger())
16748 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16749 }
16750
16751 // Remove double bitcasts from shuffles - this is often a legacy of
16752 // XformToShuffleWithZero being used to combine bitmaskings (of
16753 // float vectors bitcast to integer vectors) into shuffles.
16754 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16755 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16756 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16757 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16758 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16759 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16760
16761 // If operands are a bitcast, peek through if it casts the original VT.
16762 // If operands are a constant, just bitcast back to original VT.
16763 auto PeekThroughBitcast = [&](SDValue Op) {
16764 if (Op.getOpcode() == ISD::BITCAST &&
16765 Op.getOperand(0).getValueType() == VT)
16766 return SDValue(Op.getOperand(0));
16767 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16768 return DAG.getBitcast(VT, Op);
16769 return SDValue();
16770 };
16771
16772 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16773 // the result type of this bitcast. This would eliminate at least one
16774 // bitcast. See the transform in InstCombine.
16775 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16776 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16777 if (!(SV0 && SV1))
16778 return SDValue();
16779
16780 int MaskScale =
16781 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16782 SmallVector<int, 8> NewMask;
16783 for (int M : SVN->getMask())
16784 for (int i = 0; i != MaskScale; ++i)
16785 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16786
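// For example, bitcasting a v2i64 shuffle with mask <1,0> to v4i32 gives
// MaskScale = 2, so the widened v4i32 mask becomes <2,3,0,1>.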
16787 SDValue LegalShuffle =
16788 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16789 if (LegalShuffle)
16790 return LegalShuffle;
16791 }
16792
16793 return SDValue();
16794}
16795
16796SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16797 EVT VT = N->getValueType(0);
16798 return CombineConsecutiveLoads(N, VT);
16799}
16800
16801SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16802 SDValue N0 = N->getOperand(0);
16803
16804 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16805 return N0;
16806
16807 // If we have frozen and unfrozen users of N0, update so everything uses N.
16808 if (!N0.isUndef() && !N0.hasOneUse()) {
16809 SDValue FrozenN0(N, 0);
16810 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
16811 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
16812 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
16813 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
16814 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16815 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
16816 DAG.UpdateNodeOperands(N, N0);
16817 return FrozenN0;
16818 }
16819
16820 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16821 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16822 // example https://reviews.llvm.org/D136529#4120959.
16823 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16824 return SDValue();
16825
16826 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16827 // Try to push freeze through instructions that propagate but don't produce
16828 // poison as far as possible. If an operand of freeze satisfies three
16829 // conditions: 1) one use, 2) does not produce poison, and 3) all but one of
16830 // its operands are guaranteed non-poison (or it is a BUILD_VECTOR or similar),
16831 // then push the freeze through to the operands that are not guaranteed non-poison.
16832 // NOTE: we will strip poison-generating flags, so ignore them here.
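// For example, freeze (add x, 42) can be rewritten as (add (freeze x), 42):
// a constant operand can never be poison, so only x needs the freeze (and any
// poison-generating flags on the add are dropped).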
16833 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16834 /*ConsiderFlags*/ false) ||
16835 N0->getNumValues() != 1 || !N0->hasOneUse())
16836 return SDValue();
16837
16838 // TODO: we should always allow multiple operands. However, this increases the
16839 // likelihood of infinite loops: the ReplaceAllUsesOfValueWith call below can
16840 // cause later nodes that share frozen operands to fold again, and the recursion
16841 // depth limit on isGuaranteedNotToBeUndefOrPoison may then prevent us from
16842 // confirming that the other operands are not poison.
16843 bool AllowMultipleMaybePoisonOperands =
16844 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16845 N0.getOpcode() == ISD::BUILD_VECTOR ||
16846 N0.getOpcode() == ISD::BUILD_PAIR ||
16847 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16848 N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
16849
16850 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16851 // ones" or "constant" into something that depends on FrozenUndef. We can
16852 // instead pick undef values to keep those properties, while at the same time
16853 // folding away the freeze.
16854 // If we implement a more general solution for folding away freeze(undef) in
16855 // the future, then this special handling can be removed.
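// For example, freeze (build_vector C0, undef, C1, undef) is folded below to
// build_vector C0, 0, C1, 0, which is still a constant vector and no longer
// depends on a frozen undef.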
16856 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16857 SDLoc DL(N0);
16858 EVT VT = N0.getValueType();
16859 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16860 return DAG.getAllOnesConstant(DL, VT);
16861 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16862 SmallVector<SDValue, 8> NewVecC;
16863 for (const SDValue &Op : N0->op_values())
16864 NewVecC.push_back(
16865 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16866 return DAG.getBuildVector(VT, DL, NewVecC);
16867 }
16868 }
16869
16870 SmallSet<SDValue, 8> MaybePoisonOperands;
16871 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16872 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16873 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
16874 continue;
16875 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16876 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16877 if (IsNewMaybePoisonOperand)
16878 MaybePoisonOperandNumbers.push_back(OpNo);
16879 if (!HadMaybePoisonOperands)
16880 continue;
16881 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16882 // Multiple maybe-poison ops when not allowed - bail out.
16883 return SDValue();
16884 }
16885 }
16886 // NOTE: the whole op may still not be guaranteed to be undef/poison free,
16887 // because it could create undef or poison due to its poison-generating flags.
16888 // So not finding any maybe-poison operands is fine.
16889
16890 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16891 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16892 // operands via the operand numbers. The typical scenario is that we have
16893 // something like this
16894 // t262: i32 = freeze t181
16895 // t150: i32 = ctlz_zero_undef t262
16896 // t184: i32 = ctlz_zero_undef t181
16897 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16898 // When freezing the t181 operand we get t262 back, and then the
16899 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16900 // also recursively replace t184 by t150.
16901 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16902 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16903 if (MaybePoisonOperand.isUndef())
16904 continue;
16905 // First, freeze each offending operand.
16906 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16907 // Then, change all other uses of unfrozen operand to use frozen operand.
16908 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16909 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16910 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16911 // But, that also updated the use in the freeze we just created, thus
16912 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16913 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
16914 MaybePoisonOperand);
16915 }
16916
16917 // This node has been merged with another.
16918 if (N->getOpcode() == ISD::DELETED_NODE)
16919 return SDValue(N, 0);
16920 }
16921
16922 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
16923
16924 // The whole node may have been updated, so the value we were holding
16925 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
16926 N0 = N->getOperand(0);
16927
16928 // Finally, recreate the node; its operands were updated to use
16929 // frozen operands, so we just need to use its "original" operands.
16930 SmallVector<SDValue> Ops(N0->ops());
16931 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
16932 // leave for a future patch.
16933 for (SDValue &Op : Ops) {
16934 if (Op.isUndef())
16935 Op = DAG.getFreeze(Op);
16936 }
16937
16938 SDLoc DL(N0);
16939
16940 // Special case handling for ShuffleVectorSDNode nodes.
16941 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
16942 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
16943 SVN->getMask());
16944
16945 // NOTE: this strips poison generating flags.
16946 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
16947 // ninf, nsz, or fast.
16948 // However, contract, reassoc, afn, and arcp should be preserved,
16949 // as these fast-math flags do not introduce poison values.
16950 SDNodeFlags SrcFlags = N0->getFlags();
16951 SDNodeFlags SafeFlags;
16952 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
16953 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
16954 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
16955 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
16956 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
16957}
16958
16959// Returns true if floating point contraction is allowed on the FMUL-SDValue
16960 // `N`.
16961 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
16962 assert(N.getOpcode() == ISD::FMUL);
16963
16964 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
16965 N->getFlags().hasAllowContract();
16966}
16967
16968 // Returns true if `N` may assume that no infinities are involved in its computation.
16969 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
16970 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
16971}
16972
16973/// Try to perform FMA combining on a given FADD node.
16974template <class MatchContextClass>
16975SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
16976 SDValue N0 = N->getOperand(0);
16977 SDValue N1 = N->getOperand(1);
16978 EVT VT = N->getValueType(0);
16979 SDLoc SL(N);
16980 MatchContextClass matcher(DAG, TLI, N);
16981 const TargetOptions &Options = DAG.getTarget().Options;
16982
16983 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16984
16985 // Floating-point multiply-add with intermediate rounding.
16986 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16987 // FIXME: Add VP_FMAD opcode.
16988 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16989
16990 // Floating-point multiply-add without intermediate rounding.
16991 bool HasFMA =
16992 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
16993 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
16994
16995 // No valid opcode, do not combine.
16996 if (!HasFMAD && !HasFMA)
16997 return SDValue();
16998
16999 bool AllowFusionGlobally =
17000 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17001 // If the addition is not contractable, do not combine.
17002 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17003 return SDValue();
17004
17005 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17006 // beneficial. It does not reduce latency. It increases register pressure. It
17007 // replaces an fadd with an fma which is a more complex instruction, so is
17008 // likely to have a larger encoding, use more functional units, etc.
17009 if (N0 == N1)
17010 return SDValue();
17011
17012 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17013 return SDValue();
17014
17015 // Always prefer FMAD to FMA for precision.
17016 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17017 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17018
17019 auto isFusedOp = [&](SDValue N) {
17020 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17021 };
17022
17023 // Is the node an FMUL and contractable either due to global flags or
17024 // SDNodeFlags.
17025 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17026 if (!matcher.match(N, ISD::FMUL))
17027 return false;
17028 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17029 };
17030 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17031 // prefer to fold the multiply with fewer uses.
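// For example, if (fmul u, v) has other users but (fmul x, y) does not, swap
// them so the single-use multiply is the one that gets fused away and the
// shared multiply remains available to its other users.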
17032 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
17033 if (N0->use_size() > N1->use_size())
17034 std::swap(N0, N1);
17035 }
17036
17037 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17038 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17039 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17040 N0.getOperand(1), N1);
17041 }
17042
17043 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17044 // Note: Commutes FADD operands.
17045 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17046 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17047 N1.getOperand(1), N0);
17048 }
17049
17050 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17051 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17052 // This also works with nested fma instructions:
17053 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
17054 // fma A, B, (fma C, D, (fma E, F, G))
17055 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
17056 // fma A, B, (fma C, D, (fma E, F, G)).
17057 // This requires reassociation because it changes the order of operations.
17058 bool CanReassociate = N->getFlags().hasAllowReassociation();
17059 if (CanReassociate) {
17060 SDValue FMA, E;
17061 if (isFusedOp(N0) && N0.hasOneUse()) {
17062 FMA = N0;
17063 E = N1;
17064 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17065 FMA = N1;
17066 E = N0;
17067 }
17068
17069 SDValue TmpFMA = FMA;
17070 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17071 SDValue FMul = TmpFMA->getOperand(2);
17072 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17073 SDValue C = FMul.getOperand(0);
17074 SDValue D = FMul.getOperand(1);
17075 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17076 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17077 // Replacing the inner FMul could cause the outer FMA to be simplified
17078 // away.
17079 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17080 }
17081
17082 TmpFMA = TmpFMA->getOperand(2);
17083 }
17084 }
17085
17086 // Look through FP_EXTEND nodes to do more combining.
17087
17088 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17089 if (matcher.match(N0, ISD::FP_EXTEND)) {
17090 SDValue N00 = N0.getOperand(0);
17091 if (isContractableFMUL(N00) &&
17092 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17093 N00.getValueType())) {
17094 return matcher.getNode(
17095 PreferredFusedOpcode, SL, VT,
17096 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17097 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17098 }
17099 }
17100
17101 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17102 // Note: Commutes FADD operands.
17103 if (matcher.match(N1, ISD::FP_EXTEND)) {
17104 SDValue N10 = N1.getOperand(0);
17105 if (isContractableFMUL(N10) &&
17106 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17107 N10.getValueType())) {
17108 return matcher.getNode(
17109 PreferredFusedOpcode, SL, VT,
17110 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17111 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17112 }
17113 }
17114
17115 // More folding opportunities when target permits.
17116 if (Aggressive) {
17117 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17118 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17119 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17120 SDValue Z) {
17121 return matcher.getNode(
17122 PreferredFusedOpcode, SL, VT, X, Y,
17123 matcher.getNode(PreferredFusedOpcode, SL, VT,
17124 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17125 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17126 };
17127 if (isFusedOp(N0)) {
17128 SDValue N02 = N0.getOperand(2);
17129 if (matcher.match(N02, ISD::FP_EXTEND)) {
17130 SDValue N020 = N02.getOperand(0);
17131 if (isContractableFMUL(N020) &&
17132 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17133 N020.getValueType())) {
17134 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17135 N020.getOperand(0), N020.getOperand(1),
17136 N1);
17137 }
17138 }
17139 }
17140
17141 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17142 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17143 // FIXME: This turns two single-precision and one double-precision
17144 // operation into two double-precision operations, which might not be
17145 // interesting for all targets, especially GPUs.
17146 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17147 SDValue Z) {
17148 return matcher.getNode(
17149 PreferredFusedOpcode, SL, VT,
17150 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17151 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17152 matcher.getNode(PreferredFusedOpcode, SL, VT,
17153 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17154 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17155 };
17156 if (N0.getOpcode() == ISD::FP_EXTEND) {
17157 SDValue N00 = N0.getOperand(0);
17158 if (isFusedOp(N00)) {
17159 SDValue N002 = N00.getOperand(2);
17160 if (isContractableFMUL(N002) &&
17161 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17162 N00.getValueType())) {
17163 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17164 N002.getOperand(0), N002.getOperand(1),
17165 N1);
17166 }
17167 }
17168 }
17169
17170 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17171 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17172 if (isFusedOp(N1)) {
17173 SDValue N12 = N1.getOperand(2);
17174 if (N12.getOpcode() == ISD::FP_EXTEND) {
17175 SDValue N120 = N12.getOperand(0);
17176 if (isContractableFMUL(N120) &&
17177 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17178 N120.getValueType())) {
17179 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17180 N120.getOperand(0), N120.getOperand(1),
17181 N0);
17182 }
17183 }
17184 }
17185
17186 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17187 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17188 // FIXME: This turns two single-precision and one double-precision
17189 // operation into two double-precision operations, which might not be
17190 // interesting for all targets, especially GPUs.
17191 if (N1.getOpcode() == ISD::FP_EXTEND) {
17192 SDValue N10 = N1.getOperand(0);
17193 if (isFusedOp(N10)) {
17194 SDValue N102 = N10.getOperand(2);
17195 if (isContractableFMUL(N102) &&
17196 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17197 N10.getValueType())) {
17198 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17199 N102.getOperand(0), N102.getOperand(1),
17200 N0);
17201 }
17202 }
17203 }
17204 }
17205
17206 return SDValue();
17207}
17208
17209/// Try to perform FMA combining on a given FSUB node.
17210template <class MatchContextClass>
17211SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17212 SDValue N0 = N->getOperand(0);
17213 SDValue N1 = N->getOperand(1);
17214 EVT VT = N->getValueType(0);
17215 SDLoc SL(N);
17216 MatchContextClass matcher(DAG, TLI, N);
17217 const TargetOptions &Options = DAG.getTarget().Options;
17218
17219 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17220
17221 // Floating-point multiply-add with intermediate rounding.
17222 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17223 // FIXME: Add VP_FMAD opcode.
17224 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17225
17226 // Floating-point multiply-add without intermediate rounding.
17227 bool HasFMA =
17228 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17229 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17230
17231 // No valid opcode, do not combine.
17232 if (!HasFMAD && !HasFMA)
17233 return SDValue();
17234
17235 const SDNodeFlags Flags = N->getFlags();
17236 bool AllowFusionGlobally =
17237 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17238
17239 // If the subtraction is not contractable, do not combine.
17240 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17241 return SDValue();
17242
17243 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17244 return SDValue();
17245
17246 // Always prefer FMAD to FMA for precision.
17247 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17248 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17249 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
17250
17251 // Is the node an FMUL and contractable either due to global flags or
17252 // SDNodeFlags.
17253 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17254 if (!matcher.match(N, ISD::FMUL))
17255 return false;
17256 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17257 };
17258
17259 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17260 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17261 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17262 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17263 XY.getOperand(1),
17264 matcher.getNode(ISD::FNEG, SL, VT, Z));
17265 }
17266 return SDValue();
17267 };
17268
17269 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17270 // Note: Commutes FSUB operands.
17271 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17272 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17273 return matcher.getNode(
17274 PreferredFusedOpcode, SL, VT,
17275 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17276 YZ.getOperand(1), X);
17277 }
17278 return SDValue();
17279 };
17280
17281 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17282 // prefer to fold the multiply with fewer uses.
17283 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17284 (N0->use_size() > N1->use_size())) {
17285 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17286 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17287 return V;
17288 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17289 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17290 return V;
17291 } else {
17292 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17293 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17294 return V;
17295 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17296 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17297 return V;
17298 }
17299
17300 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
17301 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17302 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17303 SDValue N00 = N0.getOperand(0).getOperand(0);
17304 SDValue N01 = N0.getOperand(0).getOperand(1);
17305 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17306 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17307 matcher.getNode(ISD::FNEG, SL, VT, N1));
17308 }
17309
17310 // Look through FP_EXTEND nodes to do more combining.
17311
17312 // fold (fsub (fpext (fmul x, y)), z)
17313 // -> (fma (fpext x), (fpext y), (fneg z))
17314 if (matcher.match(N0, ISD::FP_EXTEND)) {
17315 SDValue N00 = N0.getOperand(0);
17316 if (isContractableFMUL(N00) &&
17317 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17318 N00.getValueType())) {
17319 return matcher.getNode(
17320 PreferredFusedOpcode, SL, VT,
17321 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17322 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17323 matcher.getNode(ISD::FNEG, SL, VT, N1));
17324 }
17325 }
17326
17327 // fold (fsub x, (fpext (fmul y, z)))
17328 // -> (fma (fneg (fpext y)), (fpext z), x)
17329 // Note: Commutes FSUB operands.
17330 if (matcher.match(N1, ISD::FP_EXTEND)) {
17331 SDValue N10 = N1.getOperand(0);
17332 if (isContractableFMUL(N10) &&
17333 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17334 N10.getValueType())) {
17335 return matcher.getNode(
17336 PreferredFusedOpcode, SL, VT,
17337 matcher.getNode(
17338 ISD::FNEG, SL, VT,
17339 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17340 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17341 }
17342 }
17343
17344 // fold (fsub (fpext (fneg (fmul x, y))), z)
17345 // -> (fneg (fma (fpext x), (fpext y), z))
17346 // Note: This could be removed with appropriate canonicalization of the
17347 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17348 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17349 // from implementing the canonicalization in visitFSUB.
17350 if (matcher.match(N0, ISD::FP_EXTEND)) {
17351 SDValue N00 = N0.getOperand(0);
17352 if (matcher.match(N00, ISD::FNEG)) {
17353 SDValue N000 = N00.getOperand(0);
17354 if (isContractableFMUL(N000) &&
17355 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17356 N00.getValueType())) {
17357 return matcher.getNode(
17358 ISD::FNEG, SL, VT,
17359 matcher.getNode(
17360 PreferredFusedOpcode, SL, VT,
17361 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17362 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17363 N1));
17364 }
17365 }
17366 }
17367
17368 // fold (fsub (fneg (fpext (fmul x, y))), z)
17369 // -> (fneg (fma (fpext x), (fpext y), z))
17370 // Note: This could be removed with appropriate canonicalization of the
17371 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17372 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
17373 // from implementing the canonicalization in visitFSUB.
17374 if (matcher.match(N0, ISD::FNEG)) {
17375 SDValue N00 = N0.getOperand(0);
17376 if (matcher.match(N00, ISD::FP_EXTEND)) {
17377 SDValue N000 = N00.getOperand(0);
17378 if (isContractableFMUL(N000) &&
17379 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17380 N000.getValueType())) {
17381 return matcher.getNode(
17382 ISD::FNEG, SL, VT,
17383 matcher.getNode(
17384 PreferredFusedOpcode, SL, VT,
17385 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17386 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17387 N1));
17388 }
17389 }
17390 }
17391
17392 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17393 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17394 };
17395
17396 auto isFusedOp = [&](SDValue N) {
17397 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17398 };
17399
17400 // More folding opportunities when target permits.
17401 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17402 bool CanFuse = N->getFlags().hasAllowContract();
17403 // fold (fsub (fma x, y, (fmul u, v)), z)
17404 // -> (fma x, y (fma u, v, (fneg z)))
17405 if (CanFuse && isFusedOp(N0) &&
17406 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17407 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17408 return matcher.getNode(
17409 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17410 matcher.getNode(PreferredFusedOpcode, SL, VT,
17411 N0.getOperand(2).getOperand(0),
17412 N0.getOperand(2).getOperand(1),
17413 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17414 }
17415
17416 // fold (fsub x, (fma y, z, (fmul u, v)))
17417 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17418 if (CanFuse && isFusedOp(N1) &&
17419 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17420 N1->hasOneUse() && NoSignedZero) {
17421 SDValue N20 = N1.getOperand(2).getOperand(0);
17422 SDValue N21 = N1.getOperand(2).getOperand(1);
17423 return matcher.getNode(
17424 PreferredFusedOpcode, SL, VT,
17425 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17426 N1.getOperand(1),
17427 matcher.getNode(PreferredFusedOpcode, SL, VT,
17428 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17429 }
17430
17431 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17432 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17433 if (isFusedOp(N0) && N0->hasOneUse()) {
17434 SDValue N02 = N0.getOperand(2);
17435 if (matcher.match(N02, ISD::FP_EXTEND)) {
17436 SDValue N020 = N02.getOperand(0);
17437 if (isContractableAndReassociableFMUL(N020) &&
17438 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17439 N020.getValueType())) {
17440 return matcher.getNode(
17441 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17442 matcher.getNode(
17443 PreferredFusedOpcode, SL, VT,
17444 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17445 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17446 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17447 }
17448 }
17449 }
17450
17451 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17452 // -> (fma (fpext x), (fpext y),
17453 // (fma (fpext u), (fpext v), (fneg z)))
17454 // FIXME: This turns two single-precision and one double-precision
17455 // operation into two double-precision operations, which might not be
17456 // interesting for all targets, especially GPUs.
17457 if (matcher.match(N0, ISD::FP_EXTEND)) {
17458 SDValue N00 = N0.getOperand(0);
17459 if (isFusedOp(N00)) {
17460 SDValue N002 = N00.getOperand(2);
17461 if (isContractableAndReassociableFMUL(N002) &&
17462 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17463 N00.getValueType())) {
17464 return matcher.getNode(
17465 PreferredFusedOpcode, SL, VT,
17466 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17467 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17468 matcher.getNode(
17469 PreferredFusedOpcode, SL, VT,
17470 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17471 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17472 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17473 }
17474 }
17475 }
17476
17477 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17478 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17479 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17480 N1->hasOneUse()) {
17481 SDValue N120 = N1.getOperand(2).getOperand(0);
17482 if (isContractableAndReassociableFMUL(N120) &&
17483 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17484 N120.getValueType())) {
17485 SDValue N1200 = N120.getOperand(0);
17486 SDValue N1201 = N120.getOperand(1);
17487 return matcher.getNode(
17488 PreferredFusedOpcode, SL, VT,
17489 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17490 N1.getOperand(1),
17491 matcher.getNode(
17492 PreferredFusedOpcode, SL, VT,
17493 matcher.getNode(ISD::FNEG, SL, VT,
17494 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17495 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17496 }
17497 }
17498
17499 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17500 // -> (fma (fneg (fpext y)), (fpext z),
17501 // (fma (fneg (fpext u)), (fpext v), x))
17502 // FIXME: This turns two single-precision and one double-precision
17503 // operation into two double-precision operations, which might not be
17504 // interesting for all targets, especially GPUs.
17505 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17506 SDValue CvtSrc = N1.getOperand(0);
17507 SDValue N100 = CvtSrc.getOperand(0);
17508 SDValue N101 = CvtSrc.getOperand(1);
17509 SDValue N102 = CvtSrc.getOperand(2);
17510 if (isContractableAndReassociableFMUL(N102) &&
17511 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17512 CvtSrc.getValueType())) {
17513 SDValue N1020 = N102.getOperand(0);
17514 SDValue N1021 = N102.getOperand(1);
17515 return matcher.getNode(
17516 PreferredFusedOpcode, SL, VT,
17517 matcher.getNode(ISD::FNEG, SL, VT,
17518 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17519 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17520 matcher.getNode(
17521 PreferredFusedOpcode, SL, VT,
17522 matcher.getNode(ISD::FNEG, SL, VT,
17523 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17524 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17525 }
17526 }
17527 }
17528
17529 return SDValue();
17530}
17531
17532/// Try to perform FMA combining on a given FMUL node based on the distributive
17533/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17534/// subtraction instead of addition).
17535SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17536 SDValue N0 = N->getOperand(0);
17537 SDValue N1 = N->getOperand(1);
17538 EVT VT = N->getValueType(0);
17539 SDLoc SL(N);
17540
17541 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17542
17543 const TargetOptions &Options = DAG.getTarget().Options;
17544
17545 // The transforms below are incorrect when x == 0 and y == inf, because the
17546 // intermediate multiplication produces a nan.
17547 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17548 if (!hasNoInfs(Options, FAdd))
17549 return SDValue();
17550
17551 // Floating-point multiply-add without intermediate rounding.
17552 bool HasFMA =
17553 isContractableFMUL(Options, SDValue(N, 0)) &&
17554 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17555 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17556
17557 // Floating-point multiply-add with intermediate rounding. This can result
17558 // in a less precise result due to the changed rounding order.
17559 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17560
17561 // No valid opcode, do not combine.
17562 if (!HasFMAD && !HasFMA)
17563 return SDValue();
17564
17565 // Always prefer FMAD to FMA for precision.
17566 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17567 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17568
17569 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17570 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
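// These are direct rewrites of the distributive law, e.g.
//   (x0 + 1.0) * y == x0 * y + y   and   (x0 - 1.0) * y == x0 * y - y,
// modulo the usual difference in rounding between a fused FMA and a separate
// mul/add pair.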
17571 auto FuseFADD = [&](SDValue X, SDValue Y) {
17572 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17573 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17574 if (C->isExactlyValue(+1.0))
17575 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17576 Y);
17577 if (C->isExactlyValue(-1.0))
17578 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17579 DAG.getNode(ISD::FNEG, SL, VT, Y));
17580 }
17581 }
17582 return SDValue();
17583 };
17584
17585 if (SDValue FMA = FuseFADD(N0, N1))
17586 return FMA;
17587 if (SDValue FMA = FuseFADD(N1, N0))
17588 return FMA;
17589
17590 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17591 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17592 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17593 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17594 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17595 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17596 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17597 if (C0->isExactlyValue(+1.0))
17598 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17599 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17600 Y);
17601 if (C0->isExactlyValue(-1.0))
17602 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17603 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17604 DAG.getNode(ISD::FNEG, SL, VT, Y));
17605 }
17606 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17607 if (C1->isExactlyValue(+1.0))
17608 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17609 DAG.getNode(ISD::FNEG, SL, VT, Y));
17610 if (C1->isExactlyValue(-1.0))
17611 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17612 Y);
17613 }
17614 }
17615 return SDValue();
17616 };
17617
17618 if (SDValue FMA = FuseFSUB(N0, N1))
17619 return FMA;
17620 if (SDValue FMA = FuseFSUB(N1, N0))
17621 return FMA;
17622
17623 return SDValue();
17624}
17625
17626SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17627 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17628
17629 // FADD -> FMA combines:
17630 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17631 if (Fused.getOpcode() != ISD::DELETED_NODE)
17632 AddToWorklist(Fused.getNode());
17633 return Fused;
17634 }
17635 return SDValue();
17636}
17637
17638SDValue DAGCombiner::visitFADD(SDNode *N) {
17639 SDValue N0 = N->getOperand(0);
17640 SDValue N1 = N->getOperand(1);
17641 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17642 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17643 EVT VT = N->getValueType(0);
17644 SDLoc DL(N);
17645 const TargetOptions &Options = DAG.getTarget().Options;
17646 SDNodeFlags Flags = N->getFlags();
17647 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17648
17649 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17650 return R;
17651
17652 // fold (fadd c1, c2) -> c1 + c2
17653 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17654 return C;
17655
17656 // canonicalize constant to RHS
17657 if (N0CFP && !N1CFP)
17658 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17659
17660 // fold vector ops
17661 if (VT.isVector())
17662 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17663 return FoldedVOp;
17664
17665 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
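// x + (-0.0) == x for every x, but x + (+0.0) may flip the sign of a negative
// zero ((-0.0) + (+0.0) is +0.0), so the +0.0 case additionally needs nsz.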
17666 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17667 if (N1C && N1C->isZero())
17668 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17669 return N0;
17670
17671 if (SDValue NewSel = foldBinOpIntoSelect(N))
17672 return NewSel;
17673
17674 // fold (fadd A, (fneg B)) -> (fsub A, B)
17675 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17676 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17677 N1, DAG, LegalOperations, ForCodeSize))
17678 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17679
17680 // fold (fadd (fneg A), B) -> (fsub B, A)
17681 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17682 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17683 N0, DAG, LegalOperations, ForCodeSize))
17684 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17685
17686 auto isFMulNegTwo = [](SDValue FMul) {
17687 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17688 return false;
17689 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17690 return C && C->isExactlyValue(-2.0);
17691 };
17692
17693 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17694 if (isFMulNegTwo(N0)) {
17695 SDValue B = N0.getOperand(0);
17696 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17697 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17698 }
17699 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17700 if (isFMulNegTwo(N1)) {
17701 SDValue B = N1.getOperand(0);
17702 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17703 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17704 }
17705
17706 // No FP constant should be created after legalization as the Instruction
17707 // Selection pass has a hard time dealing with FP constants.
17708 bool AllowNewConst = (Level < AfterLegalizeDAG);
17709
17710 // If nnan is enabled, fold lots of things.
17711 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17712 // If allowed, fold (fadd (fneg x), x) -> 0.0
17713 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17714 return DAG.getConstantFP(0.0, DL, VT);
17715
17716 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17717 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17718 return DAG.getConstantFP(0.0, DL, VT);
17719 }
17720
17721 // If 'unsafe math' is enabled, or we have reassoc and nsz, fold lots of things.
17722 // TODO: break out the portions of the transformations below for which Unsafe
17723 // is considered and which do not require both nsz and reassoc.
17724 if ((Options.NoSignedZerosFPMath ||
17725 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17726 AllowNewConst) {
17727 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17728 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17729 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17730 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17731 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17732 }
17733
17734 // We can fold chains of FADD's of the same value into multiplications.
17735 // This transform is not safe in general because we are reducing the number
17736 // of rounding steps.
17737 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17738 if (N0.getOpcode() == ISD::FMUL) {
17739 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17740 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17741
17742 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17743 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17744 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17745 DAG.getConstantFP(1.0, DL, VT));
17746 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17747 }
17748
17749 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17750 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17751 N1.getOperand(0) == N1.getOperand(1) &&
17752 N0.getOperand(0) == N1.getOperand(0)) {
17753 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17754 DAG.getConstantFP(2.0, DL, VT));
17755 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17756 }
17757 }
17758
17759 if (N1.getOpcode() == ISD::FMUL) {
17760 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17761 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17762
17763 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17764 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17765 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17766 DAG.getConstantFP(1.0, DL, VT));
17767 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17768 }
17769
17770 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17771 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17772 N0.getOperand(0) == N0.getOperand(1) &&
17773 N1.getOperand(0) == N0.getOperand(0)) {
17774 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17775 DAG.getConstantFP(2.0, DL, VT));
17776 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17777 }
17778 }
17779
17780 if (N0.getOpcode() == ISD::FADD) {
17781 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17782 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17783 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17784 (N0.getOperand(0) == N1)) {
17785 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17786 DAG.getConstantFP(3.0, DL, VT));
17787 }
17788 }
17789
17790 if (N1.getOpcode() == ISD::FADD) {
17791 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17792 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17793 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17794 N1.getOperand(0) == N0) {
17795 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17796 DAG.getConstantFP(3.0, DL, VT));
17797 }
17798 }
17799
17800 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17801 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17802 N0.getOperand(0) == N0.getOperand(1) &&
17803 N1.getOperand(0) == N1.getOperand(1) &&
17804 N0.getOperand(0) == N1.getOperand(0)) {
17805 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17806 DAG.getConstantFP(4.0, DL, VT));
17807 }
17808 }
17809 } // enable-unsafe-fp-math && AllowNewConst
17810
17811 if ((Options.NoSignedZerosFPMath ||
17812 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
17813 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17814 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17815 VT, N0, N1, Flags))
17816 return SD;
17817 }
17818
17819 // FADD -> FMA combines:
17820 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17821 if (Fused.getOpcode() != ISD::DELETED_NODE)
17822 AddToWorklist(Fused.getNode());
17823 return Fused;
17824 }
17825 return SDValue();
17826}
17827
17828SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17829 SDValue Chain = N->getOperand(0);
17830 SDValue N0 = N->getOperand(1);
17831 SDValue N1 = N->getOperand(2);
17832 EVT VT = N->getValueType(0);
17833 EVT ChainVT = N->getValueType(1);
17834 SDLoc DL(N);
17835 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17836
17837 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17838 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17839 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17840 N1, DAG, LegalOperations, ForCodeSize)) {
17841 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17842 {Chain, N0, NegN1});
17843 }
17844
17845 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17846 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17847 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17848 N0, DAG, LegalOperations, ForCodeSize)) {
17849 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17850 {Chain, N1, NegN0});
17851 }
17852 return SDValue();
17853}
17854
17855SDValue DAGCombiner::visitFSUB(SDNode *N) {
17856 SDValue N0 = N->getOperand(0);
17857 SDValue N1 = N->getOperand(1);
17858 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17859 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17860 EVT VT = N->getValueType(0);
17861 SDLoc DL(N);
17862 const TargetOptions &Options = DAG.getTarget().Options;
17863 const SDNodeFlags Flags = N->getFlags();
17864 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17865
17866 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17867 return R;
17868
17869 // fold (fsub c1, c2) -> c1-c2
17870 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17871 return C;
17872
17873 // fold vector ops
17874 if (VT.isVector())
17875 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17876 return FoldedVOp;
17877
17878 if (SDValue NewSel = foldBinOpIntoSelect(N))
17879 return NewSel;
17880
17881 // (fsub A, 0) -> A
17882 if (N1CFP && N1CFP->isZero()) {
17883 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17884 Flags.hasNoSignedZeros()) {
17885 return N0;
17886 }
17887 }
17888
17889 if (N0 == N1) {
17890 // (fsub x, x) -> 0.0
17891 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17892 return DAG.getConstantFP(0.0f, DL, VT);
17893 }
17894
17895 // (fsub -0.0, N1) -> -N1
17896 if (N0CFP && N0CFP->isZero()) {
17897 if (N0CFP->isNegative() ||
17898 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17899 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17900 // flushed to zero, unless all users treat denorms as zero (DAZ).
17901 // FIXME: This transform will change the sign of a NaN and the behavior
17902 // of a signaling NaN. It is only valid when a NoNaN flag is present.
17903 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17904 if (DenormMode == DenormalMode::getIEEE()) {
17905 if (SDValue NegN1 =
17906 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17907 return NegN1;
17908 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17909 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17910 }
17911 }
17912 }
17913
17914 if ((Options.NoSignedZerosFPMath ||
17915 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17916 N1.getOpcode() == ISD::FADD) {
17917 // X - (X + Y) -> -Y
17918 if (N0 == N1->getOperand(0))
17919 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
17920 // X - (Y + X) -> -Y
17921 if (N0 == N1->getOperand(1))
17922 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
17923 }
17924
17925 // fold (fsub A, (fneg B)) -> (fadd A, B)
17926 if (SDValue NegN1 =
17927 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17928 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
17929
17930 // FSUB -> FMA combines:
17931 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
17932 AddToWorklist(Fused.getNode());
17933 return Fused;
17934 }
17935
17936 return SDValue();
17937}
17938
17939// Transform IEEE Floats:
17940// (fmul C, (uitofp Pow2))
17941// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
17942// (fdiv C, (uitofp Pow2))
17943// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
17944//
17945 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
17946 // so there is no need for more than an add/sub.
17947//
17948// This is valid under the following circumstances:
17949// 1) We are dealing with IEEE floats
17950// 2) C is normal
17951// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
17952// TODO: Much of this could also be used for generating `ldexp` on targets that
17953// prefer it.
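// As an illustrative example (f32, 23 mantissa bits): C = 3.0f has the bit
// pattern 0x40400000, and multiplying by Pow2 = 4 (Log2 = 2) becomes
// 0x40400000 + (2 << 23) = 0x41400000, which is exactly 12.0f == 3.0f * 4.0f.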
17954SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
17955 EVT VT = N->getValueType(0);
17957 return SDValue();
17958
17959 SDValue ConstOp, Pow2Op;
17960
17961 std::optional<int> Mantissa;
17962 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
17963 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
17964 return false;
17965
17966 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
17967 Pow2Op = N->getOperand(1 - ConstOpIdx);
17968 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
17969 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
17970 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
17971 return false;
17972
17973 Pow2Op = Pow2Op.getOperand(0);
17974
17975 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
17976 // TODO: We could use knownbits to make this bound more precise.
17977 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
17978
17979 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
17980 if (CFP == nullptr)
17981 return false;
17982
17983 const APFloat &APF = CFP->getValueAPF();
17984
17985      // Make sure we have a normal constant.
17986 if (!APF.isNormal())
17987 return false;
17988
17989      // Make sure the float's exponent is within the bounds for which this
17990      // transform produces a bitwise-equal value.
17991 int CurExp = ilogb(APF);
17992 // FMul by pow2 will only increase exponent.
17993 int MinExp =
17994 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17995 // FDiv by pow2 will only decrease exponent.
17996 int MaxExp =
17997 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17998 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
17999          MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18000        return false;
18001
18002 // Finally make sure we actually know the mantissa for the float type.
18003 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18004 if (!Mantissa)
18005 Mantissa = ThisMantissa;
18006
18007 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18008 };
18009
18010 // TODO: We may be able to include undefs.
18011 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18012 };
18013
18014 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18015 return SDValue();
18016
18017 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18018 return SDValue();
18019
18020 // Get log2 after all other checks have taken place. This is because
18021 // BuildLogBase2 may create a new node.
18022 SDLoc DL(N);
18023 // Get Log2 type with same bitwidth as the float type (VT).
18024 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18025 if (VT.isVector())
18026 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18027                                VT.getVectorElementCount());
18028
18029 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18030 /*InexpensiveOnly*/ true, NewIntVT);
18031 if (!Log2)
18032 return SDValue();
18033
18034 // Perform actual transform.
18035 SDValue MantissaShiftCnt =
18036 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18037  // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should fold
18038  // to `(X << C1) + (C << C1)`, but that isn't always the case because of the
18039  // cast. We could handle that here by looking through the casts.
18040 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18041 SDValue ResAsInt =
18042 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18043 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18044 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18045 return ResAsFP;
18046}
18047
18048SDValue DAGCombiner::visitFMUL(SDNode *N) {
18049 SDValue N0 = N->getOperand(0);
18050 SDValue N1 = N->getOperand(1);
18051 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18052 EVT VT = N->getValueType(0);
18053 SDLoc DL(N);
18054 const SDNodeFlags Flags = N->getFlags();
18055 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18056
18057 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18058 return R;
18059
18060 // fold (fmul c1, c2) -> c1*c2
18061 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18062 return C;
18063
18064 // canonicalize constant to RHS
18065  if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18066      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18067    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18068
18069 // fold vector ops
18070 if (VT.isVector())
18071 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18072 return FoldedVOp;
18073
18074 if (SDValue NewSel = foldBinOpIntoSelect(N))
18075 return NewSel;
18076
18077 if (Flags.hasAllowReassociation()) {
18078 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18079    if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18080        N0.getOpcode() == ISD::FMUL) {
18081 SDValue N00 = N0.getOperand(0);
18082 SDValue N01 = N0.getOperand(1);
18083 // Avoid an infinite loop by making sure that N00 is not a constant
18084 // (the inner multiply has not been constant folded yet).
18085      if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
18086          !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
18087        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18088 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18089 }
18090 }
18091
18092 // Match a special-case: we convert X * 2.0 into fadd.
18093 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18094 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18095 N0.getOperand(0) == N0.getOperand(1)) {
18096 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18097 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18098 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18099 }
18100
18101 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18102 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18103 VT, N0, N1, Flags))
18104 return SD;
18105 }
18106
18107 // fold (fmul X, 2.0) -> (fadd X, X)
18108 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18109 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18110
18111 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18112 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18113 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18114 return DAG.getNode(ISD::FSUB, DL, VT,
18115 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18116 }
18117 }
18118
18119 // -N0 * -N1 --> N0 * N1
18120  TargetLowering::NegatibleCost CostN0 =
18121      TargetLowering::NegatibleCost::Expensive;
18122  TargetLowering::NegatibleCost CostN1 =
18123      TargetLowering::NegatibleCost::Expensive;
18124  SDValue NegN0 =
18125 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18126 if (NegN0) {
18127 HandleSDNode NegN0Handle(NegN0);
18128 SDValue NegN1 =
18129 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18130 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18131                CostN1 == TargetLowering::NegatibleCost::Cheaper))
18132    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18133 }
18134
18135 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18136 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18137 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18138 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18139 TLI.isOperationLegal(ISD::FABS, VT)) {
18140 SDValue Select = N0, X = N1;
18141 if (Select.getOpcode() != ISD::SELECT)
18142 std::swap(Select, X);
18143
18144 SDValue Cond = Select.getOperand(0);
18145 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18146 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18147
18148 if (TrueOpnd && FalseOpnd &&
18149 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18150 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18151 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18152 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18153 switch (CC) {
18154 default: break;
18155 case ISD::SETOLT:
18156 case ISD::SETULT:
18157 case ISD::SETOLE:
18158 case ISD::SETULE:
18159 case ISD::SETLT:
18160 case ISD::SETLE:
18161 std::swap(TrueOpnd, FalseOpnd);
18162 [[fallthrough]];
18163 case ISD::SETOGT:
18164 case ISD::SETUGT:
18165 case ISD::SETOGE:
18166 case ISD::SETUGE:
18167 case ISD::SETGT:
18168 case ISD::SETGE:
18169 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18170 TLI.isOperationLegal(ISD::FNEG, VT))
18171 return DAG.getNode(ISD::FNEG, DL, VT,
18172 DAG.getNode(ISD::FABS, DL, VT, X));
18173 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18174 return DAG.getNode(ISD::FABS, DL, VT, X);
18175
18176 break;
18177 }
18178 }
18179 }
18180
18181 // FMUL -> FMA combines:
18182 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18183 AddToWorklist(Fused.getNode());
18184 return Fused;
18185 }
18186
18187 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18188 // able to run.
18189 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18190 return R;
18191
18192 return SDValue();
18193}
18194
18195template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18196 SDValue N0 = N->getOperand(0);
18197 SDValue N1 = N->getOperand(1);
18198 SDValue N2 = N->getOperand(2);
18199 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18200 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18201 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18202 EVT VT = N->getValueType(0);
18203 SDLoc DL(N);
18204 const TargetOptions &Options = DAG.getTarget().Options;
18205 // FMA nodes have flags that propagate to the created nodes.
18206 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18207 MatchContextClass matcher(DAG, TLI, N);
18208
18209 // Constant fold FMA.
18210 if (SDValue C =
18211 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18212 return C;
18213
18214 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18215  TargetLowering::NegatibleCost CostN0 =
18216      TargetLowering::NegatibleCost::Expensive;
18217  TargetLowering::NegatibleCost CostN1 =
18218      TargetLowering::NegatibleCost::Expensive;
18219  SDValue NegN0 =
18220 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18221 if (NegN0) {
18222 HandleSDNode NegN0Handle(NegN0);
18223 SDValue NegN1 =
18224 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18225 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18226                CostN1 == TargetLowering::NegatibleCost::Cheaper))
18227    return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18228 }
18229
18230 // FIXME: use fast math flags instead of Options.UnsafeFPMath
18231 // TODO: Finally migrate away from global TargetOptions.
18232 if ((Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
18233 (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
18234 if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() ||
18235 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18236 if (N0CFP && N0CFP->isZero())
18237 return N2;
18238 if (N1CFP && N1CFP->isZero())
18239 return N2;
18240 }
18241 }
18242
18243 // FIXME: Support splat of constant.
18244 if (N0CFP && N0CFP->isExactlyValue(1.0))
18245 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18246 if (N1CFP && N1CFP->isExactlyValue(1.0))
18247 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18248
18249 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18250  if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18251      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18252    return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18253
18254 bool CanReassociate = N->getFlags().hasAllowReassociation();
18255 if (CanReassociate) {
18256 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18257 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18258        DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18259        DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
18260      return matcher.getNode(
18261 ISD::FMUL, DL, VT, N0,
18262 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18263 }
18264
18265 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18266 if (matcher.match(N0, ISD::FMUL) &&
18267        DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18268        DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
18269      return matcher.getNode(
18270 ISD::FMA, DL, VT, N0.getOperand(0),
18271 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18272 }
18273 }
18274
18275 // (fma x, -1, y) -> (fadd (fneg x), y)
18276 // FIXME: Support splat of constant.
18277 if (N1CFP) {
18278 if (N1CFP->isExactlyValue(1.0))
18279 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18280
18281 if (N1CFP->isExactlyValue(-1.0) &&
18282 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18283 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18284 AddToWorklist(RHSNeg.getNode());
18285 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18286 }
18287
18288    // fma (fneg x), K, y -> fma x, -K, y
18289 if (matcher.match(N0, ISD::FNEG) &&
18290        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18291         (N1.hasOneUse() &&
18292 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18293 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18294 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18295 }
18296 }
18297
18298 // FIXME: Support splat of constant.
18299 if (CanReassociate) {
18300 // (fma x, c, x) -> (fmul x, (c+1))
18301 if (N1CFP && N0 == N2) {
18302 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18303 matcher.getNode(ISD::FADD, DL, VT, N1,
18304 DAG.getConstantFP(1.0, DL, VT)));
18305 }
18306
18307 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18308 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18309 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18310 matcher.getNode(ISD::FADD, DL, VT, N1,
18311 DAG.getConstantFP(-1.0, DL, VT)));
18312 }
18313 }
18314
18315 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
18316 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
18317 if (!TLI.isFNegFree(VT))
18318    if (SDValue Neg = TLI.getCheaperNegatedExpression(
18319            SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18320 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18321 return SDValue();
18322}
18323
18324SDValue DAGCombiner::visitFMAD(SDNode *N) {
18325 SDValue N0 = N->getOperand(0);
18326 SDValue N1 = N->getOperand(1);
18327 SDValue N2 = N->getOperand(2);
18328 EVT VT = N->getValueType(0);
18329 SDLoc DL(N);
18330
18331 // Constant fold FMAD.
18332 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18333 return C;
18334
18335 return SDValue();
18336}
18337
18338// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18339// reciprocal.
18340// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18341// Notice that this is not always beneficial. One reason is different targets
18342// may have different costs for FDIV and FMUL, so sometimes the cost of two
18343// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18344// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
18345SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18346 // TODO: Limit this transform based on optsize/minsize - it always creates at
18347 // least 1 extra instruction. But the perf win may be substantial enough
18348 // that only minsize should restrict this.
18349 const SDNodeFlags Flags = N->getFlags();
18350 if (LegalDAG || !Flags.hasAllowReciprocal())
18351 return SDValue();
18352
18353 // Skip if current node is a reciprocal/fneg-reciprocal.
18354 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18355 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18356 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18357 return SDValue();
18358
18359 // Exit early if the target does not want this transform or if there can't
18360 // possibly be enough uses of the divisor to make the transform worthwhile.
18361 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18362
18363 // For splat vectors, scale the number of uses by the splat factor. If we can
18364 // convert the division into a scalar op, that will likely be much faster.
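  // For instance, assuming a target whose combineRepeatedFPDivisors() hook
  // returns 3: a <4 x float> splat divisor with a single FDIV user counts as
  // 1 * 4 = 4 scaled uses, which is enough to meet that MinUses threshold.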
18365 unsigned NumElts = 1;
18366 EVT VT = N->getValueType(0);
18367 if (VT.isVector() && DAG.isSplatValue(N1))
18368 NumElts = VT.getVectorMinNumElements();
18369
18370 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18371 return SDValue();
18372
18373 // Find all FDIV users of the same divisor.
18374 // Use a set because duplicates may be present in the user list.
18375 SetVector<SDNode *> Users;
18376 for (auto *U : N1->users()) {
18377 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18378 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18379 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18380 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18381 U->getFlags().hasAllowReassociation() &&
18382 U->getFlags().hasNoSignedZeros())
18383 continue;
18384
18385      // This division is eligible for optimization only if it allows
18386      // reciprocal formation (i.e. it carries the arcp fast-math flag).
18387 if (U->getFlags().hasAllowReciprocal())
18388 Users.insert(U);
18389 }
18390 }
18391
18392 // Now that we have the actual number of divisor uses, make sure it meets
18393 // the minimum threshold specified by the target.
18394 if ((Users.size() * NumElts) < MinUses)
18395 return SDValue();
18396
18397 SDLoc DL(N);
18398 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18399 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18400
18401 // Dividend / Divisor -> Dividend * Reciprocal
18402 for (auto *U : Users) {
18403 SDValue Dividend = U->getOperand(0);
18404 if (Dividend != FPOne) {
18405 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18406 Reciprocal, Flags);
18407 CombineTo(U, NewNode);
18408 } else if (U != Reciprocal.getNode()) {
18409 // In the absence of fast-math-flags, this user node is always the
18410 // same node as Reciprocal, but with FMF they may be different nodes.
18411 CombineTo(U, Reciprocal);
18412 }
18413 }
18414 return SDValue(N, 0); // N was replaced.
18415}
18416
18417SDValue DAGCombiner::visitFDIV(SDNode *N) {
18418 SDValue N0 = N->getOperand(0);
18419 SDValue N1 = N->getOperand(1);
18420 EVT VT = N->getValueType(0);
18421 SDLoc DL(N);
18422 const TargetOptions &Options = DAG.getTarget().Options;
18423 SDNodeFlags Flags = N->getFlags();
18424 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18425
18426 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18427 return R;
18428
18429 // fold (fdiv c1, c2) -> c1/c2
18430 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18431 return C;
18432
18433 // fold vector ops
18434 if (VT.isVector())
18435 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18436 return FoldedVOp;
18437
18438 if (SDValue NewSel = foldBinOpIntoSelect(N))
18439 return NewSel;
18440
18441  if (SDValue V = combineRepeatedFPDivisors(N))
18442    return V;
18443
18444 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18445 // the loss is acceptable with AllowReciprocal.
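  // For example, X / 2.0 has the exact reciprocal 0.5 (opOK), so no flag is
  // needed to rewrite it as X * 0.5; X / 3.0 only has an inexact reciprocal,
  // so it is rewritten as X * (1.0 / 3.0) only when the arcp flag allows it.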
18446 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18447 // Compute the reciprocal 1.0 / c2.
18448 const APFloat &N1APF = N1CFP->getValueAPF();
18449 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18450    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
18451    // Only do the transform if the reciprocal is a legal fp immediate that
18452 // isn't too nasty (eg NaN, denormal, ...).
18453 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18454 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18455 (!LegalOperations ||
18456 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18457 // backend)... we should handle this gracefully after Legalize.
18458 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18459         TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18460         TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18461 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18462 DAG.getConstantFP(Recip, DL, VT));
18463 }
18464
18465 if (Flags.hasAllowReciprocal()) {
18466 // If this FDIV is part of a reciprocal square root, it may be folded
18467 // into a target-specific square root estimate instruction.
18468 if (N1.getOpcode() == ISD::FSQRT) {
18469 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
18470 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18471 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18472 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18473 if (SDValue RV =
18474 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18475 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18476 AddToWorklist(RV.getNode());
18477 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18478 }
18479 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18480 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18481 if (SDValue RV =
18482 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18483 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18484 AddToWorklist(RV.getNode());
18485 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18486 }
18487 } else if (N1.getOpcode() == ISD::FMUL) {
18488 // Look through an FMUL. Even though this won't remove the FDIV directly,
18489 // it's still worthwhile to get rid of the FSQRT if possible.
18490 SDValue Sqrt, Y;
18491 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18492 Sqrt = N1.getOperand(0);
18493 Y = N1.getOperand(1);
18494 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18495 Sqrt = N1.getOperand(1);
18496 Y = N1.getOperand(0);
18497 }
18498 if (Sqrt.getNode()) {
18499 // If the other multiply operand is known positive, pull it into the
18500 // sqrt. That will eliminate the division if we convert to an estimate.
18501 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18502 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18503 SDValue A;
18504 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18505 A = Y.getOperand(0);
18506 else if (Y == Sqrt.getOperand(0))
18507 A = Y;
18508 if (A) {
18509 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18510 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18511 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18512 SDValue AAZ =
18513 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18514 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
18515 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18516
18517 // Estimate creation failed. Clean up speculatively created nodes.
18518 recursivelyDeleteUnusedNodes(AAZ.getNode());
18519 }
18520 }
18521
18522 // We found a FSQRT, so try to make this fold:
18523 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18524 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
18525 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18526 AddToWorklist(Div.getNode());
18527 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18528 }
18529 }
18530 }
18531
18532 // Fold into a reciprocal estimate and multiply instead of a real divide.
18533 if (Options.NoInfsFPMath || Flags.hasNoInfs())
18534 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18535 return RV;
18536 }
18537
18538 // Fold X/Sqrt(X) -> Sqrt(X)
18539 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
18540 Flags.hasAllowReassociation())
18541 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18542 return N1;
18543
18544 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18545  TargetLowering::NegatibleCost CostN0 =
18546      TargetLowering::NegatibleCost::Expensive;
18547  TargetLowering::NegatibleCost CostN1 =
18548      TargetLowering::NegatibleCost::Expensive;
18549  SDValue NegN0 =
18550 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18551 if (NegN0) {
18552 HandleSDNode NegN0Handle(NegN0);
18553 SDValue NegN1 =
18554 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18555 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18556                CostN1 == TargetLowering::NegatibleCost::Cheaper))
18557    return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18558 }
18559
18560 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18561 return R;
18562
18563 return SDValue();
18564}
18565
18566SDValue DAGCombiner::visitFREM(SDNode *N) {
18567 SDValue N0 = N->getOperand(0);
18568 SDValue N1 = N->getOperand(1);
18569 EVT VT = N->getValueType(0);
18570 SDNodeFlags Flags = N->getFlags();
18571 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18572 SDLoc DL(N);
18573
18574 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18575 return R;
18576
18577 // fold (frem c1, c2) -> fmod(c1,c2)
18578 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18579 return C;
18580
18581 if (SDValue NewSel = foldBinOpIntoSelect(N))
18582 return NewSel;
18583
18584  // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
18585  // power of 2.
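  // For example (ignoring the sign handling below), frem(7.5, 2.0):
  //   trunc(7.5 / 2.0) = trunc(3.75) = 3.0, and 7.5 - 3.0 * 2.0 = 1.5,
  // which matches fmod(7.5, 2.0).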
18586 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18589 TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
18590 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18591 bool NeedsCopySign =
18592 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
18593 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18594 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18595 SDValue MLA;
18596    if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
18597      MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18598 N1, N0);
18599 } else {
18600 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18601 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18602 }
18603 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18604 }
18605
18606 return SDValue();
18607}
18608
18609SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18610 SDNodeFlags Flags = N->getFlags();
18611 const TargetOptions &Options = DAG.getTarget().Options;
18612
18613 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18614 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18615 if (!Flags.hasApproximateFuncs() ||
18616 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
18617 return SDValue();
18618
18619 SDValue N0 = N->getOperand(0);
18620 if (TLI.isFsqrtCheap(N0, DAG))
18621 return SDValue();
18622
18623 // FSQRT nodes have flags that propagate to the created nodes.
18624 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18625 // transform the fdiv, we may produce a sub-optimal estimate sequence
18626 // because the reciprocal calculation may not have to filter out a
18627 // 0.0 input.
18628 return buildSqrtEstimate(N0, Flags);
18629}
18630
18631/// copysign(x, fp_extend(y)) -> copysign(x, y)
18632/// copysign(x, fp_round(y)) -> copysign(x, y)
18633/// Operands to the function are the types of X and Y, respectively.
18634static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18635 // Always fold no-op FP casts.
18636 if (XTy == YTy)
18637 return true;
18638
18639 // Do not optimize out type conversion of f128 type yet.
18640 // For some targets like x86_64, configuration is changed to keep one f128
18641 // value in one SSE register, but instruction selection cannot handle
18642 // FCOPYSIGN on SSE registers yet.
18643 if (YTy == MVT::f128)
18644 return false;
18645
18646 // Avoid mismatched vector operand types, for better instruction selection.
18647 return !YTy.isVector();
18648}
18649
18650static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
18651  SDValue N1 = N->getOperand(1);
18652 if (N1.getOpcode() != ISD::FP_EXTEND &&
18653 N1.getOpcode() != ISD::FP_ROUND)
18654 return false;
18655 EVT N1VT = N1->getValueType(0);
18656 EVT N1Op0VT = N1->getOperand(0).getValueType();
18657 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18658}
18659
18660SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18661 SDValue N0 = N->getOperand(0);
18662 SDValue N1 = N->getOperand(1);
18663 EVT VT = N->getValueType(0);
18664 SDLoc DL(N);
18665
18666 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18667 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18668 return C;
18669
18670 // copysign(x, fp_extend(y)) -> copysign(x, y)
18671 // copysign(x, fp_round(y)) -> copysign(x, y)
18672  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18673    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18674
18676 return SDValue(N, 0);
18677
18678 return SDValue();
18679}
18680
18681SDValue DAGCombiner::visitFPOW(SDNode *N) {
18682 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18683 if (!ExponentC)
18684 return SDValue();
18685 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18686
18687 // Try to convert x ** (1/3) into cube root.
18688 // TODO: Handle the various flavors of long double.
18689 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18690 // Some range near 1/3 should be fine.
18691 EVT VT = N->getValueType(0);
18692 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18693 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18694 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18695 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18696 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
18697 // For regular numbers, rounding may cause the results to differ.
18698 // Therefore, we require { nsz ninf nnan afn } for this transform.
18699 // TODO: We could select out the special cases if we don't have nsz/ninf.
18700 SDNodeFlags Flags = N->getFlags();
18701 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18702 !Flags.hasApproximateFuncs())
18703 return SDValue();
18704
18705 // Do not create a cbrt() libcall if the target does not have it, and do not
18706 // turn a pow that has lowering support into a cbrt() libcall.
18707 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18708 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18709 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18710 return SDValue();
18711
18712 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18713 }
18714
18715 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18716 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18717 // TODO: This could be extended (using a target hook) to handle smaller
18718 // power-of-2 fractional exponents.
18719 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18720 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18721 if (ExponentIs025 || ExponentIs075) {
18722 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18723 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18724 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18725 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18726 // For regular numbers, rounding may cause the results to differ.
18727 // Therefore, we require { nsz ninf afn } for this transform.
18728 // TODO: We could select out the special cases if we don't have nsz/ninf.
18729 SDNodeFlags Flags = N->getFlags();
18730
18731 // We only need no signed zeros for the 0.25 case.
18732 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18733 !Flags.hasApproximateFuncs())
18734 return SDValue();
18735
18736 // Don't double the number of libcalls. We are trying to inline fast code.
18737 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18738 return SDValue();
18739
18740 // Assume that libcalls are the smallest code.
18741 // TODO: This restriction should probably be lifted for vectors.
18742 if (ForCodeSize)
18743 return SDValue();
18744
18745 // pow(X, 0.25) --> sqrt(sqrt(X))
18746 SDLoc DL(N);
18747 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18748 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18749 if (ExponentIs025)
18750 return SqrtSqrt;
18751 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18752 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18753 }
18754
18755 return SDValue();
18756}
18757
18758static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18759                               const TargetLowering &TLI) {
18760 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18761 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18762 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18763 // conversions would return +0.0.
18764 // FIXME: We should be able to use node-level FMF here.
18765 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
18766 EVT VT = N->getValueType(0);
18767 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18768      !DAG.getTarget().Options.NoSignedZerosFPMath)
18769    return SDValue();
18770
18771 // fptosi/fptoui round towards zero, so converting from FP to integer and
18772 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
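  // For example, sitofp(fptosi -3.7) evaluates to -3.0, which is exactly what
  // ftrunc(-3.7) produces (both round toward zero).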
18773 SDValue N0 = N->getOperand(0);
18774 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18775 N0.getOperand(0).getValueType() == VT)
18776 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18777
18778 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18779 N0.getOperand(0).getValueType() == VT)
18780 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18781
18782 return SDValue();
18783}
18784
18785SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18786 SDValue N0 = N->getOperand(0);
18787 EVT VT = N->getValueType(0);
18788 EVT OpVT = N0.getValueType();
18789 SDLoc DL(N);
18790
18791 // [us]itofp(undef) = 0, because the result value is bounded.
18792 if (N0.isUndef())
18793 return DAG.getConstantFP(0.0, DL, VT);
18794
18795 // fold (sint_to_fp c1) -> c1fp
18796 // ...but only if the target supports immediate floating-point values
18797 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18798 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18799 return C;
18800
18801 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18802 // but UINT_TO_FP is legal on this target, try to convert.
18803 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18804 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18805 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18806 if (DAG.SignBitIsZero(N0))
18807 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18808 }
18809
18810 // The next optimizations are desirable only if SELECT_CC can be lowered.
18811 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18812 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18813 !VT.isVector() &&
18814 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18815 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18816 DAG.getConstantFP(0.0, DL, VT));
18817
18818 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18819 // (select (setcc x, y, cc), 1.0, 0.0)
18820 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18821 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18822 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18823 return DAG.getSelect(DL, VT, N0.getOperand(0),
18824 DAG.getConstantFP(1.0, DL, VT),
18825 DAG.getConstantFP(0.0, DL, VT));
18826
18827 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18828 return FTrunc;
18829
18830 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18831 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18833 N0.getOperand(0).getValueType()))
18834 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18835
18836 return SDValue();
18837}
18838
18839SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18840 SDValue N0 = N->getOperand(0);
18841 EVT VT = N->getValueType(0);
18842 EVT OpVT = N0.getValueType();
18843 SDLoc DL(N);
18844
18845 // [us]itofp(undef) = 0, because the result value is bounded.
18846 if (N0.isUndef())
18847 return DAG.getConstantFP(0.0, DL, VT);
18848
18849 // fold (uint_to_fp c1) -> c1fp
18850 // ...but only if the target supports immediate floating-point values
18851 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18852 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18853 return C;
18854
18855 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18856 // but SINT_TO_FP is legal on this target, try to convert.
18857 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18858 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18859 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18860 if (DAG.SignBitIsZero(N0))
18861 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18862 }
18863
18864 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18865 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18866 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18867 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18868 DAG.getConstantFP(0.0, DL, VT));
18869
18870 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18871 return FTrunc;
18872
18873 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18874 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18876 N0.getOperand(0).getValueType()))
18877 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18878
18879 return SDValue();
18880}
18881
18882// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18883static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
18884  SDValue N0 = N->getOperand(0);
18885 EVT VT = N->getValueType(0);
18886
18887 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18888 return SDValue();
18889
18890 SDValue Src = N0.getOperand(0);
18891 EVT SrcVT = Src.getValueType();
18892 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18893 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18894
18895 // We can safely assume the conversion won't overflow the output range,
18896 // because (for example) (uint8_t)18293.f is undefined behavior.
18897
18898 // Since we can assume the conversion won't overflow, our decision as to
18899 // whether the input will fit in the float should depend on the minimum
18900 // of the input range and output range.
18901
18902 // This means this is also safe for a signed input and unsigned output, since
18903 // a negative input would lead to undefined behavior.
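  // For example, i16 -> f32 -> i32: f32 has a 24-bit significand, which holds
  // every i16 value exactly, so the conversion pair reduces to a plain
  // sign/zero extend of the i16 source.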
18904 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18905 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18906 unsigned ActualSize = std::min(InputSize, OutputSize);
18907 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18908
18909 // We can only fold away the float conversion if the input range can be
18910 // represented exactly in the float range.
18911 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18912 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
18913 unsigned ExtOp =
18914 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18915 return DAG.getNode(ExtOp, DL, VT, Src);
18916 }
18917 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
18918 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
18919 return DAG.getBitcast(VT, Src);
18920 }
18921 return SDValue();
18922}
18923
18924SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
18925 SDValue N0 = N->getOperand(0);
18926 EVT VT = N->getValueType(0);
18927 SDLoc DL(N);
18928
18929 // fold (fp_to_sint undef) -> undef
18930 if (N0.isUndef())
18931 return DAG.getUNDEF(VT);
18932
18933 // fold (fp_to_sint c1fp) -> c1
18934 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
18935 return C;
18936
18937 return FoldIntToFPToInt(N, DL, DAG);
18938}
18939
18940SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
18941 SDValue N0 = N->getOperand(0);
18942 EVT VT = N->getValueType(0);
18943 SDLoc DL(N);
18944
18945 // fold (fp_to_uint undef) -> undef
18946 if (N0.isUndef())
18947 return DAG.getUNDEF(VT);
18948
18949 // fold (fp_to_uint c1fp) -> c1
18950 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
18951 return C;
18952
18953 return FoldIntToFPToInt(N, DL, DAG);
18954}
18955
18956SDValue DAGCombiner::visitXROUND(SDNode *N) {
18957 SDValue N0 = N->getOperand(0);
18958 EVT VT = N->getValueType(0);
18959
18960 // fold (lrint|llrint undef) -> undef
18961 // fold (lround|llround undef) -> undef
18962 if (N0.isUndef())
18963 return DAG.getUNDEF(VT);
18964
18965 // fold (lrint|llrint c1fp) -> c1
18966 // fold (lround|llround c1fp) -> c1
18967 if (SDValue C =
18968 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
18969 return C;
18970
18971 return SDValue();
18972}
18973
18974SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
18975 SDValue N0 = N->getOperand(0);
18976 SDValue N1 = N->getOperand(1);
18977 EVT VT = N->getValueType(0);
18978 SDLoc DL(N);
18979
18980 // fold (fp_round c1fp) -> c1fp
18981 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
18982 return C;
18983
18984 // fold (fp_round (fp_extend x)) -> x
18985 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
18986 return N0.getOperand(0);
18987
18988 // fold (fp_round (fp_round x)) -> (fp_round x)
18989 if (N0.getOpcode() == ISD::FP_ROUND) {
18990 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
18991 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
18992
18993 // Avoid folding legal fp_rounds into non-legal ones.
18994 if (!hasOperation(ISD::FP_ROUND, VT))
18995 return SDValue();
18996
18997 // Skip this folding if it results in an fp_round from f80 to f16.
18998 //
18999 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19000 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19001 // instructions from f32 or f64. Moreover, the first (value-preserving)
19002 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19003 // x86.
19004 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19005 return SDValue();
19006
19007 // If the first fp_round isn't a value preserving truncation, it might
19008 // introduce a tie in the second fp_round, that wouldn't occur in the
19009 // single-step fp_round we want to fold to.
19010 // In other words, double rounding isn't the same as rounding.
19011 // Also, this is a value preserving truncation iff both fp_round's are.
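    // A decimal analogy of the double-rounding hazard: rounding 1.49 to one
    // decimal place gives 1.5, and rounding that to an integer gives 2,
    // whereas rounding 1.49 directly to an integer gives 1.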
19012 if ((N->getFlags().hasAllowContract() &&
19013 N0->getFlags().hasAllowContract()) ||
19014 N0IsTrunc)
19015 return DAG.getNode(
19016 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19017 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19018 }
19019
19020 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19021 // Note: From a legality perspective, this is a two step transform. First,
19022 // we duplicate the fp_round to the arguments of the copysign, then we
19023 // eliminate the fp_round on Y. The second step requires an additional
19024 // predicate to match the implementation above.
19025 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19027 N0.getValueType())) {
19028 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19029 N0.getOperand(0), N1);
19030 AddToWorklist(Tmp.getNode());
19031 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19032 }
19033
19034 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19035 return NewVSel;
19036
19037 return SDValue();
19038}
19039
19040// Eliminate a floating-point widening of a narrowed value if the fast math
19041// flags allow it.
19042SDValue DAGCombiner::eliminateFPCastPair(SDNode *N) {
19043  SDValue N0 = N->getOperand(0);
19044 EVT VT = N->getValueType(0);
19045
19046 unsigned NarrowingOp;
19047 switch (N->getOpcode()) {
19048 case ISD::FP16_TO_FP:
19049 NarrowingOp = ISD::FP_TO_FP16;
19050 break;
19051 case ISD::BF16_TO_FP:
19052 NarrowingOp = ISD::FP_TO_BF16;
19053 break;
19054 case ISD::FP_EXTEND:
19055 NarrowingOp = ISD::FP_ROUND;
19056 break;
19057 default:
19058 llvm_unreachable("Expected widening FP cast");
19059 }
19060
19061 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19062 const SDNodeFlags NarrowFlags = N0->getFlags();
19063 const SDNodeFlags WidenFlags = N->getFlags();
19064 // Narrowing can introduce inf and change the encoding of a nan, so the
19065 // widen must have the nnan and ninf flags to indicate that we don't need to
19066 // care about that. We are also removing a rounding step, and that requires
19067 // both the narrow and widen to allow contraction.
19068 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19069 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19070 return N0.getOperand(0);
19071 }
19072 }
19073
19074 return SDValue();
19075}
19076
19077SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19078 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19079 SDValue N0 = N->getOperand(0);
19080 EVT VT = N->getValueType(0);
19081 SDLoc DL(N);
19082
19083 if (VT.isVector())
19084 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19085 return FoldedVOp;
19086
19087 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19088 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19089 return SDValue();
19090
19091 // fold (fp_extend c1fp) -> c1fp
19092 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19093 return C;
19094
19095 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19096 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19097 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19098 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19099
19100 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19101 // value of X.
19102 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19103 SDValue In = N0.getOperand(0);
19104 if (In.getValueType() == VT) return In;
19105 if (VT.bitsLT(In.getValueType()))
19106 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19107 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19108 }
19109
19110 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19111 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19112      TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
19113    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19114 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19115 LN0->getChain(),
19116 LN0->getBasePtr(), N0.getValueType(),
19117 LN0->getMemOperand());
19118 CombineTo(N, ExtLoad);
19119 CombineTo(
19120 N0.getNode(),
19121 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19122 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19123 ExtLoad.getValue(1));
19124 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19125 }
19126
19127 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19128 return NewVSel;
19129
19130 if (SDValue CastEliminated = eliminateFPCastPair(N))
19131 return CastEliminated;
19132
19133 return SDValue();
19134}
19135
19136SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19137 SDValue N0 = N->getOperand(0);
19138 EVT VT = N->getValueType(0);
19139
19140 // fold (fceil c1) -> fceil(c1)
19141 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19142 return C;
19143
19144 return SDValue();
19145}
19146
19147SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19148 SDValue N0 = N->getOperand(0);
19149 EVT VT = N->getValueType(0);
19150
19151 // fold (ftrunc c1) -> ftrunc(c1)
19152 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19153 return C;
19154
19155 // fold ftrunc (known rounded int x) -> x
19156 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19157 // likely to be generated to extract integer from a rounded floating value.
19158 switch (N0.getOpcode()) {
19159 default: break;
19160 case ISD::FRINT:
19161 case ISD::FTRUNC:
19162 case ISD::FNEARBYINT:
19163 case ISD::FROUNDEVEN:
19164 case ISD::FFLOOR:
19165 case ISD::FCEIL:
19166 return N0;
19167 }
19168
19169 return SDValue();
19170}
19171
19172SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19173 SDValue N0 = N->getOperand(0);
19174
19175 // fold (ffrexp c1) -> ffrexp(c1)
19176  if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
19177    return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19178 return SDValue();
19179}
19180
19181SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19182 SDValue N0 = N->getOperand(0);
19183 EVT VT = N->getValueType(0);
19184
19185 // fold (ffloor c1) -> ffloor(c1)
19186 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19187 return C;
19188
19189 return SDValue();
19190}
19191
19192SDValue DAGCombiner::visitFNEG(SDNode *N) {
19193 SDValue N0 = N->getOperand(0);
19194 EVT VT = N->getValueType(0);
19195 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19196
19197 // Constant fold FNEG.
19198 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19199 return C;
19200
19201 if (SDValue NegN0 =
19202 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19203 return NegN0;
19204
19205 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19206 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19207 // know it was called from a context with a nsz flag if the input fsub does
19208 // not.
19209 if (N0.getOpcode() == ISD::FSUB &&
19210      (DAG.getTarget().Options.NoSignedZerosFPMath ||
19211        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
19212 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19213 N0.getOperand(0));
19214 }
19215
19216  if (SimplifyDemandedBits(SDValue(N, 0)))
19217    return SDValue(N, 0);
19218
19219 if (SDValue Cast = foldSignChangeInBitcast(N))
19220 return Cast;
19221
19222 return SDValue();
19223}
19224
19225SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19226 SDValue N0 = N->getOperand(0);
19227 SDValue N1 = N->getOperand(1);
19228 EVT VT = N->getValueType(0);
19229 const SDNodeFlags Flags = N->getFlags();
19230 unsigned Opc = N->getOpcode();
19231 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19232 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
19233 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19234
19235 // Constant fold.
19236 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19237 return C;
19238
19239 // Canonicalize to constant on RHS.
19240  if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
19241      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
19242    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19243
19244 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19245 const APFloat &AF = N1CFP->getValueAPF();
19246
19247 // minnum(X, nan) -> X
19248 // maxnum(X, nan) -> X
19249 // minimum(X, nan) -> nan
19250 // maximum(X, nan) -> nan
19251 if (AF.isNaN())
19252 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
19253
19254 // In the following folds, inf can be replaced with the largest finite
19255 // float, if the ninf flag is set.
19256 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19257 // minnum(X, -inf) -> -inf
19258 // maxnum(X, +inf) -> +inf
19259 // minimum(X, -inf) -> -inf if nnan
19260 // maximum(X, +inf) -> +inf if nnan
19261 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
19262 return N->getOperand(1);
19263
19264 // minnum(X, +inf) -> X if nnan
19265 // maxnum(X, -inf) -> X if nnan
19266 // minimum(X, +inf) -> X
19267 // maximum(X, -inf) -> X
19268 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
19269 return N->getOperand(0);
19270 }
19271 }
19272
19273 if (SDValue SD = reassociateReduction(
19274 PropagatesNaN
19275 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
19276 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
19277 Opc, SDLoc(N), VT, N0, N1, Flags))
19278 return SD;
19279
19280 return SDValue();
19281}
19282
19283SDValue DAGCombiner::visitFABS(SDNode *N) {
19284 SDValue N0 = N->getOperand(0);
19285 EVT VT = N->getValueType(0);
19286 SDLoc DL(N);
19287
19288 // fold (fabs c1) -> fabs(c1)
19289 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19290 return C;
19291
19292  if (SimplifyDemandedBits(SDValue(N, 0)))
19293    return SDValue(N, 0);
19294
19295 if (SDValue Cast = foldSignChangeInBitcast(N))
19296 return Cast;
19297
19298 return SDValue();
19299}
19300
19301SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19302 SDValue Chain = N->getOperand(0);
19303 SDValue N1 = N->getOperand(1);
19304 SDValue N2 = N->getOperand(2);
19305
19306 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19307 // nondeterministic jumps).
19308 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19309 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19310 N1->getOperand(0), N2, N->getFlags());
19311 }
19312
19313 // Variant of the previous fold where there is a SETCC in between:
19314 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19315 // =>
19316 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19317 // =>
19318 // BRCOND(SETCC(X, CONST, Cond))
19319 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19320 // isn't equivalent to true or false.
19321 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19322 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19323 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19324 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19325    ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
19326    ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19327 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19328 bool Updated = false;
19329
19330 // Is 'X Cond C' always true or false?
19331 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19332 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19333 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19334 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19335 (Cond == ISD::SETGT && C->isMaxSignedValue());
19336 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19337 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19338 (Cond == ISD::SETUGE && C->isZero()) ||
19339 (Cond == ISD::SETGE && C->isMinSignedValue());
19340 return True || False;
19341 };
19342
19343 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19344 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19345 S0 = S0->getOperand(0);
19346 Updated = true;
19347 }
19348 }
19349 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19350 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19351 S1 = S1->getOperand(0);
19352 Updated = true;
19353 }
19354 }
19355
19356 if (Updated)
19357 return DAG.getNode(
19358 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19359 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19360 N->getFlags());
19361 }
19362
19363 // If N is a constant we could fold this into a fallthrough or unconditional
19364 // branch. However that doesn't happen very often in normal code, because
19365 // Instcombine/SimplifyCFG should have handled the available opportunities.
19366 // If we did this folding here, it would be necessary to update the
19367 // MachineBasicBlock CFG, which is awkward.
19368
19369 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19370 // on the target, also copy fast math flags.
19371 if (N1.getOpcode() == ISD::SETCC &&
19372 TLI.isOperationLegalOrCustom(ISD::BR_CC,
19373 N1.getOperand(0).getValueType())) {
19374 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19375 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19376 N1->getFlags());
19377 }
19378
19379 if (N1.hasOneUse()) {
19380 // rebuildSetCC calls visitXor which may change the Chain when there is a
19381 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19382 HandleSDNode ChainHandle(Chain);
19383 if (SDValue NewN1 = rebuildSetCC(N1))
19384 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19385 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19386 }
19387
19388 return SDValue();
19389}
19390
19391SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19392 if (N.getOpcode() == ISD::SRL ||
19393 (N.getOpcode() == ISD::TRUNCATE &&
19394 (N.getOperand(0).hasOneUse() &&
19395 N.getOperand(0).getOpcode() == ISD::SRL))) {
19396    // Look past the truncate.
19397 if (N.getOpcode() == ISD::TRUNCATE)
19398 N = N.getOperand(0);
19399
19400 // Match this pattern so that we can generate simpler code:
19401 //
19402 // %a = ...
19403 // %b = and i32 %a, 2
19404 // %c = srl i32 %b, 1
19405 // brcond i32 %c ...
19406 //
19407 // into
19408 //
19409 // %a = ...
19410 // %b = and i32 %a, 2
19411 // %c = setcc eq %b, 0
19412 // brcond %c ...
19413 //
19414 // This applies only when the AND constant value has one bit set and the
19415 // SRL constant is equal to the log2 of the AND constant. The back-end is
19416 // smart enough to convert the result into a TEST/JMP sequence.
19417 SDValue Op0 = N.getOperand(0);
19418 SDValue Op1 = N.getOperand(1);
19419
19420 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19421 SDValue AndOp1 = Op0.getOperand(1);
19422
19423 if (AndOp1.getOpcode() == ISD::Constant) {
19424 const APInt &AndConst = AndOp1->getAsAPIntVal();
19425
19426 if (AndConst.isPowerOf2() &&
19427 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19428 SDLoc DL(N);
19429 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19430 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19431 ISD::SETNE);
19432 }
19433 }
19434 }
19435 }
19436
19437 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19438 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19439 if (N.getOpcode() == ISD::XOR) {
19440 // Because we may call this on a speculatively constructed
19441 // SimplifiedSetCC Node, we need to simplify this node first.
19442 // Ideally this should be folded into SimplifySetCC and not
19443 // here. For now, grab a handle to N so we don't lose it from
19444    // replacements internal to the visit.
19445 HandleSDNode XORHandle(N);
19446 while (N.getOpcode() == ISD::XOR) {
19447 SDValue Tmp = visitXOR(N.getNode());
19448 // No simplification done.
19449 if (!Tmp.getNode())
19450 break;
19451      // Returning N is a form of in-visit replacement that may invalidate
19452      // N. Grab the value from the handle.
19453 if (Tmp.getNode() == N.getNode())
19454 N = XORHandle.getValue();
19455 else // Node simplified. Try simplifying again.
19456 N = Tmp;
19457 }
19458
19459 if (N.getOpcode() != ISD::XOR)
19460 return N;
19461
19462 SDValue Op0 = N->getOperand(0);
19463 SDValue Op1 = N->getOperand(1);
19464
19465 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19466 bool Equal = false;
19467 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19468 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19469 Op0.getValueType() == MVT::i1) {
19470 N = Op0;
19471 Op0 = N->getOperand(0);
19472 Op1 = N->getOperand(1);
19473 Equal = true;
19474 }
19475
19476 EVT SetCCVT = N.getValueType();
19477 if (LegalTypes)
19478 SetCCVT = getSetCCResultType(SetCCVT);
19479 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19480 // it would introduce illegal operations post-legalization as this can
19481 // result in infinite looping between converting xor->setcc here, and
19482 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19483 ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
19484 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19485 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19486 }
19487 }
19488
19489 return SDValue();
19490}
19491
19492// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19493//
19494SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19495 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19496 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19497
19498 // If N is a constant we could fold this into a fallthrough or unconditional
19499 // branch. However that doesn't happen very often in normal code, because
19500 // Instcombine/SimplifyCFG should have handled the available opportunities.
19501 // If we did this folding here, it would be necessary to update the
19502 // MachineBasicBlock CFG, which is awkward.
19503
19504 // Use SimplifySetCC to simplify SETCC's.
19505 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
19506 CondLHS, CondRHS, CC->get(), SDLoc(N),
19507 false);
19508 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19509
19510 // fold to a simpler setcc
19511 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19512 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19513 N->getOperand(0), Simp.getOperand(2),
19514 Simp.getOperand(0), Simp.getOperand(1),
19515 N->getOperand(4));
19516
19517 return SDValue();
19518}
19519
19520static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19521 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19522 const TargetLowering &TLI) {
19523 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19524 if (LD->isIndexed())
19525 return false;
19526 EVT VT = LD->getMemoryVT();
19527 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19528 return false;
19529 Ptr = LD->getBasePtr();
19530 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19531 if (ST->isIndexed())
19532 return false;
19533 EVT VT = ST->getMemoryVT();
19534 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19535 return false;
19536 Ptr = ST->getBasePtr();
19537 IsLoad = false;
19538 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19539 if (LD->isIndexed())
19540 return false;
19541 EVT VT = LD->getMemoryVT();
19542 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19543 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19544 return false;
19545 Ptr = LD->getBasePtr();
19546 IsMasked = true;
19547 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19548 if (ST->isIndexed())
19549 return false;
19550 EVT VT = ST->getMemoryVT();
19551 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19552 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19553 return false;
19554 Ptr = ST->getBasePtr();
19555 IsLoad = false;
19556 IsMasked = true;
19557 } else {
19558 return false;
19559 }
19560 return true;
19561}
19562
19563/// Try turning a load/store into a pre-indexed load/store when the base
19564/// pointer is an add or subtract and it has other uses besides the load/store.
19565/// After the transformation, the new indexed load/store has effectively folded
19566/// the add/subtract in and all of its other uses are redirected to the
19567/// new load/store.
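/// Illustrative sketch (pseudo-DAG; the indexed form is target-dependent):
///   t1 = add t0, 16
///   x  = load t1        ; t1 also has other users
/// becomes
///   x, t1' = pre-indexed load t0 + 16
/// with the other users of the old add redirected to the returned pointer t1'.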
19568bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19569 if (Level < AfterLegalizeDAG)
19570 return false;
19571
19572 bool IsLoad = true;
19573 bool IsMasked = false;
19574 SDValue Ptr;
19575 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19576 Ptr, TLI))
19577 return false;
19578
19579 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19580 // out. There is no reason to make this a preinc/predec.
19581 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19582 Ptr->hasOneUse())
19583 return false;
19584
19585 // Ask the target to do addressing mode selection.
19586 SDValue BasePtr;
19587 SDValue Offset;
19588 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19589 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19590 return false;
19591
19592 // Backends without true r+i pre-indexed forms may need to pass a
19593 // constant base with a variable offset so that constant coercion
19594 // will work with the patterns in canonical form.
19595 bool Swapped = false;
19596 if (isa<ConstantSDNode>(BasePtr)) {
19597 std::swap(BasePtr, Offset);
19598 Swapped = true;
19599 }
19600
19601 // Don't create an indexed load / store with zero offset.
19602 if (isNullConstant(Offset))
19603 return false;
19604
19605 // Try turning it into a pre-indexed load / store except when:
19606 // 1) The new base ptr is a frame index.
19607 // 2) If N is a store and the new base ptr is either the same as or is a
19608 // predecessor of the value being stored.
19609 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19610 // that would create a cycle.
19611 // 4) All uses are load / store ops that use it as old base ptr.
19612
19613 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19614 // (plus the implicit offset) to a register to preinc anyway.
19615 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19616 return false;
19617
19618 // Check #2.
19619 if (!IsLoad) {
19620 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19621 : cast<StoreSDNode>(N)->getValue();
19622
19623 // Would require a copy.
19624 if (Val == BasePtr)
19625 return false;
19626
19627 // Would create a cycle.
19628 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19629 return false;
19630 }
19631
19632 // Caches for hasPredecessorHelper.
19633 SmallPtrSet<const SDNode *, 32> Visited;
19634 SmallVector<const SDNode *, 16> Worklist;
19635 Worklist.push_back(N);
19636
19637 // If the offset is a constant, there may be other adds of constants that
19638 // can be folded with this one. We should do this to avoid having to keep
19639 // a copy of the original base pointer.
19640 SmallVector<SDNode *, 16> OtherUses;
19641 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
19642 if (isa<ConstantSDNode>(Offset))
19643 for (SDUse &Use : BasePtr->uses()) {
19644 // Skip the use that is Ptr and uses of other results from BasePtr's
19645 // node (important for nodes that return multiple results).
19646 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19647 continue;
19648
19649 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19650 MaxSteps))
19651 continue;
19652
19653 if (Use.getUser()->getOpcode() != ISD::ADD &&
19654 Use.getUser()->getOpcode() != ISD::SUB) {
19655 OtherUses.clear();
19656 break;
19657 }
19658
19659 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19660 if (!isa<ConstantSDNode>(Op1)) {
19661 OtherUses.clear();
19662 break;
19663 }
19664
19665 // FIXME: In some cases, we can be smarter about this.
19666 if (Op1.getValueType() != Offset.getValueType()) {
19667 OtherUses.clear();
19668 break;
19669 }
19670
19671 OtherUses.push_back(Use.getUser());
19672 }
19673
19674 if (Swapped)
19675 std::swap(BasePtr, Offset);
19676
19677 // Now check for #3 and #4.
19678 bool RealUse = false;
19679
19680 for (SDNode *User : Ptr->users()) {
19681 if (User == N)
19682 continue;
19683 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
19684 return false;
19685
19686 // If Ptr may be folded in addressing mode of other use, then it's
19687 // not profitable to do this transformation.
19688 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
19689 RealUse = true;
19690 }
19691
19692 if (!RealUse)
19693 return false;
19694
19695 SDValue Result;
19696 if (!IsMasked) {
19697 if (IsLoad)
19698 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19699 else
19700 Result =
19701 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19702 } else {
19703 if (IsLoad)
19704 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19705 Offset, AM);
19706 else
19707 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19708 Offset, AM);
19709 }
19710 ++PreIndexedNodes;
19711 ++NodesCombined;
19712 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19713 Result.dump(&DAG); dbgs() << '\n');
19714 WorklistRemover DeadNodes(*this);
19715 if (IsLoad) {
19716 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19717 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19718 } else {
19719 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19720 }
19721
19722 // Finally, since the node is now dead, remove it from the graph.
19723 deleteAndRecombine(N);
19724
19725 if (Swapped)
19726 std::swap(BasePtr, Offset);
19727
19728 // Replace other uses of BasePtr that can be updated to use Ptr
19729 for (SDNode *OtherUse : OtherUses) {
19730 unsigned OffsetIdx = 1;
19731 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19732 OffsetIdx = 0;
19733 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
19734 "Expected BasePtr operand");
19735
19736 // We need to replace ptr0 in the following expression:
19737 // x0 * offset0 + y0 * ptr0 = t0
19738 // knowing that
19739 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19740 //
19741 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19742 // indexed load/store and the expression that needs to be re-written.
19743 //
19744 // Therefore, we have:
19745 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
19746
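// For example (illustrative): with OtherUse = (add ptr0, 12) and a
// non-swapped PRE_INC access whose folded offset is 8 (so t1 = ptr0 + 8),
// this rewrites OtherUse to (add t1, 4).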
19747 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
19748 const APInt &Offset0 = CN->getAPIntValue();
19749 const APInt &Offset1 = Offset->getAsAPIntVal();
19750 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19751 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19752 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19753 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19754
19755 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19756
19757 APInt CNV = Offset0;
19758 if (X0 < 0) CNV = -CNV;
19759 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19760 else CNV = CNV - Offset1;
19761
19762 SDLoc DL(OtherUse);
19763
19764 // We can now generate the new expression.
19765 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19766 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19767
19768 SDValue NewUse =
19769 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
19770 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
19771 deleteAndRecombine(OtherUse);
19772 }
19773
19774 // Replace the uses of Ptr with uses of the updated base value.
19775 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19776 deleteAndRecombine(Ptr.getNode());
19777 AddToWorklist(Result.getNode());
19778
19779 return true;
19780}
19781
19782 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19783 SDValue &BasePtr, SDValue &Offset,
19784 ISD::MemIndexedMode &AM,
19785 SelectionDAG &DAG,
19786 const TargetLowering &TLI) {
19787 if (PtrUse == N ||
19788 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19789 return false;
19790
19791 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19792 return false;
19793
19794 // Don't create an indexed load / store with zero offset.
19795 if (isNullConstant(Offset))
19796 return false;
19797
19798 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19799 return false;
19800
19801 SmallPtrSet<const SDNode *, 32> Visited;
19802 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
19803 for (SDNode *User : BasePtr->users()) {
19804 if (User == Ptr.getNode())
19805 continue;
19806
19807 // Say no if there's a later user which could perform the indexing instead.
19808 if (isa<MemSDNode>(User)) {
19809 bool IsLoad = true;
19810 bool IsMasked = false;
19811 SDValue OtherPtr;
19812 if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19813 IsMasked, OtherPtr, TLI)) {
19814 SmallVector<const SDNode *, 2> Worklist;
19815 Worklist.push_back(User);
19816 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19817 return false;
19818 }
19819 }
19820
19821 // If all the uses are load / store addresses, then don't do the
19822 // transformation.
19823 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19824 for (SDNode *UserUser : User->users())
19825 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19826 return false;
19827 }
19828 }
19829 return true;
19830}
19831
19832 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19833 bool &IsMasked, SDValue &Ptr,
19834 SDValue &BasePtr, SDValue &Offset,
19835 ISD::MemIndexedMode &AM,
19836 SelectionDAG &DAG,
19837 const TargetLowering &TLI) {
19838 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19839 IsMasked, Ptr, TLI) ||
19840 Ptr->hasOneUse())
19841 return nullptr;
19842
19843 // Try turning it into a post-indexed load / store except when
19844 // 1) All uses are load / store ops that use it as base ptr (and
19845 // it may be folded as an addressing mode).
19846 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19847 // nor a successor of N. Otherwise, if Op is folded that would
19848 // create a cycle.
19850 for (SDNode *Op : Ptr->users()) {
19851 // Check for #1.
19852 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19853 continue;
19854
19855 // Check for #2.
19856 SmallPtrSet<const SDNode *, 32> Visited;
19857 SmallVector<const SDNode *, 8> Worklist;
19858 // Ptr is predecessor to both N and Op.
19859 Visited.insert(Ptr.getNode());
19860 Worklist.push_back(N);
19861 Worklist.push_back(Op);
19862 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19863 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19864 return Op;
19865 }
19866 return nullptr;
19867}
19868
19869 /// Try to combine a load/store with an add/sub of the base pointer node into a
19870 /// post-indexed load/store. The transformation effectively folds the add/subtract
19871 /// into the new indexed load/store, and all of its uses are redirected to the
19872 /// new load/store.
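/// Illustrative sketch (pseudo-DAG; the indexed form is target-dependent):
///   x  = load t0
///   t1 = add t0, 16
/// becomes
///   x, t1' = post-indexed load t0 + 16
/// with the users of the old add redirected to the returned pointer t1'.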
19873bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19874 if (Level < AfterLegalizeDAG)
19875 return false;
19876
19877 bool IsLoad = true;
19878 bool IsMasked = false;
19879 SDValue Ptr;
19880 SDValue BasePtr;
19881 SDValue Offset;
19882 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19883 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19884 Offset, AM, DAG, TLI);
19885 if (!Op)
19886 return false;
19887
19888 SDValue Result;
19889 if (!IsMasked)
19890 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19891 Offset, AM)
19892 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19893 BasePtr, Offset, AM);
19894 else
19895 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19896 BasePtr, Offset, AM)
19897 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19898 BasePtr, Offset, AM);
19899 ++PostIndexedNodes;
19900 ++NodesCombined;
19901 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19902 Result.dump(&DAG); dbgs() << '\n');
19903 WorklistRemover DeadNodes(*this);
19904 if (IsLoad) {
19905 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19906 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19907 } else {
19908 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19909 }
19910
19911 // Finally, since the node is now dead, remove it from the graph.
19912 deleteAndRecombine(N);
19913
19914 // Replace the uses of Op with uses of the updated base value.
19915 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
19916 Result.getValue(IsLoad ? 1 : 0));
19917 deleteAndRecombine(Op);
19918 return true;
19919}
19920
19921/// Return the base-pointer arithmetic from an indexed \p LD.
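/// E.g. for a PRE_INC or POST_INC load this returns (add BP, Inc), and for the
/// *_DEC forms (sub BP, Inc), where BP and Inc are the load's base pointer and
/// offset operands.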
19922SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
19923 ISD::MemIndexedMode AM = LD->getAddressingMode();
19924 assert(AM != ISD::UNINDEXED);
19925 SDValue BP = LD->getOperand(1);
19926 SDValue Inc = LD->getOperand(2);
19927
19928 // Some backends use TargetConstants for load offsets, but don't expect
19929 // TargetConstants in general ADD nodes. We can convert these constants into
19930 // regular Constants (if the constant is not opaque).
19931 assert((Inc.getOpcode() != ISD::TargetConstant ||
19932 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19933 "Cannot split out indexing using opaque target constants");
19934 if (Inc.getOpcode() == ISD::TargetConstant) {
19935 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
19936 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
19937 ConstInc->getValueType(0));
19938 }
19939
19940 unsigned Opc =
19941 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
19942 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
19943}
19944
19945 static ElementCount numVectorEltsOrZero(EVT T) {
19946 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
19947}
19948
19949bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
19950 EVT STType = Val.getValueType();
19951 EVT STMemType = ST->getMemoryVT();
19952 if (STType == STMemType)
19953 return true;
19954 if (isTypeLegal(STMemType))
19955 return false; // fail.
19956 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
19957 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
19958 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
19959 return true;
19960 }
19961 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
19962 STType.isInteger() && STMemType.isInteger()) {
19963 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
19964 return true;
19965 }
19966 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
19967 Val = DAG.getBitcast(STMemType, Val);
19968 return true;
19969 }
19970 return false; // fail.
19971}
19972
19973bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
19974 EVT LDMemType = LD->getMemoryVT();
19975 EVT LDType = LD->getValueType(0);
19976 assert(Val.getValueType() == LDMemType &&
19977 "Attempting to extend value of non-matching type");
19978 if (LDType == LDMemType)
19979 return true;
19980 if (LDMemType.isInteger() && LDType.isInteger()) {
19981 switch (LD->getExtensionType()) {
19982 case ISD::NON_EXTLOAD:
19983 Val = DAG.getBitcast(LDType, Val);
19984 return true;
19985 case ISD::EXTLOAD:
19986 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
19987 return true;
19988 case ISD::SEXTLOAD:
19989 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
19990 return true;
19991 case ISD::ZEXTLOAD:
19992 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
19993 return true;
19994 }
19995 }
19996 return false;
19997}
19998
19999StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20000 int64_t &Offset) {
20001 SDValue Chain = LD->getOperand(0);
20002
20003 // Look through CALLSEQ_START.
20004 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20005 Chain = Chain->getOperand(0);
20006
20007 StoreSDNode *ST = nullptr;
20008 SmallVector<SDValue, 8> Aliases;
20009 if (Chain.getOpcode() == ISD::TokenFactor) {
20010 // Look for unique store within the TokenFactor.
20011 for (SDValue Op : Chain->ops()) {
20012 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20013 if (!Store)
20014 continue;
20015 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20016 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20017 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20018 continue;
20019 // Make sure the store is not aliased with any nodes in TokenFactor.
20020 GatherAllAliases(Store, Chain, Aliases);
20021 if (Aliases.empty() ||
20022 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20023 ST = Store;
20024 break;
20025 }
20026 } else {
20027 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20028 if (Store) {
20029 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20030 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20031 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20032 ST = Store;
20033 }
20034 }
20035
20036 return ST;
20037}
20038
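/// If the value loaded by \p LD was written by an earlier, non-aliased store
/// to the same address, forward the stored value to the load, e.g. (sketch):
///   store %v to %p  ;  %x = load %p   -->   %x = %v
/// possibly inserting a truncate/extend or mask when the value types differ.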
20039SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20040 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20041 return SDValue();
20042 SDValue Chain = LD->getOperand(0);
20043 int64_t Offset;
20044
20045 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20046 // TODO: Relax this restriction for unordered atomics (see D66309)
20047 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20048 return SDValue();
20049
20050 EVT LDType = LD->getValueType(0);
20051 EVT LDMemType = LD->getMemoryVT();
20052 EVT STMemType = ST->getMemoryVT();
20053 EVT STType = ST->getValue().getValueType();
20054
20055 // There are two cases to consider here:
20056 // 1. The store is fixed width and the load is scalable. In this case we
20057 // don't know at compile time if the store completely envelops the load
20058 // so we abandon the optimisation.
20059 // 2. The store is scalable and the load is fixed width. We could
20060 // potentially support a limited number of cases here, but there has been
20061 // no cost-benefit analysis to prove it's worth it.
20062 bool LdStScalable = LDMemType.isScalableVT();
20063 if (LdStScalable != STMemType.isScalableVT())
20064 return SDValue();
20065
20066 // If we are dealing with scalable vectors on a big endian platform the
20067 // calculation of offsets below becomes trickier, since we do not know at
20068 // compile time the absolute size of the vector. Until we've done more
20069 // analysis on big-endian platforms it seems better to bail out for now.
20070 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20071 return SDValue();
20072
20073 // Normalize for endianness. After this, Offset=0 will denote that the least
20074 // significant bit in the loaded value maps to the least significant bit in
20075 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20076 // n-th least significant byte of the stored value.
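// For example (illustrative): if an i32 store writes 0xAABBCCDD, an i8 load
// with Offset=1 reads 0xCC, the second least significant byte.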
20077 int64_t OrigOffset = Offset;
20078 if (DAG.getDataLayout().isBigEndian())
20079 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20080 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20081 8 -
20082 Offset;
20083
20084 // Check that the stored value covers all bits that are loaded.
20085 bool STCoversLD;
20086
20087 TypeSize LdMemSize = LDMemType.getSizeInBits();
20088 TypeSize StMemSize = STMemType.getSizeInBits();
20089 if (LdStScalable)
20090 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20091 else
20092 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20093 StMemSize.getFixedValue());
20094
20095 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20096 if (LD->isIndexed()) {
20097 // Cannot handle opaque target constants and we must respect the user's
20098 // request not to split indexes from loads.
20099 if (!canSplitIdx(LD))
20100 return SDValue();
20101 SDValue Idx = SplitIndexingFromLoad(LD);
20102 SDValue Ops[] = {Val, Idx, Chain};
20103 return CombineTo(LD, Ops, 3);
20104 }
20105 return CombineTo(LD, Val, Chain);
20106 };
20107
20108 if (!STCoversLD)
20109 return SDValue();
20110
20111 // Memory as copy space (potentially masked).
20112 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20113 // Simple case: Direct non-truncating forwarding
20114 if (LDType.getSizeInBits() == LdMemSize)
20115 return ReplaceLd(LD, ST->getValue(), Chain);
20116 // Can we model the truncate and extension with an and mask?
20117 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20118 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20119 // Mask to size of LDMemType
20120 auto Mask =
20121 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20122 StMemSize.getFixedValue()),
20123 SDLoc(ST), STType);
20124 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20125 return ReplaceLd(LD, Val, Chain);
20126 }
20127 }
20128
20129 // Handle some big-endian cases that would have Offset 0 (and thus be handled
20130 // above) on a little-endian target.
20131 SDValue Val = ST->getValue();
20132 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20133 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20134 !LDType.isVector() && isTypeLegal(STType) &&
20135 TLI.isOperationLegal(ISD::SRL, STType)) {
20136 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20137 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20138 Offset = 0;
20139 }
20140 }
20141
20142 // TODO: Deal with nonzero offset.
20143 if (LD->getBasePtr().isUndef() || Offset != 0)
20144 return SDValue();
20145 // Model necessary truncations / extensions.
20146 // Truncate Value To Stored Memory Size.
20147 do {
20148 if (!getTruncatedStoreValue(ST, Val))
20149 break;
20150 if (!isTypeLegal(LDMemType))
20151 break;
20152 if (STMemType != LDMemType) {
20153 // TODO: Support vectors? This requires extract_subvector/bitcast.
20154 if (!STMemType.isVector() && !LDMemType.isVector() &&
20155 STMemType.isInteger() && LDMemType.isInteger())
20156 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20157 else
20158 break;
20159 }
20160 if (!extendLoadedValueToExtension(LD, Val))
20161 break;
20162 return ReplaceLd(LD, Val, Chain);
20163 } while (false);
20164
20165 // On failure, cleanup dead nodes we may have created.
20166 if (Val->use_empty())
20167 deleteAndRecombine(Val.getNode());
20168 return SDValue();
20169}
20170
20171SDValue DAGCombiner::visitLOAD(SDNode *N) {
20172 LoadSDNode *LD = cast<LoadSDNode>(N);
20173 SDValue Chain = LD->getChain();
20174 SDValue Ptr = LD->getBasePtr();
20175
20176 // If load is not volatile and there are no uses of the loaded value (and
20177 // the updated indexed value in case of indexed loads), change uses of the
20178 // chain value into uses of the chain input (i.e. delete the dead load).
20179 // TODO: Allow this for unordered atomics (see D66309)
20180 if (LD->isSimple()) {
20181 if (N->getValueType(1) == MVT::Other) {
20182 // Unindexed loads.
20183 if (!N->hasAnyUseOfValue(0)) {
20184 // It's not safe to use the two value CombineTo variant here. e.g.
20185 // v1, chain2 = load chain1, loc
20186 // v2, chain3 = load chain2, loc
20187 // v3 = add v2, c
20188 // Now we replace use of chain2 with chain1. This makes the second load
20189 // isomorphic to the one we are deleting, and thus makes this load live.
20190 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20191 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20192 dbgs() << "\n");
20193 WorklistRemover DeadNodes(*this);
20194 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20195 AddUsersToWorklist(Chain.getNode());
20196 if (N->use_empty())
20197 deleteAndRecombine(N);
20198
20199 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20200 }
20201 } else {
20202 // Indexed loads.
20203 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20204
20205 // If this load has an opaque TargetConstant offset, then we cannot split
20206 // the indexing into an add/sub directly (that TargetConstant may not be
20207 // valid for a different type of node, and we cannot convert an opaque
20208 // target constant into a regular constant).
20209 bool CanSplitIdx = canSplitIdx(LD);
20210
20211 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20212 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20213 SDValue Index;
20214 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20215 Index = SplitIndexingFromLoad(LD);
20216 // Try to fold the base pointer arithmetic into subsequent loads and
20217 // stores.
20218 AddUsersToWorklist(N);
20219 } else
20220 Index = DAG.getUNDEF(N->getValueType(1));
20221 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20222 dbgs() << "\nWith: "; Undef.dump(&DAG);
20223 dbgs() << " and 2 other values\n");
20224 WorklistRemover DeadNodes(*this);
20225 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20226 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20227 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20228 deleteAndRecombine(N);
20229 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20230 }
20231 }
20232 }
20233
20234 // If this load is directly stored, replace the load value with the stored
20235 // value.
20236 if (auto V = ForwardStoreValueToDirectLoad(LD))
20237 return V;
20238
20239 // Try to infer better alignment information than the load already has.
20240 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20241 !LD->isAtomic()) {
20242 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20243 if (*Alignment > LD->getAlign() &&
20244 isAligned(*Alignment, LD->getSrcValueOffset())) {
20245 SDValue NewLoad = DAG.getExtLoad(
20246 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20247 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20248 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20249 // NewLoad will always be N as we are only refining the alignment
20250 assert(NewLoad.getNode() == N);
20251 (void)NewLoad;
20252 }
20253 }
20254 }
20255
20256 if (LD->isUnindexed()) {
20257 // Walk up chain skipping non-aliasing memory nodes.
20258 SDValue BetterChain = FindBetterChain(LD, Chain);
20259
20260 // If there is a better chain.
20261 if (Chain != BetterChain) {
20262 SDValue ReplLoad;
20263
20264 // Replace the chain to avoid the dependency.
20265 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20266 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20267 BetterChain, Ptr, LD->getMemOperand());
20268 } else {
20269 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20270 LD->getValueType(0),
20271 BetterChain, Ptr, LD->getMemoryVT(),
20272 LD->getMemOperand());
20273 }
20274
20275 // Create token factor to keep old chain connected.
20276 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20277 MVT::Other, Chain, ReplLoad.getValue(1));
20278
20279 // Replace uses with load result and token factor
20280 return CombineTo(N, ReplLoad.getValue(0), Token);
20281 }
20282 }
20283
20284 // Try transforming N to an indexed load.
20285 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20286 return SDValue(N, 0);
20287
20288 // Try to slice up N to more direct loads if the slices are mapped to
20289 // different register banks or pairing can take place.
20290 if (SliceUpLoad(N))
20291 return SDValue(N, 0);
20292
20293 return SDValue();
20294}
20295
20296namespace {
20297
20298/// Helper structure used to slice a load in smaller loads.
20299/// Basically a slice is obtained from the following sequence:
20300/// Origin = load Ty1, Base
20301/// Shift = srl Ty1 Origin, CstTy Amount
20302/// Inst = trunc Shift to Ty2
20303///
20304/// Then, it will be rewritten into:
20305/// Slice = load SliceTy, Base + SliceOffset
20306/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20307///
20308/// SliceTy is deduced from the number of bits that are actually used to
20309/// build Inst.
20310struct LoadedSlice {
20311 /// Helper structure used to compute the cost of a slice.
20312 struct Cost {
20313 /// Are we optimizing for code size.
20314 bool ForCodeSize = false;
20315
20316 /// Various cost.
20317 unsigned Loads = 0;
20318 unsigned Truncates = 0;
20319 unsigned CrossRegisterBanksCopies = 0;
20320 unsigned ZExts = 0;
20321 unsigned Shift = 0;
20322
20323 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20324
20325 /// Get the cost of one isolated slice.
20326 Cost(const LoadedSlice &LS, bool ForCodeSize)
20327 : ForCodeSize(ForCodeSize), Loads(1) {
20328 EVT TruncType = LS.Inst->getValueType(0);
20329 EVT LoadedType = LS.getLoadedType();
20330 if (TruncType != LoadedType &&
20331 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20332 ZExts = 1;
20333 }
20334
20335 /// Account for slicing gain in the current cost.
20336 /// Slicing provides a few gains, like removing a shift or a
20337 /// truncate. This method allows growing the cost of the original
20338 /// load with the gain from this slice.
20339 void addSliceGain(const LoadedSlice &LS) {
20340 // Each slice saves a truncate.
20341 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20342 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20343 ++Truncates;
20344 // If there is a shift amount, this slice gets rid of it.
20345 if (LS.Shift)
20346 ++Shift;
20347 // If this slice can merge a cross register bank copy, account for it.
20348 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20349 ++CrossRegisterBanksCopies;
20350 }
20351
20352 Cost &operator+=(const Cost &RHS) {
20353 Loads += RHS.Loads;
20354 Truncates += RHS.Truncates;
20355 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20356 ZExts += RHS.ZExts;
20357 Shift += RHS.Shift;
20358 return *this;
20359 }
20360
20361 bool operator==(const Cost &RHS) const {
20362 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20363 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20364 ZExts == RHS.ZExts && Shift == RHS.Shift;
20365 }
20366
20367 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20368
20369 bool operator<(const Cost &RHS) const {
20370 // Assume cross register banks copies are as expensive as loads.
20371 // FIXME: Do we want some more target hooks?
20372 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20373 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20374 // Unless we are optimizing for code size, consider the
20375 // expensive operation first.
20376 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20377 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20378 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20379 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20380 }
20381
20382 bool operator>(const Cost &RHS) const { return RHS < *this; }
20383
20384 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20385
20386 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20387 };
20388
20389 // The last instruction that represents the slice. This should be a
20390 // truncate instruction.
20391 SDNode *Inst;
20392
20393 // The original load instruction.
20394 LoadSDNode *Origin;
20395
20396 // The right shift amount in bits from the original load.
20397 unsigned Shift;
20398
20399 // The DAG from which Origin came.
20400 // This is used to get some contextual information about legal types, etc.
20401 SelectionDAG *DAG;
20402
20403 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20404 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20405 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20406
20407 /// Get the bits used in a chunk of bits \p BitWidth large.
20408 /// \return Result is \p BitWidth and has used bits set to 1 and
20409 /// not used bits set to 0.
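/// E.g. an i8 slice with Shift == 8 inside an i32 load gives
/// UsedBits == 0x0000ff00.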
20410 APInt getUsedBits() const {
20411 // Reproduce the trunc(lshr) sequence:
20412 // - Start from the truncated value.
20413 // - Zero extend to the desired bit width.
20414 // - Shift left.
20415 assert(Origin && "No original load to compare against.");
20416 unsigned BitWidth = Origin->getValueSizeInBits(0);
20417 assert(Inst && "This slice is not bound to an instruction");
20418 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20419 "Extracted slice is bigger than the whole type!");
20420 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20421 UsedBits.setAllBits();
20422 UsedBits = UsedBits.zext(BitWidth);
20423 UsedBits <<= Shift;
20424 return UsedBits;
20425 }
20426
20427 /// Get the size of the slice to be loaded in bytes.
20428 unsigned getLoadedSize() const {
20429 unsigned SliceSize = getUsedBits().popcount();
20430 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20431 return SliceSize / 8;
20432 }
20433
20434 /// Get the type that will be loaded for this slice.
20435 /// Note: This may not be the final type for the slice.
20436 EVT getLoadedType() const {
20437 assert(DAG && "Missing context");
20438 LLVMContext &Ctxt = *DAG->getContext();
20439 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20440 }
20441
20442 /// Get the alignment of the load used for this slice.
20443 Align getAlign() const {
20444 Align Alignment = Origin->getAlign();
20445 uint64_t Offset = getOffsetFromBase();
20446 if (Offset != 0)
20447 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20448 return Alignment;
20449 }
20450
20451 /// Check if this slice can be rewritten with legal operations.
20452 bool isLegal() const {
20453 // An invalid slice is not legal.
20454 if (!Origin || !Inst || !DAG)
20455 return false;
20456
20457 // Offsets are for indexed load only, we do not handle that.
20458 if (!Origin->getOffset().isUndef())
20459 return false;
20460
20461 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20462
20463 // Check that the type is legal.
20464 EVT SliceType = getLoadedType();
20465 if (!TLI.isTypeLegal(SliceType))
20466 return false;
20467
20468 // Check that the load is legal for this type.
20469 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20470 return false;
20471
20472 // Check that the offset can be computed.
20473 // 1. Check its type.
20474 EVT PtrType = Origin->getBasePtr().getValueType();
20475 if (PtrType == MVT::Untyped || PtrType.isExtended())
20476 return false;
20477
20478 // 2. Check that it fits in the immediate.
20479 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20480 return false;
20481
20482 // 3. Check that the computation is legal.
20483 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20484 return false;
20485
20486 // Check that the zext is legal if it needs one.
20487 EVT TruncateType = Inst->getValueType(0);
20488 if (TruncateType != SliceType &&
20489 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20490 return false;
20491
20492 return true;
20493 }
20494
20495 /// Get the offset in bytes of this slice in the original chunk of
20496 /// bits.
20497 /// \pre DAG != nullptr.
20498 uint64_t getOffsetFromBase() const {
20499 assert(DAG && "Missing context.");
20500 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20501 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20502 uint64_t Offset = Shift / 8;
20503 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20504 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20505 "The size of the original loaded type is not a multiple of a"
20506 " byte.");
20507 // If Offset is bigger than TySizeInBytes, it means we are loading all
20508 // zeros. This should have been optimized before in the process.
20509 assert(TySizeInBytes > Offset &&
20510 "Invalid shift amount for given loaded size");
20511 if (IsBigEndian)
20512 Offset = TySizeInBytes - Offset - getLoadedSize();
20513 return Offset;
20514 }
20515
20516 /// Generate the sequence of instructions to load the slice
20517 /// represented by this object and redirect the uses of this slice to
20518 /// this new sequence of instructions.
20519 /// \pre this->Inst && this->Origin are valid Instructions and this
20520 /// object passed the legal check: LoadedSlice::isLegal returned true.
20521 /// \return The last instruction of the sequence used to load the slice.
20522 SDValue loadSlice() const {
20523 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20524 const SDValue &OldBaseAddr = Origin->getBasePtr();
20525 SDValue BaseAddr = OldBaseAddr;
20526 // Get the offset in that chunk of bytes w.r.t. the endianness.
20527 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20528 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20529 if (Offset) {
20530 // BaseAddr = BaseAddr + Offset.
20531 EVT ArithType = BaseAddr.getValueType();
20532 SDLoc DL(Origin);
20533 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20534 DAG->getConstant(Offset, DL, ArithType));
20535 }
20536
20537 // Create the type of the loaded slice according to its size.
20538 EVT SliceType = getLoadedType();
20539
20540 // Create the load for the slice.
20541 SDValue LastInst =
20542 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20543 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
20544 Origin->getMemOperand()->getFlags());
20545 // If the final type is not the same as the loaded type, this means that
20546 // we have to pad with zero. Create a zero extend for that.
20547 EVT FinalType = Inst->getValueType(0);
20548 if (SliceType != FinalType)
20549 LastInst =
20550 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20551 return LastInst;
20552 }
20553
20554 /// Check if this slice can be merged with an expensive cross register
20555 /// bank copy. E.g.,
20556 /// i = load i32
20557 /// f = bitcast i32 i to float
20558 bool canMergeExpensiveCrossRegisterBankCopy() const {
20559 if (!Inst || !Inst->hasOneUse())
20560 return false;
20561 SDNode *User = *Inst->user_begin();
20562 if (User->getOpcode() != ISD::BITCAST)
20563 return false;
20564 assert(DAG && "Missing context");
20565 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20566 EVT ResVT = User->getValueType(0);
20567 const TargetRegisterClass *ResRC =
20568 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20569 const TargetRegisterClass *ArgRC =
20570 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20571 User->getOperand(0)->isDivergent());
20572 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20573 return false;
20574
20575 // At this point, we know that we perform a cross-register-bank copy.
20576 // Check if it is expensive.
20577 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20578 // Assume bitcasts are cheap, unless both register classes do not
20579 // explicitly share a common sub class.
20580 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20581 return false;
20582
20583 // Check if it will be merged with the load.
20584 // 1. Check the alignment / fast memory access constraint.
20585 unsigned IsFast = 0;
20586 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20587 Origin->getAddressSpace(), getAlign(),
20588 Origin->getMemOperand()->getFlags(), &IsFast) ||
20589 !IsFast)
20590 return false;
20591
20592 // 2. Check that the load is a legal operation for that type.
20593 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20594 return false;
20595
20596 // 3. Check that we do not have a zext in the way.
20597 if (Inst->getValueType(0) != getLoadedType())
20598 return false;
20599
20600 return true;
20601 }
20602};
20603
20604} // end anonymous namespace
20605
20606/// Check that all bits set in \p UsedBits form a dense region, i.e.,
20607/// \p UsedBits looks like 0..0 1..1 0..0.
20608static bool areUsedBitsDense(const APInt &UsedBits) {
20609 // If all the bits are one, this is dense!
20610 if (UsedBits.isAllOnes())
20611 return true;
20612
20613 // Get rid of the unused bits on the right.
20614 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
20615 // Get rid of the unused bits on the left.
20616 if (NarrowedUsedBits.countl_zero())
20617 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
20618 // Check that the chunk of bits is completely used.
20619 return NarrowedUsedBits.isAllOnes();
20620}
20621
20622/// Check whether or not \p First and \p Second are next to each other
20623/// in memory. This means that there is no hole between the bits loaded
20624/// by \p First and the bits loaded by \p Second.
20625static bool areSlicesNextToEachOther(const LoadedSlice &First,
20626 const LoadedSlice &Second) {
20627 assert(First.Origin == Second.Origin && First.Origin &&
20628 "Unable to match different memory origins.");
20629 APInt UsedBits = First.getUsedBits();
20630 assert((UsedBits & Second.getUsedBits()) == 0 &&
20631 "Slices are not supposed to overlap.");
20632 UsedBits |= Second.getUsedBits();
20633 return areUsedBitsDense(UsedBits);
20634}
20635
20636/// Adjust the \p GlobalLSCost according to the target
20637 /// pairing capabilities and the layout of the slices.
20638 /// \pre \p GlobalLSCost should account for at least as many loads as
20639 /// there are in the slices in \p LoadedSlices.
20640 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20641 LoadedSlice::Cost &GlobalLSCost) {
20642 unsigned NumberOfSlices = LoadedSlices.size();
20643 // If there are fewer than 2 elements, no pairing is possible.
20644 if (NumberOfSlices < 2)
20645 return;
20646
20647 // Sort the slices so that elements that are likely to be next to each
20648 // other in memory are next to each other in the list.
20649 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
20650 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
20651 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
20652 });
20653 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
20654 // First (resp. Second) is the first (resp. second) candidate that can
20655 // potentially be placed in a paired load.
20656 const LoadedSlice *First = nullptr;
20657 const LoadedSlice *Second = nullptr;
20658 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
20659 // Set the beginning of the pair.
20660 First = Second) {
20661 Second = &LoadedSlices[CurrSlice];
20662
20663 // If First is NULL, it means we start a new pair.
20664 // Get to the next slice.
20665 if (!First)
20666 continue;
20667
20668 EVT LoadedType = First->getLoadedType();
20669
20670 // If the types of the slices are different, we cannot pair them.
20671 if (LoadedType != Second->getLoadedType())
20672 continue;
20673
20674 // Check if the target supplies paired loads for this type.
20675 Align RequiredAlignment;
20676 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
20677 // move to the next pair, this type is hopeless.
20678 Second = nullptr;
20679 continue;
20680 }
20681 // Check if we meet the alignment requirement.
20682 if (First->getAlign() < RequiredAlignment)
20683 continue;
20684
20685 // Check that both loads are next to each other in memory.
20686 if (!areSlicesNextToEachOther(*First, *Second))
20687 continue;
20688
20689 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
20690 --GlobalLSCost.Loads;
20691 // Move to the next pair.
20692 Second = nullptr;
20693 }
20694}
20695
20696/// Check the profitability of all involved LoadedSlice.
20697 /// Currently, it is considered profitable if there are exactly two
20698/// involved slices (1) which are (2) next to each other in memory, and
20699/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20700///
20701/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20702/// the elements themselves.
20703///
20704/// FIXME: When the cost model will be mature enough, we can relax
20705/// constraints (1) and (2).
20706 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20707 const APInt &UsedBits, bool ForCodeSize) {
20708 unsigned NumberOfSlices = LoadedSlices.size();
20709 if (StressLoadSlicing)
20710 return NumberOfSlices > 1;
20711
20712 // Check (1).
20713 if (NumberOfSlices != 2)
20714 return false;
20715
20716 // Check (2).
20717 if (!areUsedBitsDense(UsedBits))
20718 return false;
20719
20720 // Check (3).
20721 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20722 // The original code has one big load.
20723 OrigCost.Loads = 1;
20724 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20725 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20726 // Accumulate the cost of all the slices.
20727 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20728 GlobalSlicingCost += SliceCost;
20729
20730 // Account as cost in the original configuration the gain obtained
20731 // with the current slices.
20732 OrigCost.addSliceGain(LS);
20733 }
20734
20735 // If the target supports paired load, adjust the cost accordingly.
20736 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20737 return OrigCost > GlobalSlicingCost;
20738}
20739
20740/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20741 /// operations, split it into the various pieces being extracted.
20742///
20743/// This sort of thing is introduced by SROA.
20744/// This slicing takes care not to insert overlapping loads.
20745/// \pre LI is a simple load (i.e., not an atomic or volatile load).
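///
/// For example (illustrative, little-endian): an i32 load whose only uses are
/// (trunc load to i16) and (trunc (srl load, 16) to i16) can be sliced into an
/// i16 load at the original address and an i16 load at address + 2.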
20746bool DAGCombiner::SliceUpLoad(SDNode *N) {
20747 if (Level < AfterLegalizeDAG)
20748 return false;
20749
20750 LoadSDNode *LD = cast<LoadSDNode>(N);
20751 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20752 !LD->getValueType(0).isInteger())
20753 return false;
20754
20755 // The algorithm to split up a load of a scalable vector into individual
20756 // elements currently requires knowing the length of the loaded type,
20757 // so will need adjusting to work on scalable vectors.
20758 if (LD->getValueType(0).isScalableVector())
20759 return false;
20760
20761 // Keep track of already used bits to detect overlapping values.
20762 // In that case, we will just abort the transformation.
20763 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20764
20765 SmallVector<LoadedSlice, 4> LoadedSlices;
20766
20767 // Check if this load is used as several smaller chunks of bits.
20768 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20769 // of computation for each trunc.
20770 for (SDUse &U : LD->uses()) {
20771 // Skip the uses of the chain.
20772 if (U.getResNo() != 0)
20773 continue;
20774
20775 SDNode *User = U.getUser();
20776 unsigned Shift = 0;
20777
20778 // Check if this is a trunc(lshr).
20779 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20780 isa<ConstantSDNode>(User->getOperand(1))) {
20781 Shift = User->getConstantOperandVal(1);
20782 User = *User->user_begin();
20783 }
20784
20785 // At this point, User is a truncate iff we encountered trunc or
20786 // trunc(lshr).
20787 if (User->getOpcode() != ISD::TRUNCATE)
20788 return false;
20789
20790 // The width of the type must be a power of 2 and at least 8 bits.
20791 // Otherwise the load cannot be represented in LLVM IR.
20792 // Moreover, if the shift amount is not a multiple of 8 bits, the slice
20793 // will be across several bytes. We do not support that.
20794 unsigned Width = User->getValueSizeInBits(0);
20795 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20796 return false;
20797
20798 // Build the slice for this chain of computations.
20799 LoadedSlice LS(User, LD, Shift, &DAG);
20800 APInt CurrentUsedBits = LS.getUsedBits();
20801
20802 // Check if this slice overlaps with another.
20803 if ((CurrentUsedBits & UsedBits) != 0)
20804 return false;
20805 // Update the bits used globally.
20806 UsedBits |= CurrentUsedBits;
20807
20808 // Check if the new slice would be legal.
20809 if (!LS.isLegal())
20810 return false;
20811
20812 // Record the slice.
20813 LoadedSlices.push_back(LS);
20814 }
20815
20816 // Abort slicing if it does not seem to be profitable.
20817 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20818 return false;
20819
20820 ++SlicedLoads;
20821
20822 // Rewrite each chain to use an independent load.
20823 // By construction, each chain can be represented by a unique load.
20824
20825 // Prepare the argument for the new token factor for all the slices.
20826 SmallVector<SDValue, 8> ArgChains;
20827 for (const LoadedSlice &LS : LoadedSlices) {
20828 SDValue SliceInst = LS.loadSlice();
20829 CombineTo(LS.Inst, SliceInst, true);
20830 if (SliceInst.getOpcode() != ISD::LOAD)
20831 SliceInst = SliceInst.getOperand(0);
20832 assert(SliceInst->getOpcode() == ISD::LOAD &&
20833 "It takes more than a zext to get to the loaded slice!!");
20834 ArgChains.push_back(SliceInst.getValue(1));
20835 }
20836
20837 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20838 ArgChains);
20839 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20840 AddToWorklist(Chain.getNode());
20841 return true;
20842}
20843
20844 /// Check to see if V is (and load (ptr), imm), where the load has
20845/// specific bytes cleared out. If so, return the byte size being masked out
20846/// and the shift amount.
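/// For example (illustrative), V = (and (load i32 ptr), 0xFFFF00FF) clears only
/// byte 1 of the loaded value, so the result is {1, 1}: one byte masked out,
/// starting at byte offset 1.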
20847static std::pair<unsigned, unsigned>
20848 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
20849 std::pair<unsigned, unsigned> Result(0, 0);
20850
20851 // Check for the structure we're looking for.
20852 if (V->getOpcode() != ISD::AND ||
20853 !isa<ConstantSDNode>(V->getOperand(1)) ||
20854 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20855 return Result;
20856
20857 // Check the chain and pointer.
20858 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20859 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20860
20861 // This only handles simple types.
20862 if (V.getValueType() != MVT::i16 &&
20863 V.getValueType() != MVT::i32 &&
20864 V.getValueType() != MVT::i64)
20865 return Result;
20866
20867 // Check the constant mask. Invert it so that the bits being masked out
20868 // become 1 and the bits being kept become 0. Use getSExtValue so that
20869 // leading bits follow the sign bit for uniformity.
20870 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20871 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20872 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20873 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20874 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20875 if (NotMaskLZ == 64) return Result; // All zero mask.
20876
20877 // See if we have a continuous run of bits. If so, we have 0*1+0*
20878 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20879 return Result;
20880
20881 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20882 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20883 NotMaskLZ -= 64-V.getValueSizeInBits();
20884
20885 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20886 switch (MaskedBytes) {
20887 case 1:
20888 case 2:
20889 case 4: break;
20890 default: return Result; // All one mask, or 5-byte mask.
20891 }
20892
20893 // Verify that the masked bytes start at a multiple of the mask width so that
20894 // the access is aligned the same as the access width.
20895 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20896
20897 // For narrowing to be valid, it must be the case that the load is the
20898 // memory operation immediately preceding the store.
20899 if (LD == Chain.getNode())
20900 ; // ok.
20901 else if (Chain->getOpcode() == ISD::TokenFactor &&
20902 SDValue(LD, 1).hasOneUse()) {
20903 // LD has only 1 chain use, so there are no indirect dependencies.
20904 if (!LD->isOperandOf(Chain.getNode()))
20905 return Result;
20906 } else
20907 return Result; // Fail.
20908
20909 Result.first = MaskedBytes;
20910 Result.second = NotMaskTZ/8;
20911 return Result;
20912}
20913
20914/// Check to see if IVal is something that provides a value as specified by
20915/// MaskInfo. If so, replace the specified store with a narrower store of
20916/// truncated IVal.
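/// For example (illustrative, little-endian): with MaskInfo = {1, 1} and IVal
/// known to be zero outside byte 1, the wide store can be replaced by an i8
/// store of (trunc (srl IVal, 8)) at ptr + 1.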
20917static SDValue
20918ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
20919 SDValue IVal, StoreSDNode *St,
20920 DAGCombiner *DC) {
20921 unsigned NumBytes = MaskInfo.first;
20922 unsigned ByteShift = MaskInfo.second;
20923 SelectionDAG &DAG = DC->getDAG();
20924
20925 // Check to see if IVal is all zeros in the part being masked in by the 'or'
20926 // that uses this. If not, this is not a replacement.
20927 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
20928 ByteShift*8, (ByteShift+NumBytes)*8);
20929 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
20930
20931 // Check that it is legal on the target to do this. It is legal if the new
20932 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
20933 // legalization. If the source type is legal, but the store type isn't, see
20934 // if we can use a truncating store.
20935 MVT VT = MVT::getIntegerVT(NumBytes * 8);
20936 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20937 bool UseTruncStore;
20938 if (DC->isTypeLegal(VT))
20939 UseTruncStore = false;
20940 else if (TLI.isTypeLegal(IVal.getValueType()) &&
20941 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
20942 UseTruncStore = true;
20943 else
20944 return SDValue();
20945
20946 // Can't do this for indexed stores.
20947 if (St->isIndexed())
20948 return SDValue();
20949
20950 // Check that the target doesn't think this is a bad idea.
20951 if (St->getMemOperand() &&
20952 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
20953 *St->getMemOperand()))
20954 return SDValue();
20955
20956 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
20957 // shifted by ByteShift and truncated down to NumBytes.
20958 if (ByteShift) {
20959 SDLoc DL(IVal);
20960 IVal = DAG.getNode(
20961 ISD::SRL, DL, IVal.getValueType(), IVal,
20962 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
20963 }
20964
20965 // Figure out the offset for the store and the alignment of the access.
20966 unsigned StOffset;
20967 if (DAG.getDataLayout().isLittleEndian())
20968 StOffset = ByteShift;
20969 else
20970 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
20971
20972 SDValue Ptr = St->getBasePtr();
20973 if (StOffset) {
20974 SDLoc DL(IVal);
20975 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
20976 }
20977
20978 ++OpsNarrowed;
20979 if (UseTruncStore)
20980 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
20981 St->getPointerInfo().getWithOffset(StOffset), VT,
20982 St->getBaseAlign());
20983
20984 // Truncate down to the new size.
20985 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
20986
20987 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
20988 St->getPointerInfo().getWithOffset(StOffset),
20989 St->getBaseAlign());
20990}
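// For reference, a hypothetical C-level pattern that the two helpers above
// target: only byte 1 of *P is replaced, so on a little-endian target the
// load/or/store may be shrunk to a single one-byte store of (X & 0xff) at
// byte offset 1.
#include <cstdint>

inline void setByte1(uint32_t *P, uint32_t X) {
  *P = (*P & 0xFFFF00FFu) | ((X << 8) & 0x0000FF00u);
}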
20991
20992/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
20993/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
20994/// narrowing the load and store if it would end up being a win for performance
20995/// or code size.
20996SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
20997 StoreSDNode *ST = cast<StoreSDNode>(N);
20998 if (!ST->isSimple())
20999 return SDValue();
21000
21001 SDValue Chain = ST->getChain();
21002 SDValue Value = ST->getValue();
21003 SDValue Ptr = ST->getBasePtr();
21004 EVT VT = Value.getValueType();
21005
21006 if (ST->isTruncatingStore() || VT.isVector())
21007 return SDValue();
21008
21009 unsigned Opc = Value.getOpcode();
21010
21011 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21012 !Value.hasOneUse())
21013 return SDValue();
21014
21015 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21016 // is a byte mask indicating a consecutive number of bytes, check to see if
21017 // Y is known to provide just those bytes. If so, we try to replace the
21018 // load + modify + store sequence with a single (narrower) store, which makes
21019 // the load dead.
21020 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
21021 std::pair<unsigned, unsigned> MaskedLoad;
21022 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21023 if (MaskedLoad.first)
21024 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21025 Value.getOperand(1), ST,this))
21026 return NewST;
21027
21028 // Or is commutative, so try swapping X and Y.
21029 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21030 if (MaskedLoad.first)
21031 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21032 Value.getOperand(0), ST,this))
21033 return NewST;
21034 }
21035
21036 if (!EnableReduceLoadOpStoreWidth)
21037 return SDValue();
21038
21039 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21040 return SDValue();
21041
21042 SDValue N0 = Value.getOperand(0);
21043 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21044 Chain == SDValue(N0.getNode(), 1)) {
21045 LoadSDNode *LD = cast<LoadSDNode>(N0);
21046 if (LD->getBasePtr() != Ptr ||
21047 LD->getPointerInfo().getAddrSpace() !=
21048 ST->getPointerInfo().getAddrSpace())
21049 return SDValue();
21050
21051 // Find the type NewVT to narrow the load / op / store to.
21052 SDValue N1 = Value.getOperand(1);
21053 unsigned BitWidth = N1.getValueSizeInBits();
21054 APInt Imm = N1->getAsAPIntVal();
21055 if (Opc == ISD::AND)
21056 Imm.flipAllBits();
21057 if (Imm == 0 || Imm.isAllOnes())
21058 return SDValue();
21059 // Find the least/most significant bits that need to be part of the narrowed
21060 // operation. We assume the target will need to address/access full bytes, so
21061 // we make sure to align the LSB and MSB at byte boundaries.
21062 unsigned BitsPerByteMask = 7u;
21063 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21064 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21065 unsigned NewBW = NextPowerOf2(MSB - LSB);
21066 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21067 // The narrowing should be profitable, the load/store operation should be
21068 // legal (or custom) and the store size should be equal to the NewVT width.
21069 while (NewBW < BitWidth &&
21070 (NewVT.getStoreSizeInBits() != NewBW ||
21071 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
21072 (!ReduceLoadOpStoreWidthForceNarrowingProfitable &&
21073 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21074 NewBW = NextPowerOf2(NewBW);
21075 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21076 }
21077 if (NewBW >= BitWidth)
21078 return SDValue();
21079
21080 // If we come this far, NewVT/NewBW reflect a power-of-2 sized type that is
21081 // large enough to cover all bits that should be modified. This type might
21082 // however be larger than really needed (such as i32 while we actually only
21083 // need to modify one byte). Now we need to find out how to align the memory
21084 // accesses to satisfy preferred alignments while avoiding accesses to
21085 // memory outside the store size of the original access.
21086
21087 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21088
21089 // Let ShAmt denote the number of bits to skip, counted from the least
21090 // significant bits of Imm, and let PtrOff denote how much the pointer
21091 // needs to be offset (in bytes) for the new access.
21092 unsigned ShAmt = 0;
21093 uint64_t PtrOff = 0;
21094 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
21095 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21096 if (ShAmt > LSB)
21097 return SDValue();
21098 if (ShAmt + NewBW < MSB)
21099 continue;
21100
21101 // Calculate PtrOff.
21102 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21103 ? VTStoreSize - NewBW - ShAmt
21104 : ShAmt;
21105 PtrOff = PtrAdjustmentInBits / 8;
21106
21107 // Now check if narrow access is allowed and fast, considering alignments.
21108 unsigned IsFast = 0;
21109 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21110 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21111 LD->getAddressSpace(), NewAlign,
21112 LD->getMemOperand()->getFlags(), &IsFast) &&
21113 IsFast)
21114 break;
21115 }
21116 // If the loop above did not find an accepted ShAmt, we need to exit here.
21117 if (ShAmt + NewBW > VTStoreSize)
21118 return SDValue();
21119
21120 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21121 if (Opc == ISD::AND)
21122 NewImm.flipAllBits();
21123 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21124 SDValue NewPtr =
21125 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21126 SDValue NewLD =
21127 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21128 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21129 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21130 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21131 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21132 SDValue NewST =
21133 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21134 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21135
21136 AddToWorklist(NewPtr.getNode());
21137 AddToWorklist(NewLD.getNode());
21138 AddToWorklist(NewVal.getNode());
21139 WorklistRemover DeadNodes(*this);
21140 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21141 ++OpsNarrowed;
21142 return NewST;
21143 }
21144
21145 return SDValue();
21146}
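// A hypothetical source-level example of the narrowing above: the OR only
// touches bit 8 of the 32-bit value, so the load/or/store may be reduced to
// a one-byte access at byte offset 1 on a little-endian target, subject to
// the legality and alignment checks in the function.
#include <cstdint>

inline void setFlag(uint32_t *P) {
  *P |= 0x100u; // May become: ((uint8_t *)P)[1] |= 0x01;
}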
21147
21148/// For a given floating point load / store pair, if the load value isn't used
21149/// by any other operations, then consider transforming the pair to integer
21150/// load / store operations if the target deems the transformation profitable.
21151SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21152 StoreSDNode *ST = cast<StoreSDNode>(N);
21153 SDValue Value = ST->getValue();
21154 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21155 Value.hasOneUse()) {
21156 LoadSDNode *LD = cast<LoadSDNode>(Value);
21157 EVT VT = LD->getMemoryVT();
21158 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21159 LD->isNonTemporal() || ST->isNonTemporal() ||
21160 LD->getPointerInfo().getAddrSpace() != 0 ||
21161 ST->getPointerInfo().getAddrSpace() != 0)
21162 return SDValue();
21163
21164 TypeSize VTSize = VT.getSizeInBits();
21165
21166 // We don't know the size of scalable types at compile time so we cannot
21167 // create an integer of the equivalent size.
21168 if (VTSize.isScalable())
21169 return SDValue();
21170
21171 unsigned FastLD = 0, FastST = 0;
21172 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21173 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21174 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21175 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21176 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21177 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21178 *LD->getMemOperand(), &FastLD) ||
21179 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21180 *ST->getMemOperand(), &FastST) ||
21181 !FastLD || !FastST)
21182 return SDValue();
21183
21184 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21185 LD->getBasePtr(), LD->getMemOperand());
21186
21187 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21188 ST->getBasePtr(), ST->getMemOperand());
21189
21190 AddToWorklist(NewLD.getNode());
21191 AddToWorklist(NewST.getNode());
21192 WorklistRemover DeadNodes(*this);
21193 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21194 ++LdStFP2Int;
21195 return NewST;
21196 }
21197
21198 return SDValue();
21199}
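// A hypothetical example of the pattern handled above: the loaded float is
// only stored again and never used as a floating-point value, so the copy
// may be performed with an integer load/store pair if the target considers
// that profitable.
inline void copyFloat(float *Dst, const float *Src) {
  *Dst = *Src; // May become a 32-bit integer load + store.
}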
21200
21201// This is a helper function for visitMUL to check the profitability
21202// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21203// MulNode is the original multiply, AddNode is (add x, c1),
21204// and ConstNode is c2.
21205//
21206// If the (add x, c1) has multiple uses, we could increase
21207// the number of adds if we make this transformation.
21208// It would only be worth doing this if we can remove a
21209// multiply in the process. Check for that here.
21210// To illustrate:
21211// (A + c1) * c3
21212// (A + c2) * c3
21213// We're checking for cases where we have common "c3 * A" expressions.
21214bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21215 SDValue ConstNode) {
21216 // If the add only has one use, and the target thinks the folding is
21217 // profitable or does not lead to worse code, this would be OK to do.
21218 if (AddNode->hasOneUse() &&
21219 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21220 return true;
21221
21222 // Walk all the users of the constant with which we're multiplying.
21223 for (SDNode *User : ConstNode->users()) {
21224 if (User == MulNode) // This use is the one we're on right now. Skip it.
21225 continue;
21226
21227 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21228 SDNode *OtherOp;
21229 SDNode *MulVar = AddNode.getOperand(0).getNode();
21230
21231 // OtherOp is what we're multiplying against the constant.
21232 if (User->getOperand(0) == ConstNode)
21233 OtherOp = User->getOperand(1).getNode();
21234 else
21235 OtherOp = User->getOperand(0).getNode();
21236
21237 // Check to see if multiply is with the same operand of our "add".
21238 //
21239 // ConstNode = CONST
21240 // User = ConstNode * A <-- visiting User. OtherOp is A.
21241 // ...
21242 // AddNode = (A + c1) <-- MulVar is A.
21243 // = AddNode * ConstNode <-- current visiting instruction.
21244 //
21245 // If we make this transformation, we will have a common
21246 // multiply (ConstNode * A) that we can save.
21247 if (OtherOp == MulVar)
21248 return true;
21249
21250 // Now check to see if a future expansion will give us a common
21251 // multiply.
21252 //
21253 // ConstNode = CONST
21254 // AddNode = (A + c1)
21255 // ... = AddNode * ConstNode <-- current visiting instruction.
21256 // ...
21257 // OtherOp = (A + c2)
21258 // User = OtherOp * ConstNode <-- visiting User.
21259 //
21260 // If we make this transformation, we will have a common
21261 // multiply (CONST * A) after we also do the same transformation
21262 // to the "t2" instruction.
21263 if (OtherOp->getOpcode() == ISD::ADD &&
21264 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
21265 OtherOp->getOperand(0).getNode() == MulVar)
21266 return true;
21267 }
21268 }
21269
21270 // Didn't find a case where this would be profitable.
21271 return false;
21272}
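// A worked example (hypothetical values) of the profitability reasoning
// above, with c1 = 3, c2 = 5 and the shared constant c3 = 7:
inline int mulAddExample(int A) {
  int T1 = (A + 3) * 7; // Folds to A*7 + 21.
  int T2 = (A + 5) * 7; // Folds to A*7 + 35; A*7 is now a common subexpression.
  return T1 ^ T2;
}
// The extra adds introduced by the folds are paid for by removing one of the
// two multiplies.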
21273
21274SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21275 unsigned NumStores) {
21276 SmallVector<SDValue, 8> Chains;
21277 SmallPtrSet<const SDNode *, 8> Visited;
21278 SDLoc StoreDL(StoreNodes[0].MemNode);
21279
21280 for (unsigned i = 0; i < NumStores; ++i) {
21281 Visited.insert(StoreNodes[i].MemNode);
21282 }
21283
21284 // don't include nodes that are children or repeated nodes.
21285 for (unsigned i = 0; i < NumStores; ++i) {
21286 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21287 Chains.push_back(StoreNodes[i].MemNode->getChain());
21288 }
21289
21290 assert(!Chains.empty() && "Chain should have generated a chain");
21291 return DAG.getTokenFactor(StoreDL, Chains);
21292}
21293
21294bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21295 const Value *UnderlyingObj = nullptr;
21296 for (const auto &MemOp : StoreNodes) {
21297 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
21298 // Pseudo value like stack frame has its own frame index and size, should
21299 // not use the first store's frame index for other frames.
21300 if (MMO->getPseudoValue())
21301 return false;
21302
21303 if (!MMO->getValue())
21304 return false;
21305
21306 const Value *Obj = getUnderlyingObject(MMO->getValue());
21307
21308 if (UnderlyingObj && UnderlyingObj != Obj)
21309 return false;
21310
21311 if (!UnderlyingObj)
21312 UnderlyingObj = Obj;
21313 }
21314
21315 return true;
21316}
21317
21318bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21319 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21320 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21321 // Make sure we have something to merge.
21322 if (NumStores < 2)
21323 return false;
21324
21325 assert((!UseTrunc || !UseVector) &&
21326 "This optimization cannot emit a vector truncating store");
21327
21328 // The latest Node in the DAG.
21329 SDLoc DL(StoreNodes[0].MemNode);
21330
21331 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21332 unsigned SizeInBits = NumStores * ElementSizeBits;
21333 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21334
21335 std::optional<MachineMemOperand::Flags> Flags;
21336 AAMDNodes AAInfo;
21337 for (unsigned I = 0; I != NumStores; ++I) {
21338 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21339 if (!Flags) {
21340 Flags = St->getMemOperand()->getFlags();
21341 AAInfo = St->getAAInfo();
21342 continue;
21343 }
21344 // Skip merging if there's an inconsistent flag.
21345 if (Flags != St->getMemOperand()->getFlags())
21346 return false;
21347 // Concatenate AA metadata.
21348 AAInfo = AAInfo.concat(St->getAAInfo());
21349 }
21350
21351 EVT StoreTy;
21352 if (UseVector) {
21353 unsigned Elts = NumStores * NumMemElts;
21354 // Get the type for the merged vector store.
21355 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21356 } else
21357 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21358
21359 SDValue StoredVal;
21360 if (UseVector) {
21361 if (IsConstantSrc) {
21362 SmallVector<SDValue, 8> BuildVector;
21363 for (unsigned I = 0; I != NumStores; ++I) {
21364 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21365 SDValue Val = St->getValue();
21366 // If constant is of the wrong type, convert it now. This comes up
21367 // when one of our stores was truncating.
21368 if (MemVT != Val.getValueType()) {
21369 Val = peekThroughBitcasts(Val);
21370 // Deal with constants of wrong size.
21371 if (ElementSizeBits != Val.getValueSizeInBits()) {
21372 auto *C = dyn_cast<ConstantSDNode>(Val);
21373 if (!C)
21374 // Not clear how to truncate FP values.
21375 // TODO: Handle truncation of build_vector constants
21376 return false;
21377
21378 EVT IntMemVT =
21379 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
21380 Val = DAG.getConstant(C->getAPIntValue()
21381 .zextOrTrunc(Val.getValueSizeInBits())
21382 .zextOrTrunc(ElementSizeBits),
21383 SDLoc(C), IntMemVT);
21384 }
21385 // Bitcast the value to the correctly sized type.
21386 Val = DAG.getBitcast(MemVT, Val);
21387 }
21388 BuildVector.push_back(Val);
21389 }
21390 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21391 : ISD::BUILD_VECTOR,
21392 DL, StoreTy, BuildVector);
21393 } else {
21394 SmallVector<SDValue, 8> Ops;
21395 for (unsigned i = 0; i < NumStores; ++i) {
21396 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21397 SDValue Val = peekThroughBitcasts(St->getValue());
21398 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21399 // type MemVT. If the underlying value is not the correct
21400 // type, but it is an extraction of an appropriate vector we
21401 // can recast Val to be of the correct type. This may require
21402 // converting between EXTRACT_VECTOR_ELT and
21403 // EXTRACT_SUBVECTOR.
21404 if ((MemVT != Val.getValueType()) &&
21405 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
21406 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
21407 EVT MemVTScalarTy = MemVT.getScalarType();
21408 // We may need to add a bitcast here to get types to line up.
21409 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21410 Val = DAG.getBitcast(MemVT, Val);
21411 } else if (MemVT.isVector() &&
21412 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
21413 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21414 } else {
21415 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
21416 : ISD::EXTRACT_VECTOR_ELT;
21417 SDValue Vec = Val.getOperand(0);
21418 SDValue Idx = Val.getOperand(1);
21419 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21420 }
21421 }
21422 Ops.push_back(Val);
21423 }
21424
21425 // Build the extracted vector elements back into a vector.
21426 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21427 : ISD::BUILD_VECTOR,
21428 DL, StoreTy, Ops);
21429 }
21430 } else {
21431 // We should always use a vector store when merging extracted vector
21432 // elements, so this path implies a store of constants.
21433 assert(IsConstantSrc && "Merged vector elements should use vector store");
21434
21435 APInt StoreInt(SizeInBits, 0);
21436
21437 // Construct a single integer constant which is made of the smaller
21438 // constant inputs.
21439 bool IsLE = DAG.getDataLayout().isLittleEndian();
21440 for (unsigned i = 0; i < NumStores; ++i) {
21441 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21442 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21443
21444 SDValue Val = St->getValue();
21445 Val = peekThroughBitcasts(Val);
21446 StoreInt <<= ElementSizeBits;
21447 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21448 StoreInt |= C->getAPIntValue()
21449 .zextOrTrunc(ElementSizeBits)
21450 .zextOrTrunc(SizeInBits);
21451 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21452 StoreInt |= C->getValueAPF()
21453 .bitcastToAPInt()
21454 .zextOrTrunc(ElementSizeBits)
21455 .zextOrTrunc(SizeInBits);
21456 // If fp truncation is necessary give up for now.
21457 if (MemVT.getSizeInBits() != ElementSizeBits)
21458 return false;
21459 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
21460 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
21461 // Not yet handled
21462 return false;
21463 } else {
21464 llvm_unreachable("Invalid constant element type");
21465 }
21466 }
21467
21468 // Create the new Load and Store operations.
21469 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21470 }
21471
21472 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21473 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21474 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21475
21476 // Make sure we use a trunc store if it's necessary to be legal.
21477 // When generating the new widened store, if the first store's pointer info
21478 // cannot be reused, discard the pointer info except for the address space,
21479 // because the widened store can no longer be represented by the original
21480 // pointer info, which describes the narrow memory object.
21481 SDValue NewStore;
21482 if (!UseTrunc) {
21483 NewStore = DAG.getStore(
21484 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21485 CanReusePtrInfo
21486 ? FirstInChain->getPointerInfo()
21487 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21488 FirstInChain->getAlign(), *Flags, AAInfo);
21489 } else { // Must be realized as a trunc store
21490 EVT LegalizedStoredValTy =
21491 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21492 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21493 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21494 SDValue ExtendedStoreVal =
21495 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21496 LegalizedStoredValTy);
21497 NewStore = DAG.getTruncStore(
21498 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21499 CanReusePtrInfo
21500 ? FirstInChain->getPointerInfo()
21501 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21502 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21503 AAInfo);
21504 }
21505
21506 // Replace all merged stores with the new store.
21507 for (unsigned i = 0; i < NumStores; ++i)
21508 CombineTo(StoreNodes[i].MemNode, NewStore);
21509
21510 AddToWorklist(NewChain.getNode());
21511 return true;
21512}
21513
21514SDNode *
21515DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21516 SmallVectorImpl<MemOpLink> &StoreNodes) {
21517 // This holds the base pointer, index, and the offset in bytes from the base
21518 // pointer. We must have a base and an offset. Do not handle stores to undef
21519 // base pointers.
21520 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21521 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21522 return nullptr;
21523
21524 SDValue Val = peekThroughBitcasts(St->getValue());
21525 StoreSource StoreSrc = getStoreSource(Val);
21526 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21527
21528 // Match on loadbaseptr if relevant.
21529 EVT MemVT = St->getMemoryVT();
21530 BaseIndexOffset LBasePtr;
21531 EVT LoadVT;
21532 if (StoreSrc == StoreSource::Load) {
21533 auto *Ld = cast<LoadSDNode>(Val);
21534 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21535 LoadVT = Ld->getMemoryVT();
21536 // Load and store should be the same type.
21537 if (MemVT != LoadVT)
21538 return nullptr;
21539 // Loads must only have one use.
21540 if (!Ld->hasNUsesOfValue(1, 0))
21541 return nullptr;
21542 // The memory operands must not be volatile/indexed/atomic.
21543 // TODO: May be able to relax for unordered atomics (see D66309)
21544 if (!Ld->isSimple() || Ld->isIndexed())
21545 return nullptr;
21546 }
21547 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21548 int64_t &Offset) -> bool {
21549 // The memory operands must not be volatile/indexed/atomic.
21550 // TODO: May be able to relax for unordered atomics (see D66309)
21551 if (!Other->isSimple() || Other->isIndexed())
21552 return false;
21553 // Don't mix temporal stores with non-temporal stores.
21554 if (St->isNonTemporal() != Other->isNonTemporal())
21555 return false;
21556 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
21557 return false;
21558 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21559 // Allow merging constants of different types as integers.
21560 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21561 : Other->getMemoryVT() != MemVT;
21562 switch (StoreSrc) {
21563 case StoreSource::Load: {
21564 if (NoTypeMatch)
21565 return false;
21566 // The Load's Base Ptr must also match.
21567 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21568 if (!OtherLd)
21569 return false;
21570 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21571 if (LoadVT != OtherLd->getMemoryVT())
21572 return false;
21573 // Loads must only have one use.
21574 if (!OtherLd->hasNUsesOfValue(1, 0))
21575 return false;
21576 // The memory operands must not be volatile/indexed/atomic.
21577 // TODO: May be able to relax for unordered atomics (see D66309)
21578 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21579 return false;
21580 // Don't mix temporal loads with non-temporal loads.
21581 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21582 return false;
21583 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
21584 *OtherLd))
21585 return false;
21586 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21587 return false;
21588 break;
21589 }
21590 case StoreSource::Constant:
21591 if (NoTypeMatch)
21592 return false;
21593 if (getStoreSource(OtherBC) != StoreSource::Constant)
21594 return false;
21595 break;
21596 case StoreSource::Extract:
21597 // Do not merge truncated stores here.
21598 if (Other->isTruncatingStore())
21599 return false;
21600 if (!MemVT.bitsEq(OtherBC.getValueType()))
21601 return false;
21602 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
21603 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21604 return false;
21605 break;
21606 default:
21607 llvm_unreachable("Unhandled store source for merging");
21608 }
21609 Ptr = BaseIndexOffset::match(Other, DAG);
21610 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
21611 };
21612
21613 // We are looking for a root node which is an ancestor to all mergeable
21614 // stores. We search up through a load, to our root and then down
21615 // through all children. For instance we will find Store{1,2,3} if
21616 // St is Store1, Store2, or Store3 where the root is not a load,
21617 // which is always true for non-volatile ops. TODO: Expand
21618 // the search to find all valid candidates through multiple layers of loads.
21619 //
21620 // Root
21621 // |-------|-------|
21622 // Load Load Store3
21623 // | |
21624 // Store1 Store2
21625 //
21626 // FIXME: We should be able to climb and
21627 // descend TokenFactors to find candidates as well.
21628
21629 SDNode *RootNode = St->getChain().getNode();
21630 // Bail out if we already analyzed this root node and found nothing.
21631 if (ChainsWithoutMergeableStores.contains(RootNode))
21632 return nullptr;
21633
21634 // Check if the pair of StoreNode and the RootNode already bail out many
21635 // times which is over the limit in dependence check.
21636 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
21637 SDNode *RootNode) -> bool {
21638 auto RootCount = StoreRootCountMap.find(StoreNode);
21639 return RootCount != StoreRootCountMap.end() &&
21640 RootCount->second.first == RootNode &&
21641 RootCount->second.second > StoreMergeDependenceLimit;
21642 };
21643
21644 auto TryToAddCandidate = [&](SDUse &Use) {
21645 // This must be a chain use.
21646 if (Use.getOperandNo() != 0)
21647 return;
21648 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
21649 BaseIndexOffset Ptr;
21650 int64_t PtrDiff;
21651 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
21652 !OverLimitInDependenceCheck(OtherStore, RootNode))
21653 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
21654 }
21655 };
21656
21657 unsigned NumNodesExplored = 0;
21658 const unsigned MaxSearchNodes = 1024;
21659 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
21660 RootNode = Ldn->getChain().getNode();
21661 // Bail out if we already analyzed this root node and found nothing.
21662 if (ChainsWithoutMergeableStores.contains(RootNode))
21663 return nullptr;
21664 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21665 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
21666 SDNode *User = I->getUser();
21667 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
21668 for (SDUse &U2 : User->uses())
21669 TryToAddCandidate(U2);
21670 }
21671 // Check stores that depend on the root (e.g. Store 3 in the chart above).
21672 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
21673 TryToAddCandidate(*I);
21674 }
21675 }
21676 } else {
21677 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21678 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
21679 TryToAddCandidate(*I);
21680 }
21681
21682 return RootNode;
21683}
21684
21685// We need to check that merging these stores does not cause a loop in the
21686// DAG. Any store candidate may depend on another candidate indirectly through
21687// its operands. Check in parallel by searching up from operands of candidates.
21688bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21689 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21690 SDNode *RootNode) {
21691 // FIXME: We should be able to truncate a full search of
21692 // predecessors by doing a BFS and keeping tabs on the originating
21693 // stores from which worklist nodes come, in a similar way to
21694 // TokenFactor simplification.
21695
21696 SmallPtrSet<const SDNode *, 32> Visited;
21697 SmallVector<const SDNode *, 8> Worklist;
21698
21699 // RootNode is a predecessor to all candidates so we need not search
21700 // past it. Add RootNode (peeking through TokenFactors). Do not count
21701 // these towards size check.
21702
21703 Worklist.push_back(RootNode);
21704 while (!Worklist.empty()) {
21705 auto N = Worklist.pop_back_val();
21706 if (!Visited.insert(N).second)
21707 continue; // Already present in Visited.
21708 if (N->getOpcode() == ISD::TokenFactor) {
21709 for (SDValue Op : N->ops())
21710 Worklist.push_back(Op.getNode());
21711 }
21712 }
21713
21714 // Don't count pruning nodes towards max.
21715 unsigned int Max = 1024 + Visited.size();
21716 // Search Ops of store candidates.
21717 for (unsigned i = 0; i < NumStores; ++i) {
21718 SDNode *N = StoreNodes[i].MemNode;
21719 // Of the 4 Store Operands:
21720 // * Chain (Op 0) -> We have already considered these
21721 // in candidate selection, but only by following the
21722 // chain dependencies. We could still have a chain
21723 // dependency to a load, that has a non-chain dep to
21724 // another load, that depends on a store, etc. So it is
21725 // possible to have dependencies that consist of a mix
21726 // of chain and non-chain deps, and we need to include
21727 // chain operands in the analysis here.
21728 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21729 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21730 // but aren't necessarily from the same base node, so
21731 // cycles are possible (e.g. via indexed store).
21732 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21733 // non-indexed stores). Not constant on all targets (e.g. ARM)
21734 // and so can participate in a cycle.
21735 for (const SDValue &Op : N->op_values())
21736 Worklist.push_back(Op.getNode());
21737 }
21738 // Search through DAG. We can stop early if we find a store node.
21739 for (unsigned i = 0; i < NumStores; ++i)
21740 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21741 Max)) {
21742 // If the search bails out, record the StoreNode and RootNode in the
21743 // StoreRootCountMap. If we have seen the pair many times over a limit,
21744 // we won't add the StoreNode into StoreNodes set again.
21745 if (Visited.size() >= Max) {
21746 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21747 if (RootCount.first == RootNode)
21748 RootCount.second++;
21749 else
21750 RootCount = {RootNode, 1};
21751 }
21752 return false;
21753 }
21754 return true;
21755}
21756
21757bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
21758 SmallPtrSet<const SDNode *, 32> Visited;
21759 SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
21760 Worklist.emplace_back(St->getChain().getNode(), false);
21761
21762 while (!Worklist.empty()) {
21763 auto [Node, FoundCall] = Worklist.pop_back_val();
21764 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
21765 continue;
21766
21767 switch (Node->getOpcode()) {
21768 case ISD::CALLSEQ_END:
21769 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
21770 break;
21771 case ISD::TokenFactor:
21772 for (SDValue Op : Node->ops())
21773 Worklist.emplace_back(Op.getNode(), FoundCall);
21774 break;
21775 case ISD::LOAD:
21776 if (Node == Ld)
21777 return FoundCall;
21778 [[fallthrough]];
21779 default:
21780 assert(Node->getOperand(0).getValueType() == MVT::Other &&
21781 "Invalid chain type");
21782 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
21783 break;
21784 }
21785 }
21786 return false;
21787}
21788
21789unsigned
21790DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21791 int64_t ElementSizeBytes) const {
21792 while (true) {
21793 // Find a store past the width of the first store.
21794 size_t StartIdx = 0;
21795 while ((StartIdx + 1 < StoreNodes.size()) &&
21796 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21797 StoreNodes[StartIdx + 1].OffsetFromBase)
21798 ++StartIdx;
21799
21800 // Bail if we don't have enough candidates to merge.
21801 if (StartIdx + 1 >= StoreNodes.size())
21802 return 0;
21803
21804 // Trim stores that overlapped with the first store.
21805 if (StartIdx)
21806 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21807
21808 // Scan the memory operations on the chain and find the first
21809 // non-consecutive store memory address.
21810 unsigned NumConsecutiveStores = 1;
21811 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21812 // Check that the addresses are consecutive starting from the second
21813 // element in the list of stores.
21814 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21815 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21816 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21817 break;
21818 NumConsecutiveStores = i + 1;
21819 }
21820 if (NumConsecutiveStores > 1)
21821 return NumConsecutiveStores;
21822
21823 // There are no consecutive stores at the start of the list.
21824 // Remove the first store and try again.
21825 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21826 }
21827}
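// A standalone sketch (not the DAG code) of the scan above over sorted byte
// offsets: it drops leading entries that are not followed at Offset +
// ElemSize and reports the length of the first consecutive run. The helper
// name leadingConsecutiveRun is hypothetical.
#include <cstdint>
#include <vector>

inline unsigned leadingConsecutiveRun(std::vector<int64_t> Offsets,
                                      int64_t ElemSize) {
  while (Offsets.size() >= 2) {
    // Skip leading entries that are not immediately followed by the next
    // consecutive offset.
    size_t Start = 0;
    while (Start + 1 < Offsets.size() &&
           Offsets[Start] + ElemSize != Offsets[Start + 1])
      ++Start;
    if (Start + 1 >= Offsets.size())
      return 0;
    Offsets.erase(Offsets.begin(), Offsets.begin() + Start);

    // Count the consecutive run that now starts at the front.
    unsigned Run = 1;
    for (size_t i = 1; i < Offsets.size(); ++i) {
      if (Offsets[i] - Offsets[0] != ElemSize * (int64_t)i)
        break;
      Run = (unsigned)i + 1;
    }
    if (Run > 1)
      return Run;
    Offsets.erase(Offsets.begin()); // No run at the front; retry without it.
  }
  return 0;
}
// For example, leadingConsecutiveRun({0, 8, 12, 16}, 4) returns 3 (the run
// {8, 12, 16}).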
21828
21829bool DAGCombiner::tryStoreMergeOfConstants(
21830 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21831 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21832 LLVMContext &Context = *DAG.getContext();
21833 const DataLayout &DL = DAG.getDataLayout();
21834 int64_t ElementSizeBytes = MemVT.getStoreSize();
21835 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21836 bool MadeChange = false;
21837
21838 // Store the constants into memory as one consecutive store.
21839 while (NumConsecutiveStores >= 2) {
21840 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21841 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21842 Align FirstStoreAlign = FirstInChain->getAlign();
21843 unsigned LastLegalType = 1;
21844 unsigned LastLegalVectorType = 1;
21845 bool LastIntegerTrunc = false;
21846 bool NonZero = false;
21847 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21848 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21849 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21850 SDValue StoredVal = ST->getValue();
21851 bool IsElementZero = false;
21852 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21853 IsElementZero = C->isZero();
21854 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21855 IsElementZero = C->getConstantFPValue()->isNullValue();
21856 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21857 IsElementZero = true;
21858 if (IsElementZero) {
21859 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21860 FirstZeroAfterNonZero = i;
21861 }
21862 NonZero |= !IsElementZero;
21863
21864 // Find a legal type for the constant store.
21865 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21866 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21867 unsigned IsFast = 0;
21868
21869 // Break early when size is too large to be legal.
21870 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21871 break;
21872
21873 if (TLI.isTypeLegal(StoreTy) &&
21874 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21875 DAG.getMachineFunction()) &&
21876 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21877 *FirstInChain->getMemOperand(), &IsFast) &&
21878 IsFast) {
21879 LastIntegerTrunc = false;
21880 LastLegalType = i + 1;
21881 // Or check whether a truncstore is legal.
21882 } else if (TLI.getTypeAction(Context, StoreTy) ==
21883 TargetLowering::TypePromoteInteger) {
21884 EVT LegalizedStoredValTy =
21885 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21886 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21887 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21888 DAG.getMachineFunction()) &&
21889 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21890 *FirstInChain->getMemOperand(), &IsFast) &&
21891 IsFast) {
21892 LastIntegerTrunc = true;
21893 LastLegalType = i + 1;
21894 }
21895 }
21896
21897 // We only use vectors if the target allows it and the function is not
21898 // marked with the noimplicitfloat attribute.
21899 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21900 AllowVectors) {
21901 // Find a legal type for the vector store.
21902 unsigned Elts = (i + 1) * NumMemElts;
21903 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21904 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21905 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21906 TLI.allowsMemoryAccess(Context, DL, Ty,
21907 *FirstInChain->getMemOperand(), &IsFast) &&
21908 IsFast)
21909 LastLegalVectorType = i + 1;
21910 }
21911 }
21912
21913 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
21914 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
21915 bool UseTrunc = LastIntegerTrunc && !UseVector;
21916
21917 // Check if we found a legal integer type that creates a meaningful
21918 // merge.
21919 if (NumElem < 2) {
21920 // We know that candidate stores are in order and of correct
21921 // shape. While there is no mergeable sequence from the
21922 // beginning one may start later in the sequence. The only
21923 // reason a merge of size N could have failed where another of
21924 // the same size would not have, is if the alignment has
21925 // improved or we've dropped a non-zero value. Drop as many
21926 // candidates as we can here.
21927 unsigned NumSkip = 1;
21928 while ((NumSkip < NumConsecutiveStores) &&
21929 (NumSkip < FirstZeroAfterNonZero) &&
21930 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21931 NumSkip++;
21932
21933 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21934 NumConsecutiveStores -= NumSkip;
21935 continue;
21936 }
21937
21938 // Check that we can merge these candidates without causing a cycle.
21939 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21940 RootNode)) {
21941 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21942 NumConsecutiveStores -= NumElem;
21943 continue;
21944 }
21945
21946 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
21947 /*IsConstantSrc*/ true,
21948 UseVector, UseTrunc);
21949
21950 // Remove merged stores for next iteration.
21951 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21952 NumConsecutiveStores -= NumElem;
21953 }
21954 return MadeChange;
21955}
21956
21957bool DAGCombiner::tryStoreMergeOfExtracts(
21958 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21959 EVT MemVT, SDNode *RootNode) {
21960 LLVMContext &Context = *DAG.getContext();
21961 const DataLayout &DL = DAG.getDataLayout();
21962 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21963 bool MadeChange = false;
21964
21965 // Loop on Consecutive Stores on success.
21966 while (NumConsecutiveStores >= 2) {
21967 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21968 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21969 Align FirstStoreAlign = FirstInChain->getAlign();
21970 unsigned NumStoresToMerge = 1;
21971 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21972 // Find a legal type for the vector store.
21973 unsigned Elts = (i + 1) * NumMemElts;
21974 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21975 unsigned IsFast = 0;
21976
21977 // Break early when size is too large to be legal.
21978 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
21979 break;
21980
21981 if (TLI.isTypeLegal(Ty) &&
21982 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21983 TLI.allowsMemoryAccess(Context, DL, Ty,
21984 *FirstInChain->getMemOperand(), &IsFast) &&
21985 IsFast)
21986 NumStoresToMerge = i + 1;
21987 }
21988
21989 // Check if we found a legal integer type creating a meaningful
21990 // merge.
21991 if (NumStoresToMerge < 2) {
21992 // We know that candidate stores are in order and of correct
21993 // shape. While there is no mergeable sequence from the
21994 // beginning one may start later in the sequence. The only
21995 // reason a merge of size N could have failed where another of
21996 // the same size would not have, is if the alignment has
21997 // improved. Drop as many candidates as we can here.
21998 unsigned NumSkip = 1;
21999 while ((NumSkip < NumConsecutiveStores) &&
22000 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22001 NumSkip++;
22002
22003 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22004 NumConsecutiveStores -= NumSkip;
22005 continue;
22006 }
22007
22008 // Check that we can merge these candidates without causing a cycle.
22009 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
22010 RootNode)) {
22011 StoreNodes.erase(StoreNodes.begin(),
22012 StoreNodes.begin() + NumStoresToMerge);
22013 NumConsecutiveStores -= NumStoresToMerge;
22014 continue;
22015 }
22016
22017 MadeChange |= mergeStoresOfConstantsOrVecElts(
22018 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22019 /*UseVector*/ true, /*UseTrunc*/ false);
22020
22021 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22022 NumConsecutiveStores -= NumStoresToMerge;
22023 }
22024 return MadeChange;
22025}
22026
22027bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22028 unsigned NumConsecutiveStores, EVT MemVT,
22029 SDNode *RootNode, bool AllowVectors,
22030 bool IsNonTemporalStore,
22031 bool IsNonTemporalLoad) {
22032 LLVMContext &Context = *DAG.getContext();
22033 const DataLayout &DL = DAG.getDataLayout();
22034 int64_t ElementSizeBytes = MemVT.getStoreSize();
22035 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22036 bool MadeChange = false;
22037
22038 // Look for load nodes which are used by the stored values.
22039 SmallVector<MemOpLink, 8> LoadNodes;
22040
22041 // Find acceptable loads. Loads need to have the same chain (token factor),
22042 // must not be zext, volatile, indexed, and they must be consecutive.
22043 BaseIndexOffset LdBasePtr;
22044
22045 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22046 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22047 SDValue Val = peekThroughBitcasts(St->getValue());
22048 LoadSDNode *Ld = cast<LoadSDNode>(Val);
22049
22050 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22051 // If this is not the first ptr that we check.
22052 int64_t LdOffset = 0;
22053 if (LdBasePtr.getBase().getNode()) {
22054 // The base ptr must be the same.
22055 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22056 break;
22057 } else {
22058 // Check that all other base pointers are the same as this one.
22059 LdBasePtr = LdPtr;
22060 }
22061
22062 // We found a potential memory operand to merge.
22063 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22064 }
22065
22066 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22067 Align RequiredAlignment;
22068 bool NeedRotate = false;
22069 if (LoadNodes.size() == 2) {
22070 // If we have load/store pair instructions and we only have two values,
22071 // don't bother merging.
22072 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22073 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22074 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22075 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22076 break;
22077 }
22078 // If the loads are reversed, see if we can rotate the halves into place.
22079 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22080 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22081 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22082 if (Offset0 - Offset1 == ElementSizeBytes &&
22083 (hasOperation(ISD::ROTL, PairVT) ||
22084 hasOperation(ISD::ROTR, PairVT))) {
22085 std::swap(LoadNodes[0], LoadNodes[1]);
22086 NeedRotate = true;
22087 }
22088 }
22089 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22090 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22091 Align FirstStoreAlign = FirstInChain->getAlign();
22092 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22093
22094 // Scan the memory operations on the chain and find the first
22095 // non-consecutive load memory address. These variables hold the index in
22096 // the store node array.
22097
22098 unsigned LastConsecutiveLoad = 1;
22099
22100 // These variables refer to a size, not an index in the array.
22101 unsigned LastLegalVectorType = 1;
22102 unsigned LastLegalIntegerType = 1;
22103 bool isDereferenceable = true;
22104 bool DoIntegerTruncate = false;
22105 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22106 SDValue LoadChain = FirstLoad->getChain();
22107 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22108 // All loads must share the same chain.
22109 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22110 break;
22111
22112 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22113 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22114 break;
22115 LastConsecutiveLoad = i;
22116
22117 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22118 isDereferenceable = false;
22119
22120 // Find a legal type for the vector store.
22121 unsigned Elts = (i + 1) * NumMemElts;
22122 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22123
22124 // Break early when size is too large to be legal.
22125 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22126 break;
22127
22128 unsigned IsFastSt = 0;
22129 unsigned IsFastLd = 0;
22130 // Don't try vector types if we need a rotate. We may still fail the
22131 // legality checks for the integer type, but we can't handle the rotate
22132 // case with vectors.
22133 // FIXME: We could use a shuffle in place of the rotate.
22134 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22135 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22136 DAG.getMachineFunction()) &&
22137 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22138 *FirstInChain->getMemOperand(), &IsFastSt) &&
22139 IsFastSt &&
22140 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22141 *FirstLoad->getMemOperand(), &IsFastLd) &&
22142 IsFastLd) {
22143 LastLegalVectorType = i + 1;
22144 }
22145
22146 // Find a legal type for the integer store.
22147 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22148 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22149 if (TLI.isTypeLegal(StoreTy) &&
22150 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22151 DAG.getMachineFunction()) &&
22152 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22153 *FirstInChain->getMemOperand(), &IsFastSt) &&
22154 IsFastSt &&
22155 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22156 *FirstLoad->getMemOperand(), &IsFastLd) &&
22157 IsFastLd) {
22158 LastLegalIntegerType = i + 1;
22159 DoIntegerTruncate = false;
22160 // Or check whether a truncstore and extload is legal.
22161 } else if (TLI.getTypeAction(Context, StoreTy) ==
22162 TargetLowering::TypePromoteInteger) {
22163 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22164 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22165 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22166 DAG.getMachineFunction()) &&
22167 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22168 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22169 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22170 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22171 *FirstInChain->getMemOperand(), &IsFastSt) &&
22172 IsFastSt &&
22173 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22174 *FirstLoad->getMemOperand(), &IsFastLd) &&
22175 IsFastLd) {
22176 LastLegalIntegerType = i + 1;
22177 DoIntegerTruncate = true;
22178 }
22179 }
22180 }
22181
22182 // Only use vector types if the vector type is larger than the integer
22183 // type. If they are the same, use integers.
22184 bool UseVectorTy =
22185 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22186 unsigned LastLegalType =
22187 std::max(LastLegalVectorType, LastLegalIntegerType);
22188
22189 // We add +1 here because the LastXXX variables refer to location while
22190 // the NumElem refers to array/index size.
22191 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22192 NumElem = std::min(LastLegalType, NumElem);
22193 Align FirstLoadAlign = FirstLoad->getAlign();
22194
22195 if (NumElem < 2) {
22196 // We know that candidate stores are in order and of correct
22197 // shape. While there is no mergeable sequence from the
22198 // beginning one may start later in the sequence. The only
22199 // reason a merge of size N could have failed where another of
22200 // the same size would not have is if the alignment or either
22201 // the load or store has improved. Drop as many candidates as we
22202 // can here.
22203 unsigned NumSkip = 1;
22204 while ((NumSkip < LoadNodes.size()) &&
22205 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22206 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22207 NumSkip++;
22208 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22209 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22210 NumConsecutiveStores -= NumSkip;
22211 continue;
22212 }
22213
22214 // Check that we can merge these candidates without causing a cycle.
22215 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22216 RootNode)) {
22217 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22218 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22219 NumConsecutiveStores -= NumElem;
22220 continue;
22221 }
22222
22223 // Find if it is better to use vectors or integers to load and store
22224 // to memory.
22225 EVT JointMemOpVT;
22226 if (UseVectorTy) {
22227 // Find a legal type for the vector store.
22228 unsigned Elts = NumElem * NumMemElts;
22229 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22230 } else {
22231 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22232 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22233 }
22234
22235 // Check if there is a call in the load/store chain.
22236 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22237 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22238 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22239 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22240 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22241 NumConsecutiveStores -= NumElem;
22242 continue;
22243 }
22244
22245 SDLoc LoadDL(LoadNodes[0].MemNode);
22246 SDLoc StoreDL(StoreNodes[0].MemNode);
22247
22248 // The merged loads are required to have the same incoming chain, so
22249 // using the first's chain is acceptable.
22250
22251 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22252 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22253 AddToWorklist(NewStoreChain.getNode());
22254
22255 MachineMemOperand::Flags LdMMOFlags =
22256 isDereferenceable ? MachineMemOperand::MODereferenceable
22257 : MachineMemOperand::MONone;
22258 if (IsNonTemporalLoad)
22259 LdMMOFlags |= MachineMemOperand::MONonTemporal;
22260
22261 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
22262
22263 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22264 ? MachineMemOperand::MONonTemporal
22265 : MachineMemOperand::MONone;
22266
22267 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22268
22269 SDValue NewLoad, NewStore;
22270 if (UseVectorTy || !DoIntegerTruncate) {
22271 NewLoad = DAG.getLoad(
22272 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22273 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22274 SDValue StoreOp = NewLoad;
22275 if (NeedRotate) {
22276 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22277 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22278 "Unexpected type for rotate-able load pair");
22279 SDValue RotAmt =
22280 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22281 // Target can convert to the identical ROTR if it does not have ROTL.
22282 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22283 }
22284 NewStore = DAG.getStore(
22285 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22286 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22287 : MachinePointerInfo(FirstStoreAS),
22288 FirstStoreAlign, StMMOFlags);
22289 } else { // This must be the truncstore/extload case
22290 EVT ExtendedTy =
22291 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22292 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22293 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22294 FirstLoad->getPointerInfo(), JointMemOpVT,
22295 FirstLoadAlign, LdMMOFlags);
22296 NewStore = DAG.getTruncStore(
22297 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22298 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22299 : MachinePointerInfo(FirstStoreAS),
22300 JointMemOpVT, FirstInChain->getAlign(),
22301 FirstInChain->getMemOperand()->getFlags());
22302 }
22303
22304 // Transfer chain users from old loads to the new load.
22305 for (unsigned i = 0; i < NumElem; ++i) {
22306 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22307 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22308 SDValue(NewLoad.getNode(), 1));
22309 }
22310
22311 // Replace all stores with the new store. Recursively remove corresponding
22312 // values if they are no longer used.
22313 for (unsigned i = 0; i < NumElem; ++i) {
22314 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22315 CombineTo(StoreNodes[i].MemNode, NewStore);
22316 if (Val->use_empty())
22317 recursivelyDeleteUnusedNodes(Val.getNode());
22318 }
22319
22320 MadeChange = true;
22321 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22322 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22323 NumConsecutiveStores -= NumElem;
22324 }
22325 return MadeChange;
22326}
22327
22328bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22329 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22330 return false;
22331
22332 // TODO: Extend this function to merge stores of scalable vectors.
22333 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22334 // store since we know <vscale x 16 x i8> is exactly twice as large as
22335 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22336 EVT MemVT = St->getMemoryVT();
22337 if (MemVT.isScalableVT())
22338 return false;
22339 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22340 return false;
22341
22342 // This function cannot currently deal with non-byte-sized memory sizes.
22343 int64_t ElementSizeBytes = MemVT.getStoreSize();
22344 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22345 return false;
22346
22347 // Do not bother looking at stored values that are not constants, loads, or
22348 // extracted vector elements.
22349 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22350 const StoreSource StoreSrc = getStoreSource(StoredVal);
22351 if (StoreSrc == StoreSource::Unknown)
22352 return false;
22353
22354 SmallVector<MemOpLink, 8> StoreNodes;
22355 // Find potential store merge candidates by searching through chain sub-DAG
22356 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22357
22358 // Check if there is anything to merge.
22359 if (StoreNodes.size() < 2)
22360 return false;
22361
22362 // Sort the memory operands according to their distance from the
22363 // base pointer.
22364 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22365 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22366 });
22367
22368 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22369 Attribute::NoImplicitFloat);
22370 bool IsNonTemporalStore = St->isNonTemporal();
22371 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22372 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22373
22374 // Store merging attempts to merge the lowest-addressed stores first. This
22375 // generally works out well when it succeeds, as the remaining stores are
22376 // checked after the first collection of stores is merged. However, in the
22377 // case that a non-mergeable store is found first, e.g., {p[-2],
22378 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22379 // mergeable cases. To prevent this, we prune such stores from the
22380 // front of StoreNodes here.
22381 bool MadeChange = false;
22382 while (StoreNodes.size() > 1) {
22383 unsigned NumConsecutiveStores =
22384 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22385 // There are no more stores in the list to examine.
22386 if (NumConsecutiveStores == 0)
22387 return MadeChange;
22388
22389 // We have at least 2 consecutive stores. Try to merge them.
22390 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22391 switch (StoreSrc) {
22392 case StoreSource::Constant:
22393 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22394 MemVT, RootNode, AllowVectors);
22395 break;
22396
22397 case StoreSource::Extract:
22398 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22399 MemVT, RootNode);
22400 break;
22401
22402 case StoreSource::Load:
22403 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22404 MemVT, RootNode, AllowVectors,
22405 IsNonTemporalStore, IsNonTemporalLoad);
22406 break;
22407
22408 default:
22409 llvm_unreachable("Unhandled store source type");
22410 }
22411 }
22412
22413 // Remember if we failed to optimize, to save compile time.
22414 if (!MadeChange)
22415 ChainsWithoutMergeableStores.insert(RootNode);
22416
22417 return MadeChange;
22418}
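// Illustrative sketch, not taken from an actual test case: the kind of source
// pattern the store merging above targets. Assuming i32 stores are legal and
// the (possibly misaligned) wider access is allowed, four adjacent constant
// byte stores collapse into a single wider store:
//
//   void init(unsigned char *p) {
//     p[0] = 0x11; p[1] = 0x22; p[2] = 0x33; p[3] = 0x44;
//   }
//   // --> one 32-bit store of 0x44332211 on a little-endian target.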
22419
22420SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22421 SDLoc SL(ST);
22422 SDValue ReplStore;
22423
22424 // Replace the chain to avoid dependency.
22425 if (ST->isTruncatingStore()) {
22426 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22427 ST->getBasePtr(), ST->getMemoryVT(),
22428 ST->getMemOperand());
22429 } else {
22430 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22431 ST->getMemOperand());
22432 }
22433
22434 // Create token to keep both nodes around.
22435 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22436 MVT::Other, ST->getChain(), ReplStore);
22437
22438 // Make sure the new and old chains are cleaned up.
22439 AddToWorklist(Token.getNode());
22440
22441 // Don't add users to work list.
22442 return CombineTo(ST, Token, false);
22443}
22444
22445SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22446 SDValue Value = ST->getValue();
22447 if (Value.getOpcode() == ISD::TargetConstantFP)
22448 return SDValue();
22449
22450 if (!ISD::isNormalStore(ST))
22451 return SDValue();
22452
22453 SDLoc DL(ST);
22454
22455 SDValue Chain = ST->getChain();
22456 SDValue Ptr = ST->getBasePtr();
22457
22458 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22459
22460 // NOTE: If the original store is volatile, this transform must not increase
22461 // the number of stores. For example, on x86-32 an f64 can be stored in one
22462 // processor operation but an i64 (which is not legal) requires two. So the
22463 // transform should not be done in this case.
22464
22465 SDValue Tmp;
22466 switch (CFP->getSimpleValueType(0).SimpleTy) {
22467 default:
22468 llvm_unreachable("Unknown FP type");
22469 case MVT::f16: // We don't do this for these yet.
22470 case MVT::bf16:
22471 case MVT::f80:
22472 case MVT::f128:
22473 case MVT::ppcf128:
22474 return SDValue();
22475 case MVT::f32:
22476 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22477 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22478 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22479 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22480 MVT::i32);
22481 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22482 }
22483
22484 return SDValue();
22485 case MVT::f64:
22486 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22487 ST->isSimple()) ||
22488 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22489 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22490 getZExtValue(), SDLoc(CFP), MVT::i64);
22491 return DAG.getStore(Chain, DL, Tmp,
22492 Ptr, ST->getMemOperand());
22493 }
22494
22495 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22496 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22497 // Many FP stores are not made apparent until after legalize, e.g. for
22498 // argument passing. Since this is so common, custom legalize the
22499 // 64-bit integer store into two 32-bit stores.
22500 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22501 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22502 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22503 if (DAG.getDataLayout().isBigEndian())
22504 std::swap(Lo, Hi);
22505
22506 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22507 AAMDNodes AAInfo = ST->getAAInfo();
22508
22509 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22510 ST->getBaseAlign(), MMOFlags, AAInfo);
22511 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22512 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22513 ST->getPointerInfo().getWithOffset(4),
22514 ST->getBaseAlign(), MMOFlags, AAInfo);
22515 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22516 St0, St1);
22517 }
22518
22519 return SDValue();
22520 }
22521}
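// Illustrative sketch, not taken from an actual test case, of the transform
// above when the equivalent integer store type is available:
//   (store f32 1.0, Ptr)  -->  (store i32 0x3F800000, Ptr)
// and, when no legal i64 store exists, an f64 constant store may instead be
// split into two i32 stores of the low and high halves (swapped on
// big-endian targets).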
22522
22523// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22524//
22525 // If a vector store's value is a load from the same address with a single
22526 // element inserted into it, and nothing else on the chain uses that memory
22527 // in between, the wide store can be replaced by a store of just that element.
22528SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22529 SDLoc DL(ST);
22530 SDValue Value = ST->getValue();
22531 SDValue Ptr = ST->getBasePtr();
22532 SDValue Chain = ST->getChain();
22533 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22534 return SDValue();
22535
22536 SDValue Elt = Value.getOperand(1);
22537 SDValue Idx = Value.getOperand(2);
22538
22539 // If the element isn't byte sized or is implicitly truncated then we can't
22540 // compute an offset.
22541 EVT EltVT = Elt.getValueType();
22542 if (!EltVT.isByteSized() ||
22543 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22544 return SDValue();
22545
22546 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22547 if (!Ld || Ld->getBasePtr() != Ptr ||
22548 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22549 !ISD::isNormalStore(ST) ||
22550 Ld->getAddressSpace() != ST->getAddressSpace() ||
22551 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22552 return SDValue();
22553
22554 unsigned IsFast;
22555 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22556 Elt.getValueType(), ST->getAddressSpace(),
22557 ST->getAlign(), ST->getMemOperand()->getFlags(),
22558 &IsFast) ||
22559 !IsFast)
22560 return SDValue();
22561
22562 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22563
22564 // If the offset is a known constant then try to recover the pointer
22565 // info
22566 SDValue NewPtr;
22567 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22568 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22569 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22570 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22571 } else {
22572 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
22573 }
22574
22575 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22576 ST->getMemOperand()->getFlags());
22577}
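// Illustrative sketch, not taken from an actual test case, of a source-level
// pattern that produces the DAG shape handled above (assuming Clang/GCC
// vector extensions):
//
//   typedef int v4si __attribute__((vector_size(16)));
//   void set_lane2(v4si *p, int x) {
//     v4si v = *p;  // (load p)
//     v[2] = x;     // (insert_vector_elt (load p), x, 2)
//     *p = v;       // (store ..., p)
//   }
//
// With nothing else using the loaded vector or touching the memory in
// between, the wide load/store pair becomes a single scalar store of x at
// p plus 2 * sizeof(int).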
22578
22579SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22580 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22581 SDValue Val = ST->getVal();
22582 EVT VT = Val.getValueType();
22583 EVT MemVT = ST->getMemoryVT();
22584
22585 if (MemVT.bitsLT(VT)) { // Is truncating store
22586 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22587 MemVT.getScalarSizeInBits());
22588 // See if we can simplify the operation with SimplifyDemandedBits, which
22589 // only works if the value has a single use.
22590 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22591 return SDValue(N, 0);
22592 }
22593
22594 return SDValue();
22595}
22596
22597 static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
22598 const SDLoc &Dl) {
22599 if (!Store->isSimple() || !ISD::isNormalStore(Store))
22600 return SDValue();
22601
22602 SDValue StoredVal = Store->getValue();
22603 SDValue StorePtr = Store->getBasePtr();
22604 SDValue StoreOffset = Store->getOffset();
22605 EVT VT = Store->getMemoryVT();
22606 unsigned AddrSpace = Store->getAddressSpace();
22607 Align Alignment = Store->getAlign();
22608 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22609
22610 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
22611 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
22612 return SDValue();
22613
22614 SDValue Mask, OtherVec, LoadCh;
22615 unsigned LoadPos;
22616 if (sd_match(StoredVal,
22617 m_VSelect(m_Value(Mask), m_Value(OtherVec),
22618 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22619 m_Specific(StoreOffset))))) {
22620 LoadPos = 2;
22621 } else if (sd_match(StoredVal,
22622 m_VSelect(m_Value(Mask),
22623 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22624 m_Specific(StoreOffset)),
22625 m_Value(OtherVec)))) {
22626 LoadPos = 1;
22627 } else {
22628 return SDValue();
22629 }
22630
22631 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
22632 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
22633 Load->getAddressSpace() != AddrSpace)
22634 return SDValue();
22635
22636 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
22637 return SDValue();
22638
22639 if (LoadPos == 1)
22640 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
22641
22642 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
22643 StoreOffset, Mask, VT, Store->getMemOperand(),
22644 Store->getAddressingMode());
22645}
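// Illustrative sketch, not taken from an actual test case, of the fold above:
// a store of a vector select that reloads the same address in one of its
// operands is a per-lane conditional store, so it can become a masked store,
//   (store (vselect Mask, X, (load Ptr)), Ptr)
//     -->  (masked_store X, Ptr, Mask)
// with the mask inverted when the load feeds the true operand instead of the
// false one.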
22646
22647SDValue DAGCombiner::visitSTORE(SDNode *N) {
22648 StoreSDNode *ST = cast<StoreSDNode>(N);
22649 SDValue Chain = ST->getChain();
22650 SDValue Value = ST->getValue();
22651 SDValue Ptr = ST->getBasePtr();
22652
22653 // If this is a store of a bit convert, store the input value if the
22654 // resultant store does not need a higher alignment than the original.
22655 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
22656 ST->isUnindexed()) {
22657 EVT SVT = Value.getOperand(0).getValueType();
22658 // If the store is volatile, we only want to change the store type if the
22659 // resulting store is legal. Otherwise we might increase the number of
22660 // memory accesses. We don't care if the original type was legal or not
22661 // as we assume software couldn't rely on the number of accesses of an
22662 // illegal type.
22663 // TODO: May be able to relax for unordered atomics (see D66309)
22664 if (((!LegalOperations && ST->isSimple()) ||
22665 TLI.isOperationLegal(ISD::STORE, SVT)) &&
22666 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
22667 DAG, *ST->getMemOperand())) {
22668 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22669 ST->getMemOperand());
22670 }
22671 }
22672
22673 // Turn 'store undef, Ptr' -> nothing.
22674 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
22675 return Chain;
22676
22677 // Try to infer better alignment information than the store already has.
22678 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
22679 !ST->isAtomic()) {
22680 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
22681 if (*Alignment > ST->getAlign() &&
22682 isAligned(*Alignment, ST->getSrcValueOffset())) {
22683 SDValue NewStore =
22684 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
22685 ST->getMemoryVT(), *Alignment,
22686 ST->getMemOperand()->getFlags(), ST->getAAInfo());
22687 // NewStore will always be N as we are only refining the alignment
22688 assert(NewStore.getNode() == N);
22689 (void)NewStore;
22690 }
22691 }
22692 }
22693
22694 // Try transforming a pair floating point load / store ops to integer
22695 // load / store ops.
22696 if (SDValue NewST = TransformFPLoadStorePair(N))
22697 return NewST;
22698
22699 // Try transforming several stores into STORE (BSWAP).
22700 if (SDValue Store = mergeTruncStores(ST))
22701 return Store;
22702
22703 if (ST->isUnindexed()) {
22704 // Walk up chain skipping non-aliasing memory nodes, on this store and any
22705 // adjacent stores.
22706 if (findBetterNeighborChains(ST)) {
22707 // replaceStoreChain uses CombineTo, which handled all of the worklist
22708 // manipulation. Return the original node to not do anything else.
22709 return SDValue(ST, 0);
22710 }
22711 Chain = ST->getChain();
22712 }
22713
22714 // FIXME: is there such a thing as a truncating indexed store?
22715 if (ST->isTruncatingStore() && ST->isUnindexed() &&
22716 Value.getValueType().isInteger() &&
22717 (!isa<ConstantSDNode>(Value) ||
22718 !cast<ConstantSDNode>(Value)->isOpaque())) {
22719 // Convert a truncating store of an extension into a standard store.
22720 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
22721 Value.getOpcode() == ISD::SIGN_EXTEND ||
22722 Value.getOpcode() == ISD::ANY_EXTEND) &&
22723 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
22724 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
22725 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22726 ST->getMemOperand());
22727
22728 APInt TruncDemandedBits =
22729 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
22730 ST->getMemoryVT().getScalarSizeInBits());
22731
22732 // See if we can simplify the operation with SimplifyDemandedBits, which
22733 // only works if the value has a single use.
22734 AddToWorklist(Value.getNode());
22735 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
22736 // Re-visit the store if anything changed and the store hasn't been merged
22737 // with another node (N is deleted). SimplifyDemandedBits will add Value's
22738 // node back to the worklist if necessary, but we also need to re-visit
22739 // the Store node itself.
22740 if (N->getOpcode() != ISD::DELETED_NODE)
22741 AddToWorklist(N);
22742 return SDValue(N, 0);
22743 }
22744
22745 // Otherwise, see if we can simplify the input to this truncstore with
22746 // knowledge that only the low bits are being used. For example:
22747 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
22748 if (SDValue Shorter =
22749 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
22750 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
22751 ST->getMemOperand());
22752
22753 // If we're storing a truncated constant, see if we can simplify it.
22754 // TODO: Move this to targetShrinkDemandedConstant?
22755 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
22756 if (!Cst->isOpaque()) {
22757 const APInt &CValue = Cst->getAPIntValue();
22758 APInt NewVal = CValue & TruncDemandedBits;
22759 if (NewVal != CValue) {
22760 SDValue Shorter =
22761 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
22762 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
22763 ST->getMemoryVT(), ST->getMemOperand());
22764 }
22765 }
22766 }
22767
22768 // If this is a load followed by a store to the same location, then the store
22769 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
22770 // TODO: Add big-endian truncate support with test coverage.
22771 // TODO: Can relax for unordered atomics (see D66309)
22772 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
22773 ? peekThroughTruncates(Value)
22774 : Value;
22775 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
22776 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
22777 ST->isUnindexed() && ST->isSimple() &&
22778 Ld->getAddressSpace() == ST->getAddressSpace() &&
22779 // There can't be any side effects between the load and store, such as
22780 // a call or store.
22781 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22782 // The store is dead, remove it.
22783 return Chain;
22784 }
22785 }
22786
22787 // Try scalarizing vector stores of loads where we only change one element
22788 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22789 return NewST;
22790
22791 // TODO: Can relax for unordered atomics (see D66309)
22792 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22793 if (ST->isUnindexed() && ST->isSimple() &&
22794 ST1->isUnindexed() && ST1->isSimple()) {
22795 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22796 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22797 ST->getAddressSpace() == ST1->getAddressSpace()) {
22798 // If this is a store followed by a store with the same value to the
22799 // same location, then the store is dead/noop.
22800 return Chain;
22801 }
22802
22803 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22804 !ST1->getBasePtr().isUndef() &&
22805 ST->getAddressSpace() == ST1->getAddressSpace()) {
22806 // If we consider two stores and one smaller in size is a scalable
22807 // vector type and another one a bigger size store with a fixed type,
22808 // then we could not allow the scalable store removal because we don't
22809 // know its final size in the end.
22810 if (ST->getMemoryVT().isScalableVector() ||
22811 ST1->getMemoryVT().isScalableVector()) {
22812 if (ST1->getBasePtr() == Ptr &&
22813 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22814 ST->getMemoryVT().getStoreSize())) {
22815 CombineTo(ST1, ST1->getChain());
22816 return SDValue(N, 0);
22817 }
22818 } else {
22819 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22820 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22821 // If the preceding store writes to a subset of the current store's
22822 // location and no other node is chained to that preceding store, we can
22823 // effectively drop it. Do not remove stores to undef as they
22824 // may be used as data sinks.
22825 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22826 ChainBase,
22827 ST1->getMemoryVT().getFixedSizeInBits())) {
22828 CombineTo(ST1, ST1->getChain());
22829 return SDValue(N, 0);
22830 }
22831 }
22832 }
22833 }
22834 }
22835
22836 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22837 // truncating store. We can do this even if this is already a truncstore.
22838 if ((Value.getOpcode() == ISD::FP_ROUND ||
22839 Value.getOpcode() == ISD::TRUNCATE) &&
22840 Value->hasOneUse() && ST->isUnindexed() &&
22841 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22842 ST->getMemoryVT(), LegalOperations)) {
22843 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22844 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22845 }
22846
22847 // Always perform this optimization before types are legal. If the target
22848 // prefers, also try this after legalization to catch stores that were created
22849 // by intrinsics or other nodes.
22850 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22851 while (true) {
22852 // There can be multiple store sequences on the same chain.
22853 // Keep trying to merge store sequences until we are unable to do so
22854 // or until we merge the last store on the chain.
22855 bool Changed = mergeConsecutiveStores(ST);
22856 if (!Changed) break;
22857 // Return N as merge only uses CombineTo and no worklist clean
22858 // up is necessary.
22859 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22860 return SDValue(N, 0);
22861 }
22862 }
22863
22864 // Try transforming N to an indexed store.
22865 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22866 return SDValue(N, 0);
22867
22868 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22869 //
22870 // Make sure to do this only after attempting to merge stores in order to
22871 // avoid changing the types of some subset of stores due to visit order,
22872 // preventing their merging.
22873 if (isa<ConstantFPSDNode>(ST->getValue())) {
22874 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22875 return NewSt;
22876 }
22877
22878 if (SDValue NewSt = splitMergedValStore(ST))
22879 return NewSt;
22880
22881 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
22882 return MaskedStore;
22883
22884 return ReduceLoadOpStoreWidth(N);
22885}
22886
22887SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22888 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22889 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
22890
22891 // We walk up the chains to find stores.
22892 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22893 while (!Chains.empty()) {
22894 SDValue Chain = Chains.pop_back_val();
22895 if (!Chain.hasOneUse())
22896 continue;
22897 switch (Chain.getOpcode()) {
22898 case ISD::TokenFactor:
22899 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22900 Chains.push_back(Chain.getOperand(--Nops));
22901 break;
22902 case ISD::LIFETIME_START:
22903 case ISD::LIFETIME_END:
22904 // We can forward past any lifetime start/end that can be proven not to
22905 // alias the node.
22906 if (!mayAlias(Chain.getNode(), N))
22907 Chains.push_back(Chain.getOperand(0));
22908 break;
22909 case ISD::STORE: {
22910 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22911 // TODO: Can relax for unordered atomics (see D66309)
22912 if (!ST->isSimple() || ST->isIndexed())
22913 continue;
22914 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
22915 // The bounds of a scalable store are not known until runtime, so this
22916 // store cannot be elided.
22917 if (StoreSize.isScalable())
22918 continue;
22919 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
22920 // If we store purely within object bounds just before its lifetime ends,
22921 // we can remove the store.
22922 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
22923 if (LifetimeEndBase.contains(
22924 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
22925 StoreBase, StoreSize.getFixedValue() * 8)) {
22926 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
22927 dbgs() << "\nwithin LIFETIME_END of : ";
22928 LifetimeEndBase.dump(); dbgs() << "\n");
22929 CombineTo(ST, ST->getChain());
22930 return SDValue(N, 0);
22931 }
22932 }
22933 }
22934 }
22935 return SDValue();
22936}
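// Illustrative sketch, not taken from an actual test case, of the dead-store
// elimination above:
//
//   {
//     int tmp[2];
//     tmp[1] = x;   // never read again before tmp's lifetime ends
//   }               // LIFETIME_END of tmp's frame slot
//
// The store to tmp[1] lies entirely within the slot whose lifetime is ending,
// so it is removed and the chain is rewired around it.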
22937
22938/// For the instruction sequence of store below, F and I values
22939/// are bundled together as an i64 value before being stored into memory.
22940 /// Sometimes it is more efficient to generate separate stores for F and I,
22941/// which can remove the bitwise instructions or sink them to colder places.
22942///
22943/// (store (or (zext (bitcast F to i32) to i64),
22944/// (shl (zext I to i64), 32)), addr) -->
22945/// (store F, addr) and (store I, addr+4)
22946///
22947 /// Similarly, splitting other merged stores can also be beneficial, like:
22948/// For pair of {i32, i32}, i64 store --> two i32 stores.
22949/// For pair of {i32, i16}, i64 store --> two i32 stores.
22950/// For pair of {i16, i16}, i32 store --> two i16 stores.
22951/// For pair of {i16, i8}, i32 store --> two i16 stores.
22952/// For pair of {i8, i8}, i16 store --> two i8 stores.
22953///
22954/// We allow each target to determine specifically which kind of splitting is
22955/// supported.
22956///
22957/// The store patterns are commonly seen from the simple code snippet below
22958 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
22959/// void goo(const std::pair<int, float> &);
22960/// hoo() {
22961/// ...
22962/// goo(std::make_pair(tmp, ftmp));
22963/// ...
22964/// }
22965///
22966SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
22967 if (OptLevel == CodeGenOptLevel::None)
22968 return SDValue();
22969
22970 // Can't change the number of memory accesses for a volatile store or break
22971 // atomicity for an atomic one.
22972 if (!ST->isSimple())
22973 return SDValue();
22974
22975 SDValue Val = ST->getValue();
22976 SDLoc DL(ST);
22977
22978 // Match OR operand.
22979 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
22980 return SDValue();
22981
22982 // Match SHL operand and get Lower and Higher parts of Val.
22983 SDValue Op1 = Val.getOperand(0);
22984 SDValue Op2 = Val.getOperand(1);
22985 SDValue Lo, Hi;
22986 if (Op1.getOpcode() != ISD::SHL) {
22987 std::swap(Op1, Op2);
22988 if (Op1.getOpcode() != ISD::SHL)
22989 return SDValue();
22990 }
22991 Lo = Op2;
22992 Hi = Op1.getOperand(0);
22993 if (!Op1.hasOneUse())
22994 return SDValue();
22995
22996 // Match shift amount to HalfValBitSize.
22997 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
22998 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
22999 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23000 return SDValue();
23001
23002 // Lo and Hi must be zero-extended from scalar integer types no wider than
23003 // HalfValBitSize.
23004 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23005 !Lo.getOperand(0).getValueType().isScalarInteger() ||
23006 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
23007 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23008 !Hi.getOperand(0).getValueType().isScalarInteger() ||
23009 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
23010 return SDValue();
23011
23012 // Use the EVT of low and high parts before bitcast as the input
23013 // of target query.
23014 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23015 ? Lo.getOperand(0).getValueType()
23016 : Lo.getValueType();
23017 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23018 ? Hi.getOperand(0).getValueType()
23019 : Hi.getValueType();
23020 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23021 return SDValue();
23022
23023 // Start to split store.
23024 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23025 AAMDNodes AAInfo = ST->getAAInfo();
23026
23027 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23028 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23029 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23030 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23031
23032 SDValue Chain = ST->getChain();
23033 SDValue Ptr = ST->getBasePtr();
23034 // Lower value store.
23035 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23036 ST->getBaseAlign(), MMOFlags, AAInfo);
23037 Ptr =
23038 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23039 // Higher value store.
23040 SDValue St1 = DAG.getStore(
23041 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23042 ST->getBaseAlign(), MMOFlags, AAInfo);
23043 return St1;
23044}
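// Illustrative sketch, not taken from an actual test case, of the split above
// for a 64-bit store (HalfValBitSize == 32):
//   (store (or (zext i32 %lo to i64),
//              (shl (zext i32 %hi to i64), 32)), Ptr)
//     -->
//   (store i32 %lo, Ptr)
//   (store i32 %hi, Ptr + 4)   ; chained after the first store
// provided TLI.isMultiStoresCheaperThanBitsMerge(i32, i32) agrees.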
23045
23046// Merge an insertion into an existing shuffle:
23047// (insert_vector_elt (vector_shuffle X, Y, Mask),
23048 // (extract_vector_elt X, N), InsIndex)
23049// --> (vector_shuffle X, Y, NewMask)
23050// and variations where shuffle operands may be CONCAT_VECTORS.
23051 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23052 SmallVectorImpl<int> &NewMask, SDValue Elt,
23053 unsigned InsIndex) {
23054 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23055 !isa<ConstantSDNode>(Elt.getOperand(1)))
23056 return false;
23057
23058 // Vec's operand 0 is using indices from 0 to N-1 and
23059 // operand 1 from N to 2N - 1, where N is the number of
23060 // elements in the vectors.
23061 SDValue InsertVal0 = Elt.getOperand(0);
23062 int ElementOffset = -1;
23063
23064 // We explore the inputs of the shuffle in order to see if we find the
23065 // source of the extract_vector_elt. If so, we can use it to modify the
23066 // shuffle rather than perform an insert_vector_elt.
23067 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
23068 ArgWorkList.emplace_back(Mask.size(), Y);
23069 ArgWorkList.emplace_back(0, X);
23070
23071 while (!ArgWorkList.empty()) {
23072 int ArgOffset;
23073 SDValue ArgVal;
23074 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23075
23076 if (ArgVal == InsertVal0) {
23077 ElementOffset = ArgOffset;
23078 break;
23079 }
23080
23081 // Peek through concat_vector.
23082 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23083 int CurrentArgOffset =
23084 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23085 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23086 for (SDValue Op : reverse(ArgVal->ops())) {
23087 CurrentArgOffset -= Step;
23088 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23089 }
23090
23091 // Make sure we went through all the elements and did not screw up index
23092 // computation.
23093 assert(CurrentArgOffset == ArgOffset);
23094 }
23095 }
23096
23097 // If we failed to find a match, see if we can replace an UNDEF shuffle
23098 // operand.
23099 if (ElementOffset == -1) {
23100 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23101 return false;
23102 ElementOffset = Mask.size();
23103 Y = InsertVal0;
23104 }
23105
23106 NewMask.assign(Mask.begin(), Mask.end());
23107 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23108 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23109 "NewMask[InsIndex] is out of bound");
23110 return true;
23111}
23112
23113// Merge an insertion into an existing shuffle:
23114// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23115// InsIndex)
23116// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23117// CONCAT_VECTORS.
23118SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23119 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23120 "Expected insert_vector_elt");
23121 SDValue InsertVal = N->getOperand(1);
23122 SDValue Vec = N->getOperand(0);
23123
23124 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23125 if (!SVN || !Vec.hasOneUse())
23126 return SDValue();
23127
23128 ArrayRef<int> Mask = SVN->getMask();
23129 SDValue X = Vec.getOperand(0);
23130 SDValue Y = Vec.getOperand(1);
23131
23132 SmallVector<int, 16> NewMask(Mask);
23133 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23134 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23135 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23136 if (LegalShuffle)
23137 return LegalShuffle;
23138 }
23139
23140 return SDValue();
23141}
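// Illustrative sketch, not taken from an actual test case, of the merge above
// with v4i32 operands (shuffle indices 0-3 select from X, 4-7 from Y):
//   (insert_vector_elt (vector_shuffle X, Y, <0,5,2,7>),
//                      (extract_vector_elt X, 3), 1)
//     -->
//   (vector_shuffle X, Y, <0,3,2,7>)
// Lane 1 of the mask is simply re-pointed at X[3] instead of performing an
// insertion.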
23142
23143// Convert a disguised subvector insertion into a shuffle:
23144// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23145// bitcast(shuffle (bitcast V), (extended X), Mask)
23146// Note: We do not use an insert_subvector node because that requires a
23147// legal subvector type.
23148SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23149 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23150 "Expected insert_vector_elt");
23151 SDValue InsertVal = N->getOperand(1);
23152
23153 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23154 !InsertVal.getOperand(0).getValueType().isVector())
23155 return SDValue();
23156
23157 SDValue SubVec = InsertVal.getOperand(0);
23158 SDValue DestVec = N->getOperand(0);
23159 EVT SubVecVT = SubVec.getValueType();
23160 EVT VT = DestVec.getValueType();
23161 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23162 // If the source has only a single vector element, the cost of widening it
23163 // and shuffling is likely to exceed the cost of an insert_vector_elt.
23164 if (NumSrcElts == 1)
23165 return SDValue();
23166 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23167 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23168
23169 // Step 1: Create a shuffle mask that implements this insert operation. The
23170 // vector that we are inserting into will be operand 0 of the shuffle, so
23171 // those elements are just 'i'. The inserted subvector is in the first
23172 // positions of operand 1 of the shuffle. Example:
23173 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23174 SmallVector<int, 16> Mask(NumMaskVals);
23175 for (unsigned i = 0; i != NumMaskVals; ++i) {
23176 if (i / NumSrcElts == InsIndex)
23177 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23178 else
23179 Mask[i] = i;
23180 }
23181
23182 // Bail out if the target can not handle the shuffle we want to create.
23183 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23184 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23185 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23186 return SDValue();
23187
23188 // Step 2: Create a wide vector from the inserted source vector by appending
23189 // undefined elements. This is the same size as our destination vector.
23190 SDLoc DL(N);
23191 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23192 ConcatOps[0] = SubVec;
23193 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23194
23195 // Step 3: Shuffle in the padded subvector.
23196 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23197 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23198 AddToWorklist(PaddedSubV.getNode());
23199 AddToWorklist(DestVecBC.getNode());
23200 AddToWorklist(Shuf.getNode());
23201 return DAG.getBitcast(VT, Shuf);
23202}
23203
23204// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23205 // possible and the new load will be quick. We use more loads but fewer shuffles
23206// and inserts.
23207SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23208 EVT VT = N->getValueType(0);
23209
23210 // InsIndex is expected to be the first or last lane.
23211 if (!VT.isFixedLengthVector() ||
23212 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23213 return SDValue();
23214
23215 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23216 // depending on the InsIndex.
23217 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23218 SDValue Scalar = N->getOperand(1);
23219 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23220 return InsIndex == P.index() || P.value() < 0 ||
23221 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23222 (InsIndex == VT.getVectorNumElements() - 1 &&
23223 P.value() == (int)P.index() + 1);
23224 }))
23225 return SDValue();
23226
23227 // We optionally skip over an extend so long as both loads are extended in the
23228 // same way from the same type.
23229 unsigned Extend = 0;
23230 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23231 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23232 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23233 Extend = Scalar.getOpcode();
23234 Scalar = Scalar.getOperand(0);
23235 }
23236
23237 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23238 if (!ScalarLoad)
23239 return SDValue();
23240
23241 SDValue Vec = Shuffle->getOperand(0);
23242 if (Extend) {
23243 if (Vec.getOpcode() != Extend)
23244 return SDValue();
23245 Vec = Vec.getOperand(0);
23246 }
23247 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23248 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23249 return SDValue();
23250
23251 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23252 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23253 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23254 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23255 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23256 return SDValue();
23257
23258 // Check that the offset between the pointers is such that the two loads
23259 // form a single contiguous load.
23260 if (InsIndex == 0) {
23261 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23262 -1))
23263 return SDValue();
23264 } else {
23265 if (!DAG.areNonVolatileConsecutiveLoads(
23266 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23267 return SDValue();
23268 }
23269
23270 // And that the new unaligned load will be fast.
23271 unsigned IsFast = 0;
23272 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23273 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23274 Vec.getValueType(), VecLoad->getAddressSpace(),
23275 NewAlign, VecLoad->getMemOperand()->getFlags(),
23276 &IsFast) ||
23277 !IsFast)
23278 return SDValue();
23279
23280 // Calculate the new Ptr and create the new load.
23281 SDLoc DL(N);
23282 SDValue Ptr = ScalarLoad->getBasePtr();
23283 if (InsIndex != 0)
23284 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23285 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23286 MachinePointerInfo PtrInfo =
23287 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23288 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23289
23290 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23291 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23292 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23293 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23294 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23295}
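// Illustrative sketch, not taken from an actual test case, of the combine
// above for a v4i32 "sliding window" with InsIndex == 3:
//   (insert_vector_elt (vector_shuffle (load Ptr), undef, <1,2,3,u>),
//                      (load Ptr+16), 3)
//     -->
//   (load Ptr+4)   ; one v4i32 load of the shifted window
// assuming the two original loads are consecutive in memory and the new
// (possibly misaligned) wide load is fast on the target.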
23296
23297SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23298 SDValue InVec = N->getOperand(0);
23299 SDValue InVal = N->getOperand(1);
23300 SDValue EltNo = N->getOperand(2);
23301 SDLoc DL(N);
23302
23303 EVT VT = InVec.getValueType();
23304 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23305
23306 // Insert into out-of-bounds element is undefined.
23307 if (IndexC && VT.isFixedLengthVector() &&
23308 IndexC->getZExtValue() >= VT.getVectorNumElements())
23309 return DAG.getUNDEF(VT);
23310
23311 // Remove redundant insertions:
23312 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23313 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23314 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23315 return InVec;
23316
23317 if (!IndexC) {
23318 // If this is variable insert to undef vector, it might be better to splat:
23319 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23320 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23321 return DAG.getSplat(VT, DL, InVal);
23322 return SDValue();
23323 }
23324
23325 if (VT.isScalableVector())
23326 return SDValue();
23327
23328 unsigned NumElts = VT.getVectorNumElements();
23329
23330 // We must know which element is being inserted for folds below here.
23331 unsigned Elt = IndexC->getZExtValue();
23332
23333 // Handle <1 x ???> vector insertion special cases.
23334 if (NumElts == 1) {
23335 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23336 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23337 InVal.getOperand(0).getValueType() == VT &&
23338 isNullConstant(InVal.getOperand(1)))
23339 return InVal.getOperand(0);
23340 }
23341
23342 // Canonicalize insert_vector_elt dag nodes.
23343 // Example:
23344 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23345 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23346 //
23347 // Do this only if the child insert_vector node has one use; also
23348 // do this only if indices are both constants and Idx1 < Idx0.
23349 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23350 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23351 unsigned OtherElt = InVec.getConstantOperandVal(2);
23352 if (Elt < OtherElt) {
23353 // Swap nodes.
23354 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23355 InVec.getOperand(0), InVal, EltNo);
23356 AddToWorklist(NewOp.getNode());
23357 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23358 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23359 }
23360 }
23361
23362 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23363 return Shuf;
23364
23365 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23366 return Shuf;
23367
23368 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23369 return Shuf;
23370
23371 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23372 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23373 // vXi1 vector - we don't need to recurse.
23374 if (NumElts == 1)
23375 return DAG.getBuildVector(VT, DL, {InVal});
23376
23377 // If we haven't already collected the element, insert into the op list.
23378 EVT MaxEltVT = InVal.getValueType();
23379 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23380 unsigned Idx) {
23381 if (!Ops[Idx]) {
23382 Ops[Idx] = Elt;
23383 if (VT.isInteger()) {
23384 EVT EltVT = Elt.getValueType();
23385 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23386 }
23387 }
23388 };
23389
23390 // Ensure all the operands are the same value type, fill any missing
23391 // operands with UNDEF and create the BUILD_VECTOR.
23392 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23393 bool FreezeUndef = false) {
23394 assert(Ops.size() == NumElts && "Unexpected vector size");
23395 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23396 : DAG.getUNDEF(MaxEltVT);
23397 for (SDValue &Op : Ops) {
23398 if (Op)
23399 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23400 else
23401 Op = UndefOp;
23402 }
23403 return DAG.getBuildVector(VT, DL, Ops);
23404 };
23405
23406 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
23407 Ops[Elt] = InVal;
23408
23409 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
23410 for (SDValue CurVec = InVec; CurVec;) {
23411 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23412 if (CurVec.isUndef())
23413 return CanonicalizeBuildVector(Ops);
23414
23415 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23416 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23417 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23418
23419 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23420 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23421 for (unsigned I = 0; I != NumElts; ++I)
23422 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23423 return CanonicalizeBuildVector(Ops);
23424 }
23425
23426 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23427 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23428 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23429 return CanonicalizeBuildVector(Ops);
23430 }
23431
23432 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23433 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23434 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23435 if (CurIdx->getAPIntValue().ult(NumElts)) {
23436 unsigned Idx = CurIdx->getZExtValue();
23437 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23438
23439 // Found entire BUILD_VECTOR.
23440 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23441 return CanonicalizeBuildVector(Ops);
23442
23443 CurVec = CurVec->getOperand(0);
23444 continue;
23445 }
23446
23447 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23448 // update the shuffle mask (and second operand if we started with unary
23449 // shuffle) and create a new legal shuffle.
23450 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23451 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23452 SDValue LHS = SVN->getOperand(0);
23453 SDValue RHS = SVN->getOperand(1);
23454 SmallVector<int, 16> Mask(SVN->getMask());
23455 bool Merged = true;
23456 for (auto I : enumerate(Ops)) {
23457 SDValue &Op = I.value();
23458 if (Op) {
23459 SmallVector<int, 16> NewMask;
23460 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23461 Merged = false;
23462 break;
23463 }
23464 Mask = std::move(NewMask);
23465 }
23466 }
23467 if (Merged)
23468 if (SDValue NewShuffle =
23469 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23470 return NewShuffle;
23471 }
23472
23473 if (!LegalOperations) {
23474 bool IsNull = llvm::isNullConstant(InVal);
23475 // We can convert to AND/OR mask if all insertions are zero or -1
23476 // respectively.
23477 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23478 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23479 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23480 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23481 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23482 SmallVector<SDValue, 8> Mask(NumElts);
23483
23484 // Build the mask and return the corresponding DAG node.
23485 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23486 unsigned MaskOpcode) {
23487 for (unsigned I = 0; I != NumElts; ++I)
23488 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23489 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23490 DAG.getBuildVector(VT, DL, Mask));
23491 };
23492
23493 // If all elements are zero, we can use AND with all ones.
23494 if (IsNull)
23495 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23496
23497 // If all elements are -1, we can use OR with zero.
23498 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23499 }
23500 }
23501
23502 // Failed to find a match in the chain - bail.
23503 break;
23504 }
23505
23506 // See if we can fill in the missing constant elements as zeros.
23507 // TODO: Should we do this for any constant?
23508 APInt DemandedZeroElts = APInt::getZero(NumElts);
23509 for (unsigned I = 0; I != NumElts; ++I)
23510 if (!Ops[I])
23511 DemandedZeroElts.setBit(I);
23512
23513 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
23514 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
23515 : DAG.getConstantFP(0, DL, MaxEltVT);
23516 for (unsigned I = 0; I != NumElts; ++I)
23517 if (!Ops[I])
23518 Ops[I] = Zero;
23519
23520 return CanonicalizeBuildVector(Ops);
23521 }
23522 }
23523
23524 return SDValue();
23525}
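// Illustrative sketch, not taken from an actual test case, of the
// insert_vector_elt chain walk performed above for a v4i32 value:
//   (insert_vector_elt (insert_vector_elt (insert_vector_elt undef, a, 0),
//                                          b, 1), c, 2)
//     -->  (build_vector a, b, c, undef)
// once the chain bottoms out in UNDEF, provided BUILD_VECTOR is legal or we
// are still before operation legalization.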
23526
23527/// Transform a vector binary operation into a scalar binary operation by moving
23528/// the math/logic after an extract element of a vector.
23529 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
23530 const SDLoc &DL, bool LegalTypes) {
23531 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23532 SDValue Vec = ExtElt->getOperand(0);
23533 SDValue Index = ExtElt->getOperand(1);
23534 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23535 unsigned Opc = Vec.getOpcode();
23536 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
23537 Vec->getNumValues() != 1)
23538 return SDValue();
23539
23540 // Targets may want to avoid this to prevent an expensive register transfer.
23541 if (!TLI.shouldScalarizeBinop(Vec))
23542 return SDValue();
23543
23544 EVT ResVT = ExtElt->getValueType(0);
23545 if (Opc == ISD::SETCC &&
23546 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
23547 return SDValue();
23548
23549 // Extracting an element of a vector constant is constant-folded, so this
23550 // transform is just replacing a vector op with a scalar op while moving the
23551 // extract.
23552 SDValue Op0 = Vec.getOperand(0);
23553 SDValue Op1 = Vec.getOperand(1);
23554 APInt SplatVal;
23555 if (!isAnyConstantBuildVector(Op0, true) &&
23556 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
23557 !isAnyConstantBuildVector(Op1, true) &&
23558 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
23559 return SDValue();
23560
23561 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
23562 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
23563 if (Opc == ISD::SETCC) {
23564 EVT OpVT = Op0.getValueType().getVectorElementType();
23565 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
23566 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
23567 SDValue NewVal = DAG.getSetCC(
23568 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
23569 // We may need to sign- or zero-extend the result to match the same
23570 // behaviour as the vector version of SETCC.
23571 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
23572 if (ResVT != MVT::i1 &&
23573 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
23574 VecBoolContents != TLI.getBooleanContents(ResVT)) {
23575 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
23576 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
23577 DAG.getValueType(MVT::i1));
23578 else
23579 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
23580 }
23581 return NewVal;
23582 }
23583 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
23584 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
23585 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
23586}
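// Illustrative sketch, not taken from an actual test case, of the
// scalarization above when one operand is a constant build_vector:
//   (extract_vector_elt (add X, (build_vector 1, 2, 3, 4)), 2)
//     -->
//   (add (extract_vector_elt X, 2), 3)
// The extract of the constant operand constant-folds away, so only the
// variable operand still needs a real element extract.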
23587
23588 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
23589 // recursively analyse all of its users and try to model them as
23590 // bit sequence extractions. If all of them agree on the new, narrower element
23591// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
23592// new element type, do so now.
23593 // This is mainly useful for recovering from legalization that scalarized
23594 // the vector into wide elements; this combine tries to rebuild it with narrower ones.
23595//
23596// Some more nodes could be modelled if that helps cover interesting patterns.
23597bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
23598 SDNode *N) {
23599 // We perform this optimization post type-legalization because
23600 // the type-legalizer often scalarizes integer-promoted vectors.
23601 // Performing this optimization earlier may cause legalization cycles.
23602 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23603 return false;
23604
23605 // TODO: Add support for big-endian.
23606 if (DAG.getDataLayout().isBigEndian())
23607 return false;
23608
23609 SDValue VecOp = N->getOperand(0);
23610 EVT VecVT = VecOp.getValueType();
23611 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
23612
23613 // We must start with a constant extraction index.
23614 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
23615 if (!IndexC)
23616 return false;
23617
23618 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
23619 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
23620
23621 // TODO: deal with the case of implicit anyext of the extraction.
23622 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23623 EVT ScalarVT = N->getValueType(0);
23624 if (VecVT.getScalarType() != ScalarVT)
23625 return false;
23626
23627 // TODO: deal with the cases other than everything being integer-typed.
23628 if (!ScalarVT.isScalarInteger())
23629 return false;
23630
23631 struct Entry {
23632 SDNode *Producer;
23633
23634 // Which bits of VecOp does it contain?
23635 unsigned BitPos;
23636 int NumBits;
23637 // NOTE: the actual width of \p Producer may be wider than NumBits!
23638
23639 Entry(Entry &&) = default;
23640 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
23641 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
23642
23643 Entry() = delete;
23644 Entry(const Entry &) = delete;
23645 Entry &operator=(const Entry &) = delete;
23646 Entry &operator=(Entry &&) = delete;
23647 };
23648 SmallVector<Entry, 32> Worklist;
23649 SmallVector<Entry, 32> Leafs;
23650
23651 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
23652 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
23653 /*NumBits=*/VecEltBitWidth);
23654
23655 while (!Worklist.empty()) {
23656 Entry E = Worklist.pop_back_val();
23657 // Does the node not even use any of the VecOp bits?
23658 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
23659 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
23660 return false; // Let's allow the other combines clean this up first.
23661 // Did we fail to model any of the users of the Producer?
23662 bool ProducerIsLeaf = false;
23663 // Look at each user of this Producer.
23664 for (SDNode *User : E.Producer->users()) {
23665 switch (User->getOpcode()) {
23666 // TODO: support ISD::BITCAST
23667 // TODO: support ISD::ANY_EXTEND
23668 // TODO: support ISD::ZERO_EXTEND
23669 // TODO: support ISD::SIGN_EXTEND
23670 case ISD::TRUNCATE:
23671 // Truncation simply means we keep position, but extract less bits.
23672 Worklist.emplace_back(User, E.BitPos,
23673 /*NumBits=*/User->getValueSizeInBits(0));
23674 break;
23675 // TODO: support ISD::SRA
23676 // TODO: support ISD::SHL
23677 case ISD::SRL:
23678 // We should be shifting the Producer by a constant amount.
23679 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
23680 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
23681 // Logical right-shift means that we start extraction later,
23682 // but stop it at the same position we did previously.
23683 unsigned ShAmt = ShAmtC->getZExtValue();
23684 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
23685 break;
23686 }
23687 [[fallthrough]];
23688 default:
23689 // We can not model this user of the Producer.
23690 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
23691 ProducerIsLeaf = true;
23692 // Profitability check: all users that we can not model
23693 // must be ISD::BUILD_VECTOR's.
23694 if (User->getOpcode() != ISD::BUILD_VECTOR)
23695 return false;
23696 break;
23697 }
23698 }
23699 if (ProducerIsLeaf)
23700 Leafs.emplace_back(std::move(E));
23701 }
23702
23703 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
23704
23705 // If we are still at the same element granularity, give up.
23706 if (NewVecEltBitWidth == VecEltBitWidth)
23707 return false;
23708
23709 // The vector width must be a multiple of the new element width.
23710 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
23711 return false;
23712
23713 // All leafs must agree on the new element width.
23714 // All leafs must not expect any "padding" bits on top of that width.
23715 // All leafs must start extraction from multiple of that width.
23716 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
23717 return (unsigned)E.NumBits == NewVecEltBitWidth &&
23718 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23719 E.BitPos % NewVecEltBitWidth == 0;
23720 }))
23721 return false;
23722
23723 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
23724 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
23725 VecVT.getSizeInBits() / NewVecEltBitWidth);
23726
23727 if (LegalTypes &&
23728 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23729 return false;
23730
23731 if (LegalOperations &&
23732 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23733 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23734 return false;
23735
23736 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23737 for (const Entry &E : Leafs) {
23738 SDLoc DL(E.Producer);
23739 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23740 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23741 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23742 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23743 DAG.getVectorIdxConstant(NewIndex, DL));
23744 CombineTo(E.Producer, V);
23745 }
23746
23747 return true;
23748}
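// Illustrative sketch, not taken from an actual test case, of the refinement
// above on a little-endian target, where every use of a wide extract only
// looks at 32-bit pieces that then feed BUILD_VECTORs:
//   t  = extract_vector_elt v2i64 V, 1
//   lo = truncate t to i32
//   hi = truncate (srl t, 32) to i32
//     -->
//   lo = extract_vector_elt (bitcast V to v4i32), 2
//   hi = extract_vector_elt (bitcast V to v4i32), 3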
23749
23750SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23751 SDValue VecOp = N->getOperand(0);
23752 SDValue Index = N->getOperand(1);
23753 EVT ScalarVT = N->getValueType(0);
23754 EVT VecVT = VecOp.getValueType();
23755 if (VecOp.isUndef())
23756 return DAG.getUNDEF(ScalarVT);
23757
23758 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23759 //
23760 // This only really matters if the index is non-constant since other combines
23761 // on the constant elements already work.
23762 SDLoc DL(N);
23763 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23764 Index == VecOp.getOperand(2)) {
23765 SDValue Elt = VecOp.getOperand(1);
23766 AddUsersToWorklist(VecOp.getNode());
23767 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23768 }
23769
23770 // (vextract (scalar_to_vector val, 0) -> val
23771 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23772 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23773 if (DAG.isKnownNeverZero(Index))
23774 return DAG.getUNDEF(ScalarVT);
23775
23776 // Check if the result type doesn't match the inserted element type.
23777 // The inserted element and extracted element may have mismatched bitwidth.
23778 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23779 SDValue InOp = VecOp.getOperand(0);
23780 if (InOp.getValueType() != ScalarVT) {
23781 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23782 if (InOp.getValueType().bitsGT(ScalarVT))
23783 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23784 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23785 }
23786 return InOp;
23787 }
23788
23789 // extract_vector_elt of out-of-bounds element -> UNDEF
23790 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23791 if (IndexC && VecVT.isFixedLengthVector() &&
23792 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23793 return DAG.getUNDEF(ScalarVT);
23794
23795 // extract_vector_elt (build_vector x, y), 1 -> y
23796 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23797 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23798 TLI.isTypeLegal(VecVT)) {
23799 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23800 VecVT.isFixedLengthVector()) &&
23801 "BUILD_VECTOR used for scalable vectors");
23802 unsigned IndexVal =
23803 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23804 SDValue Elt = VecOp.getOperand(IndexVal);
23805 EVT InEltVT = Elt.getValueType();
23806
23807 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23808 isNullConstant(Elt)) {
23809 // Sometimes build_vector's scalar input types do not match result type.
23810 if (ScalarVT == InEltVT)
23811 return Elt;
23812
23813 // TODO: It may be useful to truncate if free if the build_vector
23814 // implicitly converts.
23815 }
23816 }
23817
23818 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23819 return BO;
23820
23821 if (VecVT.isScalableVector())
23822 return SDValue();
23823
23824 // All the code from this point onwards assumes fixed width vectors, but it's
23825 // possible that some of the combinations could be made to work for scalable
23826 // vectors too.
23827 unsigned NumElts = VecVT.getVectorNumElements();
23828 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23829
23830 // See if the extracted element is constant, in which case fold it if it's
23831 // a legal fp immediate.
23832 if (IndexC && ScalarVT.isFloatingPoint()) {
23833 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23834 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23835 if (KnownElt.isConstant()) {
23836 APFloat CstFP =
23837 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23838 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23839 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23840 }
23841 }
23842
23843 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23844 // there are regressions on multiple targets without it. We can end up with a
23845 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23846 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23847 VecOp.hasOneUse()) {
23848 // The vector index of the LSBs of the source depends on the endianness.
23849 bool IsLE = DAG.getDataLayout().isLittleEndian();
23850 unsigned ExtractIndex = IndexC->getZExtValue();
23851 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23852 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23853 SDValue BCSrc = VecOp.getOperand(0);
23854 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23855 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23856
23857 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23858 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23859 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23860 BCSrc.getScalarValueSizeInBits() ==
23861 BCSrc.getOperand(0).getScalarValueSizeInBits()) {
23862 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23863 // trunc i64 X to i32
23864 SDValue X = BCSrc.getOperand(0);
23865 EVT XVT = X.getValueType();
23866 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23867 "Extract element and scalar to vector can't change element type "
23868 "from FP to integer.");
23869 unsigned XBitWidth = X.getValueSizeInBits();
23870 unsigned Scale = XBitWidth / VecEltBitWidth;
23871 BCTruncElt = IsLE ? 0 : Scale - 1;
23872
23873 // An extract element return value type can be wider than its vector
23874 // operand element type. In that case, the high bits are undefined, so
23875 // it's possible that we may need to extend rather than truncate.
23876 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23877 assert(XBitWidth % VecEltBitWidth == 0 &&
23878 "Scalar bitwidth must be a multiple of vector element bitwidth");
23879
23880 if (ExtractIndex != BCTruncElt) {
23881 unsigned ShiftIndex =
23882 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23883 X = DAG.getNode(
23884 ISD::SRL, DL, XVT, X,
23885 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23886 }
23887
23888 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23889 }
23890 }
23891 }
23892
23893 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23894 // We only perform this optimization before the op legalization phase because
23895 // we may introduce new vector instructions which are not backed by TD
23896 // patterns. For example, on AVX this could mean extracting elements from a
23897 // wide vector without using extract_subvector. However, if we can find an underlying
23898 // scalar value, then we can always use that.
23899 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23900 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23901 // Find the new index to extract from.
23902 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23903
23904 // Extracting an undef index is undef.
23905 if (OrigElt == -1)
23906 return DAG.getUNDEF(ScalarVT);
23907
23908 // Select the right vector half to extract from.
23909 SDValue SVInVec;
23910 if (OrigElt < (int)NumElts) {
23911 SVInVec = VecOp.getOperand(0);
23912 } else {
23913 SVInVec = VecOp.getOperand(1);
23914 OrigElt -= NumElts;
23915 }
23916
23917 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
23918 // TODO: Check if shuffle mask is legal?
23919 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
23920 !VecOp.hasOneUse())
23921 return SDValue();
23922
23923 SDValue InOp = SVInVec.getOperand(OrigElt);
23924 if (InOp.getValueType() != ScalarVT) {
23925 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23926 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
23927 }
23928
23929 return InOp;
23930 }
23931
23932 // FIXME: We should handle recursing on other vector shuffles and
23933 // scalar_to_vector here as well.
23934
23935 if (!LegalOperations ||
23936 // FIXME: Should really be just isOperationLegalOrCustom.
23937 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23938 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT))
23939 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
23940 DAG.getVectorIdxConstant(OrigElt, DL));
23941 }
23942 }
23943
23944 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
23945 // simplify it based on the (valid) extraction indices.
23946 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
23947 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23948 Use->getOperand(0) == VecOp &&
23949 isa<ConstantSDNode>(Use->getOperand(1));
23950 })) {
23951 APInt DemandedElts = APInt::getZero(NumElts);
23952 for (SDNode *User : VecOp->users()) {
23953 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
23954 if (CstElt->getAPIntValue().ult(NumElts))
23955 DemandedElts.setBit(CstElt->getZExtValue());
23956 }
23957 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
23958 // We simplified the vector operand of this extract element. If this
23959 // extract is not dead, visit it again so it is folded properly.
23960 if (N->getOpcode() != ISD::DELETED_NODE)
23961 AddToWorklist(N);
23962 return SDValue(N, 0);
23963 }
23964 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
23965 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
23966 // We simplified the vector operand of this extract element. If this
23967 // extract is not dead, visit it again so it is folded properly.
23968 if (N->getOpcode() != ISD::DELETED_NODE)
23969 AddToWorklist(N);
23970 return SDValue(N, 0);
23971 }
23972 }
23973
23974 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
23975 return SDValue(N, 0);
23976
23977 // Everything under here is trying to match an extract of a loaded value.
23978 // If the result of the load has to be truncated, then it's not necessarily
23979 // profitable.
23980 bool BCNumEltsChanged = false;
23981 EVT ExtVT = VecVT.getVectorElementType();
23982 EVT LVT = ExtVT;
23983 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
23984 return SDValue();
23985
23986 if (VecOp.getOpcode() == ISD::BITCAST) {
23987 // Don't duplicate a load with other uses.
23988 if (!VecOp.hasOneUse())
23989 return SDValue();
23990
23991 EVT BCVT = VecOp.getOperand(0).getValueType();
23992 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
23993 return SDValue();
23994 if (NumElts != BCVT.getVectorNumElements())
23995 BCNumEltsChanged = true;
23996 VecOp = VecOp.getOperand(0);
23997 ExtVT = BCVT.getVectorElementType();
23998 }
23999
24000 // extract (vector load $addr), i --> load $addr + i * size
24001 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24002 ISD::isNormalLoad(VecOp.getNode()) &&
24003 !Index->hasPredecessor(VecOp.getNode())) {
24004 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
24005 if (VecLoad && VecLoad->isSimple()) {
24006 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24007 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
24008 ++OpsNarrowed;
24009 return Scalarized;
24010 }
24011 }
24012 }
24013
24014 // Perform only after legalization to ensure build_vector / vector_shuffle
24015 // optimizations have already been done.
24016 if (!LegalOperations || !IndexC)
24017 return SDValue();
24018
24019 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24020 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24021 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24022 int Elt = IndexC->getZExtValue();
24023 LoadSDNode *LN0 = nullptr;
24024 if (ISD::isNormalLoad(VecOp.getNode())) {
24025 LN0 = cast<LoadSDNode>(VecOp);
24026 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24027 VecOp.getOperand(0).getValueType() == ExtVT &&
24028 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24029 // Don't duplicate a load with other uses.
24030 if (!VecOp.hasOneUse())
24031 return SDValue();
24032
24033 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24034 }
24035 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24036 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24037 // =>
24038 // (load $addr+1*size)
24039
24040 // Don't duplicate a load with other uses.
24041 if (!VecOp.hasOneUse())
24042 return SDValue();
24043
24044 // If the bit convert changed the number of elements, it is unsafe
24045 // to examine the mask.
24046 if (BCNumEltsChanged)
24047 return SDValue();
24048
24049 // Select the input vector, guarding against an out-of-range extract index.
24050 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24051 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24052
24053 if (VecOp.getOpcode() == ISD::BITCAST) {
24054 // Don't duplicate a load with other uses.
24055 if (!VecOp.hasOneUse())
24056 return SDValue();
24057
24058 VecOp = VecOp.getOperand(0);
24059 }
24060 if (ISD::isNormalLoad(VecOp.getNode())) {
24061 LN0 = cast<LoadSDNode>(VecOp);
24062 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24063 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24064 }
24065 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24066 VecVT.getVectorElementType() == ScalarVT &&
24067 (!LegalTypes ||
24068 TLI.isTypeLegal(
24069 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24070 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24071 // -> extract_vector_elt a, 0
24072 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24073 // -> extract_vector_elt a, 1
24074 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24075 // -> extract_vector_elt b, 0
24076 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24077 // -> extract_vector_elt b, 1
24078 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24079 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24080 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24081 Index.getValueType());
24082
24083 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24084 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24085 ConcatVT.getVectorElementType(),
24086 ConcatOp, NewIdx);
24087 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24088 }
24089
24090 // Make sure we found a non-volatile load and the extractelement is
24091 // the only use.
24092 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24093 return SDValue();
24094
24095 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24096 if (Elt == -1)
24097 return DAG.getUNDEF(LVT);
24098
24099 if (SDValue Scalarized =
24100 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24101 ++OpsNarrowed;
24102 return Scalarized;
24103 }
24104
24105 return SDValue();
24106}
24107
24108// Simplify (build_vec (ext )) to (bitcast (build_vec ))
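// Illustrative example (little-endian): each zero-extended scalar becomes the
// low chunk of its wider lane, with zero (or undef for any_extend) filler:
//   (v2i64 build_vector (zext i32:a), (zext i32:b))
//     -> (bitcast (v4i32 build_vector a, 0, b, 0))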
24109SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24110 // We perform this optimization post type-legalization because
24111 // the type-legalizer often scalarizes integer-promoted vectors.
24112 // Performing this optimization before may create bit-casts which
24113 // will be type-legalized to complex code sequences.
24114 // We perform this optimization only before the operation legalizer because we
24115 // may introduce illegal operations.
24116 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24117 return SDValue();
24118
24119 unsigned NumInScalars = N->getNumOperands();
24120 SDLoc DL(N);
24121 EVT VT = N->getValueType(0);
24122
24123 // Check to see if this is a BUILD_VECTOR of a bunch of values
24124 // which come from any_extend or zero_extend nodes. If so, we can create
24125 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24126 // optimizations. We do not handle sign-extend because we can't fill the sign
24127 // using shuffles.
24128 EVT SourceType = MVT::Other;
24129 bool AllAnyExt = true;
24130
24131 for (unsigned i = 0; i != NumInScalars; ++i) {
24132 SDValue In = N->getOperand(i);
24133 // Ignore undef inputs.
24134 if (In.isUndef()) continue;
24135
24136 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24137 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24138
24139 // Abort if the element is not an extension.
24140 if (!ZeroExt && !AnyExt) {
24141 SourceType = MVT::Other;
24142 break;
24143 }
24144
24145 // The input is a ZeroExt or AnyExt. Check the original type.
24146 EVT InTy = In.getOperand(0).getValueType();
24147
24148 // Check that all of the widened source types are the same.
24149 if (SourceType == MVT::Other)
24150 // First time.
24151 SourceType = InTy;
24152 else if (InTy != SourceType) {
24153 // Multiple incoming source types. Abort.
24154 SourceType = MVT::Other;
24155 break;
24156 }
24157
24158 // Check if all of the extends are ANY_EXTENDs.
24159 AllAnyExt &= AnyExt;
24160 }
24161
24162 // In order to have valid types, all of the inputs must be extended from the
24163 // same source type and all of the inputs must be any or zero extend.
24164 // Scalar sizes must be a power of two.
24165 EVT OutScalarTy = VT.getScalarType();
24166 bool ValidTypes =
24167 SourceType != MVT::Other &&
24168 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24169 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24170
24171 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24172 // turn into a single shuffle instruction.
24173 if (!ValidTypes)
24174 return SDValue();
24175
24176 // If we already have a splat buildvector, then don't fold it if it means
24177 // introducing zeros.
24178 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24179 return SDValue();
24180
24181 bool isLE = DAG.getDataLayout().isLittleEndian();
24182 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24183 assert(ElemRatio > 1 && "Invalid element size ratio");
24184 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24185 DAG.getConstant(0, DL, SourceType);
24186
24187 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24188 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24189
24190 // Populate the new build_vector
24191 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24192 SDValue Cast = N->getOperand(i);
24193 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24194 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24195 Cast.isUndef()) && "Invalid cast opcode");
24196 SDValue In;
24197 if (Cast.isUndef())
24198 In = DAG.getUNDEF(SourceType);
24199 else
24200 In = Cast->getOperand(0);
24201 unsigned Index = isLE ? (i * ElemRatio) :
24202 (i * ElemRatio + (ElemRatio - 1));
24203
24204 assert(Index < Ops.size() && "Invalid index");
24205 Ops[Index] = In;
24206 }
24207
24208 // The type of the new BUILD_VECTOR node.
24209 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24210 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24211 "Invalid vector size");
24212 // Check if the new vector type is legal.
24213 if (!isTypeLegal(VecVT) ||
24214 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24215 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
24216 return SDValue();
24217
24218 // Make the new BUILD_VECTOR.
24219 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24220
24221 // The new BUILD_VECTOR node has the potential to be further optimized.
24222 AddToWorklist(BV.getNode());
24223 // Bitcast to the desired type.
24224 return DAG.getBitcast(VT, BV);
24225}
24226
24227// Simplify (build_vec (trunc $1)
24228// (trunc (srl $1 half-width))
24229// (trunc (srl $1 (2 * half-width))))
24230// to (bitcast $1)
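// Illustrative example (little-endian):
//   (v4i16 build_vector (trunc i64:x),
//                       (trunc (srl i64:x, 16)),
//                       (trunc (srl i64:x, 32)),
//                       (trunc (srl i64:x, 48)))
//     -> (v4i16 bitcast i64:x)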
24231SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24232 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24233
24234 EVT VT = N->getValueType(0);
24235
24236 // Don't run this before LegalizeTypes if VT is legal.
24237 // Targets may have other preferences.
24238 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24239 return SDValue();
24240
24241 // Only for little endian
24242 if (!DAG.getDataLayout().isLittleEndian())
24243 return SDValue();
24244
24245 EVT OutScalarTy = VT.getScalarType();
24246 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24247
24248 // Only for power of two types to be sure that bitcast works well
24249 if (!isPowerOf2_64(ScalarTypeBitsize))
24250 return SDValue();
24251
24252 unsigned NumInScalars = N->getNumOperands();
24253
24254 // Look through bitcasts
24255 auto PeekThroughBitcast = [](SDValue Op) {
24256 if (Op.getOpcode() == ISD::BITCAST)
24257 return Op.getOperand(0);
24258 return Op;
24259 };
24260
24261 // The source value where all the parts are extracted.
24262 SDValue Src;
24263 for (unsigned i = 0; i != NumInScalars; ++i) {
24264 SDValue In = PeekThroughBitcast(N->getOperand(i));
24265 // Ignore undef inputs.
24266 if (In.isUndef()) continue;
24267
24268 if (In.getOpcode() != ISD::TRUNCATE)
24269 return SDValue();
24270
24271 In = PeekThroughBitcast(In.getOperand(0));
24272
24273 if (In.getOpcode() != ISD::SRL) {
24274 // For now only build_vec without shuffling, handle shifts here in the
24275 // future.
24276 if (i != 0)
24277 return SDValue();
24278
24279 Src = In;
24280 } else {
24281 // In is SRL
24282 SDValue part = PeekThroughBitcast(In.getOperand(0));
24283
24284 if (!Src) {
24285 Src = part;
24286 } else if (Src != part) {
24287 // Vector parts do not stem from the same variable
24288 return SDValue();
24289 }
24290
24291 SDValue ShiftAmtVal = In.getOperand(1);
24292 if (!isa<ConstantSDNode>(ShiftAmtVal))
24293 return SDValue();
24294
24295 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24296
24297 // The extracted value is not extracted at the right position
24298 if (ShiftAmt != i * ScalarTypeBitsize)
24299 return SDValue();
24300 }
24301 }
24302
24303 // Only cast if the size is the same
24304 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24305 return SDValue();
24306
24307 return DAG.getBitcast(VT, Src);
24308}
24309
24310SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24311 ArrayRef<int> VectorMask,
24312 SDValue VecIn1, SDValue VecIn2,
24313 unsigned LeftIdx, bool DidSplitVec) {
24314 EVT VT = N->getValueType(0);
24315 EVT InVT1 = VecIn1.getValueType();
24316 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24317
24318 unsigned NumElems = VT.getVectorNumElements();
24319 unsigned ShuffleNumElems = NumElems;
24320
24321 // If we artificially split a vector in two already, then the offsets in the
24322 // operands will all be based off of VecIn1, even those in VecIn2.
24323 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24324
24325 uint64_t VTSize = VT.getFixedSizeInBits();
24326 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24327 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24328
24329 assert(InVT2Size <= InVT1Size &&
24330 "Inputs must be sorted to be in non-increasing vector size order.");
24331
24332 // We can't generate a shuffle node with mismatched input and output types.
24333 // Try to make the types match the type of the output.
24334 if (InVT1 != VT || InVT2 != VT) {
24335 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24336 // If the output vector length is a multiple of both input lengths,
24337 // we can concatenate them and pad the rest with undefs.
24338 unsigned NumConcats = VTSize / InVT1Size;
24339 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24340 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24341 ConcatOps[0] = VecIn1;
24342 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24343 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24344 VecIn2 = SDValue();
24345 } else if (InVT1Size == VTSize * 2) {
24346 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24347 return SDValue();
24348
24349 if (!VecIn2.getNode()) {
24350 // If we only have one input vector, and it's twice the size of the
24351 // output, split it in two.
24352 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24353 DAG.getVectorIdxConstant(NumElems, DL));
24354 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24355 // Since we now have shorter input vectors, adjust the offset of the
24356 // second vector's start.
24357 Vec2Offset = NumElems;
24358 } else {
24359 assert(InVT2Size <= InVT1Size &&
24360 "Second input is not going to be larger than the first one.");
24361
24362 // VecIn1 is wider than the output, and we have another, possibly
24363 // smaller input. Pad the smaller input with undefs, shuffle at the
24364 // input vector width, and extract the output.
24365 // The shuffle type is different than VT, so check legality again.
24366 if (LegalOperations &&
24367 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24368 return SDValue();
24369
24370 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24371 // lower it back into a BUILD_VECTOR. So if the inserted type is
24372 // illegal, don't even try.
24373 if (InVT1 != InVT2) {
24374 if (!TLI.isTypeLegal(InVT2))
24375 return SDValue();
24376 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24377 }
24378 ShuffleNumElems = NumElems * 2;
24379 }
24380 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24381 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24382 ConcatOps[0] = VecIn2;
24383 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24384 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24385 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24386 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24387 return SDValue();
24388 // If the destination vector has fewer than two elements, then using a shuffle
24389 // and extracting from larger registers will cost even more.
24390 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24391 return SDValue();
24392 assert(InVT2Size <= InVT1Size &&
24393 "Second input is not going to be larger than the first one.");
24394
24395 // VecIn1 is wider than the output, and we have another, possibly
24396 // smaller input. Pad the smaller input with undefs, shuffle at the
24397 // input vector width, and extract the output.
24398 // The shuffle type is different than VT, so check legality again.
24399 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24400 return SDValue();
24401
24402 if (InVT1 != InVT2) {
24403 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24404 }
24405 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24406 } else {
24407 // TODO: Support cases where the length mismatch isn't exactly by a
24408 // factor of 2.
24409 // TODO: Move this check upwards, so that if we have bad type
24410 // mismatches, we don't create any DAG nodes.
24411 return SDValue();
24412 }
24413 }
24414
24415 // Initialize mask to undef.
24416 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24417
24418 // Only need to run up to the number of elements actually used, not the
24419 // total number of elements in the shuffle - if we are shuffling a wider
24420 // vector, the high lanes should be set to undef.
24421 for (unsigned i = 0; i != NumElems; ++i) {
24422 if (VectorMask[i] <= 0)
24423 continue;
24424
24425 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24426 if (VectorMask[i] == (int)LeftIdx) {
24427 Mask[i] = ExtIndex;
24428 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24429 Mask[i] = Vec2Offset + ExtIndex;
24430 }
24431 }
24432
24433 // The type the input vectors may have changed above.
24434 InVT1 = VecIn1.getValueType();
24435
24436 // If we already have a VecIn2, it should have the same type as VecIn1.
24437 // If we don't, get an undef/zero vector of the appropriate type.
24438 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24439 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24440
24441 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24442 if (ShuffleNumElems > NumElems)
24443 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24444
24445 return Shuffle;
24446}
24447
24448 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24449 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24450
24451 // First, determine where the build vector is not undef.
24452 // TODO: We could extend this to handle zero elements as well as undefs.
24453 int NumBVOps = BV->getNumOperands();
24454 int ZextElt = -1;
24455 for (int i = 0; i != NumBVOps; ++i) {
24456 SDValue Op = BV->getOperand(i);
24457 if (Op.isUndef())
24458 continue;
24459 if (ZextElt == -1)
24460 ZextElt = i;
24461 else
24462 return SDValue();
24463 }
24464 // Bail out if there's no non-undef element.
24465 if (ZextElt == -1)
24466 return SDValue();
24467
24468 // The build vector contains some number of undef elements and exactly
24469 // one other element. That other element must be a zero-extended scalar
24470 // extracted from a vector at a constant index to turn this into a shuffle.
24471 // Also, require that the build vector does not implicitly truncate/extend
24472 // its elements.
24473 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
24474 EVT VT = BV->getValueType(0);
24475 SDValue Zext = BV->getOperand(ZextElt);
24476 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24477 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24478 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24479 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24480 return SDValue();
24481
24482 // The zero-extend must be a multiple of the source size, and we must be
24483 // building a vector of the same size as the source of the extract element.
24484 SDValue Extract = Zext.getOperand(0);
24485 unsigned DestSize = Zext.getValueSizeInBits();
24486 unsigned SrcSize = Extract.getValueSizeInBits();
24487 if (DestSize % SrcSize != 0 ||
24488 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24489 return SDValue();
24490
24491 // Create a shuffle mask that will combine the extracted element with zeros
24492 // and undefs.
24493 int ZextRatio = DestSize / SrcSize;
24494 int NumMaskElts = NumBVOps * ZextRatio;
24495 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
24496 for (int i = 0; i != NumMaskElts; ++i) {
24497 if (i / ZextRatio == ZextElt) {
24498 // The low bits of the (potentially translated) extracted element map to
24499 // the source vector. The high bits map to zero. We will use a zero vector
24500 // as the 2nd source operand of the shuffle, so use the 1st element of
24501 // that vector (mask value is number-of-elements) for the high bits.
24502 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
24503 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
24504 : NumMaskElts;
24505 }
24506
24507 // Undef elements of the build vector remain undef because we initialize
24508 // the shuffle mask with -1.
24509 }
24510
24511 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
24512 // bitcast (shuffle V, ZeroVec, VectorMask)
24513 SDLoc DL(BV);
24514 EVT VecVT = Extract.getOperand(0).getValueType();
24515 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
24516 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24517 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
24518 ZeroVec, ShufMask, DAG);
24519 if (!Shuf)
24520 return SDValue();
24521 return DAG.getBitcast(VT, Shuf);
24522}
24523
24524// FIXME: promote to STLExtras.
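// Returns the zero-based position of the first occurrence of Val in Range, or
// -1 if Val is not present.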
24525template <typename R, typename T>
24526static auto getFirstIndexOf(R &&Range, const T &Val) {
24527 auto I = find(Range, Val);
24528 if (I == Range.end())
24529 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
24530 return std::distance(Range.begin(), I);
24531}
24532
24533// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
24534// operations. If the types of the vectors we're extracting from allow it,
24535// turn this into a vector_shuffle node.
24536SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
24537 SDLoc DL(N);
24538 EVT VT = N->getValueType(0);
24539
24540 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
24541 if (!isTypeLegal(VT))
24542 return SDValue();
24543
24544 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
24545 return V;
24546
24547 // May only combine to shuffle after legalize if shuffle is legal.
24548 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
24549 return SDValue();
24550
24551 bool UsesZeroVector = false;
24552 unsigned NumElems = N->getNumOperands();
24553
24554 // Record, for each element of the newly built vector, which input vector
24555 // that element comes from. -1 stands for undef, 0 for the zero vector,
24556 // and positive values for the input vectors.
24557 // VectorMask maps each element to its vector number, and VecIn maps vector
24558 // numbers to their initial SDValues.
24559
24560 SmallVector<int, 8> VectorMask(NumElems, -1);
24561 SmallVector<SDValue, 8> VecIn;
24562 VecIn.push_back(SDValue());
24563
24564 // If we have a single extract_element with a constant index, track the index
24565 // value.
24566 unsigned OneConstExtractIndex = ~0u;
24567
24568 // Count the number of extract_vector_elt operands (i.e. the operands that are neither undef nor constant zero).
24569 unsigned NumExtracts = 0;
24570
24571 for (unsigned i = 0; i != NumElems; ++i) {
24572 SDValue Op = N->getOperand(i);
24573
24574 if (Op.isUndef())
24575 continue;
24576
24577 // See if we can use a blend with a zero vector.
24578 // TODO: Should we generalize this to a blend with an arbitrary constant
24579 // vector?
24580 if (isNullConstant(Op) || isNullFPConstant(Op)) {
24581 UsesZeroVector = true;
24582 VectorMask[i] = 0;
24583 continue;
24584 }
24585
24586 // Not an undef or zero. If the input is something other than an
24587 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
24588 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
24589 return SDValue();
24590
24591 SDValue ExtractedFromVec = Op.getOperand(0);
24592 if (ExtractedFromVec.getValueType().isScalableVector())
24593 return SDValue();
24594 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24595 if (!ExtractIdx)
24596 return SDValue();
24597
24598 if (ExtractIdx->getAsAPIntVal().uge(
24599 ExtractedFromVec.getValueType().getVectorNumElements()))
24600 return SDValue();
24601
24602 // All inputs must have the same element type as the output.
24603 if (VT.getVectorElementType() !=
24604 ExtractedFromVec.getValueType().getVectorElementType())
24605 return SDValue();
24606
24607 OneConstExtractIndex = ExtractIdx->getZExtValue();
24608 ++NumExtracts;
24609
24610 // Have we seen this input vector before?
24611 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
24612 // a map back from SDValues to numbers isn't worth it.
24613 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
24614 if (Idx == -1) { // A new source vector?
24615 Idx = VecIn.size();
24616 VecIn.push_back(ExtractedFromVec);
24617 }
24618
24619 VectorMask[i] = Idx;
24620 }
24621
24622 // If we didn't find at least one input vector, bail out.
24623 if (VecIn.size() < 2)
24624 return SDValue();
24625
24626 // If all the Operands of BUILD_VECTOR extract from same
24627 // vector, then split the vector efficiently based on the maximum
24628 // vector access index and adjust the VectorMask and
24629 // VecIn accordingly.
24630 bool DidSplitVec = false;
24631 if (VecIn.size() == 2) {
24632 // If we only found a single constant indexed extract_vector_elt feeding the
24633 // build_vector, do not produce a more complicated shuffle if the extract is
24634 // cheap with other constant/undef elements. Skip broadcast patterns with
24635 // multiple uses in the build_vector.
24636
24637 // TODO: This should be more aggressive about skipping the shuffle
24638 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
24639 // index.
24640 if (NumExtracts == 1 &&
24641 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
24642 TLI.isTypeLegal(VT.getVectorElementType()) &&
24643 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
24644 return SDValue();
24645
24646 unsigned MaxIndex = 0;
24647 unsigned NearestPow2 = 0;
24648 SDValue Vec = VecIn.back();
24649 EVT InVT = Vec.getValueType();
24650 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
24651
24652 for (unsigned i = 0; i < NumElems; i++) {
24653 if (VectorMask[i] <= 0)
24654 continue;
24655 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
24656 IndexVec[i] = Index;
24657 MaxIndex = std::max(MaxIndex, Index);
24658 }
24659
24660 NearestPow2 = PowerOf2Ceil(MaxIndex);
24661 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
24662 NumElems * 2 < NearestPow2) {
24663 unsigned SplitSize = NearestPow2 / 2;
24664 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
24665 InVT.getVectorElementType(), SplitSize);
24666 if (TLI.isTypeLegal(SplitVT) &&
24667 SplitSize + SplitVT.getVectorNumElements() <=
24668 InVT.getVectorNumElements()) {
24669 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24670 DAG.getVectorIdxConstant(SplitSize, DL));
24671 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24672 DAG.getVectorIdxConstant(0, DL));
24673 VecIn.pop_back();
24674 VecIn.push_back(VecIn1);
24675 VecIn.push_back(VecIn2);
24676 DidSplitVec = true;
24677
24678 for (unsigned i = 0; i < NumElems; i++) {
24679 if (VectorMask[i] <= 0)
24680 continue;
24681 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
24682 }
24683 }
24684 }
24685 }
24686
24687 // Sort input vectors by decreasing vector element count,
24688 // while preserving the relative order of equally-sized vectors.
24689 // Note that we keep the first "implicit" zero vector as-is.
24690 SmallVector<SDValue, 8> SortedVecIn(VecIn);
24691 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
24692 [](const SDValue &a, const SDValue &b) {
24693 return a.getValueType().getVectorNumElements() >
24694 b.getValueType().getVectorNumElements();
24695 });
24696
24697 // We now also need to rebuild the VectorMask, because it referenced element
24698 // order in VecIn, and we just sorted them.
24699 for (int &SourceVectorIndex : VectorMask) {
24700 if (SourceVectorIndex <= 0)
24701 continue;
24702 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
24703 assert(Idx > 0 && Idx < SortedVecIn.size() &&
24704 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
24705 SourceVectorIndex = Idx;
24706 }
24707
24708 VecIn = std::move(SortedVecIn);
24709
24710 // TODO: Should this fire if some of the input vectors has illegal type (like
24711 // it does now), or should we let legalization run its course first?
24712
24713 // Shuffle phase:
24714 // Take pairs of vectors, and shuffle them so that the result has elements
24715 // from these vectors in the correct places.
24716 // For example, given:
24717 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
24718 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
24719 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
24720 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
24721 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
24722 // We will generate:
24723 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
24724 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
24725 SmallVector<SDValue, 4> Shuffles;
24726 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
24727 unsigned LeftIdx = 2 * In + 1;
24728 SDValue VecLeft = VecIn[LeftIdx];
24729 SDValue VecRight =
24730 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
24731
24732 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
24733 VecRight, LeftIdx, DidSplitVec))
24734 Shuffles.push_back(Shuffle);
24735 else
24736 return SDValue();
24737 }
24738
24739 // If we need the zero vector as an "ingredient" in the blend tree, add it
24740 // to the list of shuffles.
24741 if (UsesZeroVector)
24742 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24743 : DAG.getConstantFP(0.0, DL, VT));
24744
24745 // If we only have one shuffle, we're done.
24746 if (Shuffles.size() == 1)
24747 return Shuffles[0];
24748
24749 // Update the vector mask to point to the post-shuffle vectors.
24750 for (int &Vec : VectorMask)
24751 if (Vec == 0)
24752 Vec = Shuffles.size() - 1;
24753 else
24754 Vec = (Vec - 1) / 2;
24755
24756 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24757 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24758 // generate:
24759 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24760 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24761 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24762 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24763 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24764 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24765 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24766
24767 // Make sure the initial size of the shuffle list is even.
24768 if (Shuffles.size() % 2)
24769 Shuffles.push_back(DAG.getUNDEF(VT));
24770
24771 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24772 if (CurSize % 2) {
24773 Shuffles[CurSize] = DAG.getUNDEF(VT);
24774 CurSize++;
24775 }
24776 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24777 int Left = 2 * In;
24778 int Right = 2 * In + 1;
24779 SmallVector<int, 8> Mask(NumElems, -1);
24780 SDValue L = Shuffles[Left];
24781 ArrayRef<int> LMask;
24782 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24783 L.use_empty() && L.getOperand(1).isUndef() &&
24784 L.getOperand(0).getValueType() == L.getValueType();
24785 if (IsLeftShuffle) {
24786 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24787 L = L.getOperand(0);
24788 }
24789 SDValue R = Shuffles[Right];
24790 ArrayRef<int> RMask;
24791 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24792 R.use_empty() && R.getOperand(1).isUndef() &&
24793 R.getOperand(0).getValueType() == R.getValueType();
24794 if (IsRightShuffle) {
24795 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24796 R = R.getOperand(0);
24797 }
24798 for (unsigned I = 0; I != NumElems; ++I) {
24799 if (VectorMask[I] == Left) {
24800 Mask[I] = I;
24801 if (IsLeftShuffle)
24802 Mask[I] = LMask[I];
24803 VectorMask[I] = In;
24804 } else if (VectorMask[I] == Right) {
24805 Mask[I] = I + NumElems;
24806 if (IsRightShuffle)
24807 Mask[I] = RMask[I] + NumElems;
24808 VectorMask[I] = In;
24809 }
24810 }
24811
24812 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24813 }
24814 }
24815 return Shuffles[0];
24816}
24817
24818 // Try to turn a build vector of zero extends of extract vector elts into a
24819 // vector zero extend and possibly an extract subvector.
24820// TODO: Support sign extend?
24821// TODO: Allow undef elements?
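// Illustrative example:
//   (v2i32 build_vector (zext (extract_vector_elt v4i16:x, 2)),
//                       (zext (extract_vector_elt v4i16:x, 3)))
//     -> (zero_extend (v2i16 extract_subvector v4i16:x, 2))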
24822SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24823 if (LegalOperations)
24824 return SDValue();
24825
24826 EVT VT = N->getValueType(0);
24827
24828 bool FoundZeroExtend = false;
24829 SDValue Op0 = N->getOperand(0);
24830 auto checkElem = [&](SDValue Op) -> int64_t {
24831 unsigned Opc = Op.getOpcode();
24832 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24833 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24834 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24835 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24836 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24837 return C->getZExtValue();
24838 return -1;
24839 };
24840
24841 // Make sure the first element matches
24842 // (zext (extract_vector_elt X, C))
24843 // Offset must be a constant multiple of the
24844 // known-minimum vector length of the result type.
24845 int64_t Offset = checkElem(Op0);
24846 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24847 return SDValue();
24848
24849 unsigned NumElems = N->getNumOperands();
24850 SDValue In = Op0.getOperand(0).getOperand(0);
24851 EVT InSVT = In.getValueType().getScalarType();
24852 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24853
24854 // Don't create an illegal input type after type legalization.
24855 if (LegalTypes && !TLI.isTypeLegal(InVT))
24856 return SDValue();
24857
24858 // Ensure all the elements come from the same vector and are adjacent.
24859 for (unsigned i = 1; i != NumElems; ++i) {
24860 if ((Offset + i) != checkElem(N->getOperand(i)))
24861 return SDValue();
24862 }
24863
24864 SDLoc DL(N);
24865 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24866 Op0.getOperand(0).getOperand(1));
24867 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24868 VT, In);
24869}
24870
24871// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
24872 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
24873 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
24874 // This pattern can appear during legalization.
24875//
24876// NOTE: This can be generalized to allow more than a single
24877 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
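// Illustrative example (little-endian), with a 64-bit element type whose only
// non-zero operand zero-extends a 32-bit value x:
//   (v2i64 build_vector (zext i32:x), (i64 0))
//     -> (bitcast (v4i32 build_vector x, 0, 0, 0))
// (the truncate of the zero-extended operand folds back to x).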
24878SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24879 // Don't run this after legalization. Targets may have other preferences.
24880 if (Level >= AfterLegalizeDAG)
24881 return SDValue();
24882
24883 // FIXME: support big-endian.
24884 if (DAG.getDataLayout().isBigEndian())
24885 return SDValue();
24886
24887 EVT VT = N->getValueType(0);
24888 EVT OpVT = N->getOperand(0).getValueType();
24889 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24890
24891 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24892
24893 if (!TLI.isTypeLegal(OpIntVT) ||
24894 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24895 return SDValue();
24896
24897 unsigned EltBitwidth = VT.getScalarSizeInBits();
24898 // NOTE: the actual width of operands may be wider than that!
24899
24900 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24901 // active bits they all have? We'll want to truncate them all to that width.
24902 unsigned ActiveBits = 0;
24903 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24904 for (auto I : enumerate(N->ops())) {
24905 SDValue Op = I.value();
24906 // FIXME: support UNDEF elements?
24907 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
24908 unsigned OpActiveBits =
24909 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
24910 if (OpActiveBits == 0) {
24911 KnownZeroOps.setBit(I.index());
24912 continue;
24913 }
24914 // Profitability check: don't allow non-zero constant operands.
24915 return SDValue();
24916 }
24917 // Profitability check: there must only be a single non-zero operand,
24918 // and it must be the first operand of the BUILD_VECTOR.
24919 if (I.index() != 0)
24920 return SDValue();
24921 // The operand must be a zero-extension itself.
24922 // FIXME: this could be generalized to known leading zeros check.
24923 if (Op.getOpcode() != ISD::ZERO_EXTEND)
24924 return SDValue();
24925 unsigned CurrActiveBits =
24926 Op.getOperand(0).getValueSizeInBits().getFixedValue();
24927 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
24928 ActiveBits = CurrActiveBits;
24929 // We want to at least halve the element size.
24930 if (2 * ActiveBits > EltBitwidth)
24931 return SDValue();
24932 }
24933
24934 // This BUILD_VECTOR must have at least one non-constant-zero operand.
24935 if (ActiveBits == 0)
24936 return SDValue();
24937
24938 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
24939 // into how many chunks can we split our element width?
24940 EVT NewScalarIntVT, NewIntVT;
24941 std::optional<unsigned> Factor;
24942 // We can split the element into at least two chunks, but not into more
24943 // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor
24944 // for which the element width is a multiple of it,
24945 // and the resulting types/operations on that chunk width are legal.
24946 assert(2 * ActiveBits <= EltBitwidth &&
24947 "We know that half or less bits of the element are active.");
24948 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
24949 if (EltBitwidth % Scale != 0)
24950 continue;
24951 unsigned ChunkBitwidth = EltBitwidth / Scale;
24952 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
24953 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
24954 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
24955 Scale * N->getNumOperands());
24956 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
24957 (LegalOperations &&
24958 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
24959 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
24960 continue;
24961 Factor = Scale;
24962 break;
24963 }
24964 if (!Factor)
24965 return SDValue();
24966
24967 SDLoc DL(N);
24968 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
24969
24970 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
24971 SmallVector<SDValue, 16> NewOps;
24972 NewOps.reserve(NewIntVT.getVectorNumElements());
24973 for (auto I : enumerate(N->ops())) {
24974 SDValue Op = I.value();
24975 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
24976 unsigned SrcOpIdx = I.index();
24977 if (KnownZeroOps[SrcOpIdx]) {
24978 NewOps.append(*Factor, ZeroOp);
24979 continue;
24980 }
24981 Op = DAG.getBitcast(OpIntVT, Op);
24982 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
24983 NewOps.emplace_back(Op);
24984 NewOps.append(*Factor - 1, ZeroOp);
24985 }
24986 assert(NewOps.size() == NewIntVT.getVectorNumElements());
24987 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
24988 NewBV = DAG.getBitcast(VT, NewBV);
24989 return NewBV;
24990}
24991
24992SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
24993 EVT VT = N->getValueType(0);
24994
24995 // A vector built entirely of undefs is undef.
24996 if (ISD::allOperandsUndef(N))
24997 return DAG.getUNDEF(VT);
24998
24999 // If this is a splat of a bitcast from another vector, change to a
25000 // concat_vector.
25001 // For example:
25002 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25003 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25004 //
25005 // If X is a build_vector itself, the concat can become a larger build_vector.
25006 // TODO: Maybe this is useful for non-splat too?
25007 if (!LegalOperations) {
25008 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
25009 // Only change build_vector to a concat_vector if the splat value type is
25010 // same as the vector element type.
25011 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25012 Splat = peekThroughBitcasts(Splat);
25013 EVT SrcVT = Splat.getValueType();
25014 if (SrcVT.isVector()) {
25015 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25016 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25017 SrcVT.getVectorElementType(), NumElts);
25018 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25019 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25020 SDValue Concat =
25021 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25022 return DAG.getBitcast(VT, Concat);
25023 }
25024 }
25025 }
25026 }
25027
25028 // Check if we can express BUILD VECTOR via subvector extract.
25029 if (!LegalTypes && (N->getNumOperands() > 1)) {
25030 SDValue Op0 = N->getOperand(0);
25031 auto checkElem = [&](SDValue Op) -> uint64_t {
25032 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25033 (Op0.getOperand(0) == Op.getOperand(0)))
25034 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25035 return CNode->getZExtValue();
25036 return -1;
25037 };
25038
25039 int Offset = checkElem(Op0);
25040 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25041 if (Offset + i != checkElem(N->getOperand(i))) {
25042 Offset = -1;
25043 break;
25044 }
25045 }
25046
25047 if ((Offset == 0) &&
25048 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25049 return Op0.getOperand(0);
25050 if ((Offset != -1) &&
25051 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25052 0)) // IDX must be multiple of output size.
25053 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25054 Op0.getOperand(0), Op0.getOperand(1));
25055 }
25056
25057 if (SDValue V = convertBuildVecZextToZext(N))
25058 return V;
25059
25060 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25061 return V;
25062
25063 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25064 return V;
25065
25066 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25067 return V;
25068
25069 if (SDValue V = reduceBuildVecToShuffle(N))
25070 return V;
25071
25072 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25073 // Do this late as some of the above may replace the splat.
25074 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
25075 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25076 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25077 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25078 }
25079
25080 return SDValue();
25081}
25082
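// Attempt to combine a CONCAT_VECTORS whose operands are all bitcasts from
// scalars (or undef) into one BUILD_VECTOR of those scalars, bitcast to the
// result type. Illustrative example:
//   concat_vectors (v1i64 bitcast i64:a), (v1i64 bitcast i64:b)
//     -> bitcast (v2i64 build_vector a, b)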
25083 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25084 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25085 EVT OpVT = N->getOperand(0).getValueType();
25086
25087 // If the operands are legal vectors, leave them alone.
25088 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25089 return SDValue();
25090
25091 SDLoc DL(N);
25092 EVT VT = N->getValueType(0);
25093 SmallVector<SDValue, 8> Ops;
25094 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25095
25096 // Keep track of what we encounter.
25097 EVT AnyFPVT;
25098
25099 for (const SDValue &Op : N->ops()) {
25100 if (ISD::BITCAST == Op.getOpcode() &&
25101 !Op.getOperand(0).getValueType().isVector())
25102 Ops.push_back(Op.getOperand(0));
25103 else if (Op.isUndef())
25104 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
25105 else
25106 return SDValue();
25107
25108 // Note whether we encounter an integer or floating point scalar.
25109 // If it's neither, bail out, it could be something weird like x86mmx.
25110 EVT LastOpVT = Ops.back().getValueType();
25111 if (LastOpVT.isFloatingPoint())
25112 AnyFPVT = LastOpVT;
25113 else if (!LastOpVT.isInteger())
25114 return SDValue();
25115 }
25116
25117 // If any of the operands is a floating point scalar bitcast to a vector,
25118 // use floating point types throughout, and bitcast everything.
25119 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25120 if (AnyFPVT != EVT()) {
25121 SVT = AnyFPVT;
25122 for (SDValue &Op : Ops) {
25123 if (Op.getValueType() == SVT)
25124 continue;
25125 if (Op.isUndef())
25126 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
25127 else
25128 Op = DAG.getBitcast(SVT, Op);
25129 }
25130 }
25131
25132 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25133 VT.getSizeInBits() / SVT.getSizeInBits());
25134 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25135}
25136
25137// Attempt to merge nested concat_vectors/undefs.
25138// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25139// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25140 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25141 SelectionDAG &DAG) {
25142 EVT VT = N->getValueType(0);
25143
25144 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25145 EVT SubVT;
25146 SDValue FirstConcat;
25147 for (const SDValue &Op : N->ops()) {
25148 if (Op.isUndef())
25149 continue;
25150 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25151 return SDValue();
25152 if (!FirstConcat) {
25153 SubVT = Op.getOperand(0).getValueType();
25154 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25155 return SDValue();
25156 FirstConcat = Op;
25157 continue;
25158 }
25159 if (SubVT != Op.getOperand(0).getValueType())
25160 return SDValue();
25161 }
25162 assert(FirstConcat && "Concat of all-undefs found");
25163
25164 SmallVector<SDValue> ConcatOps;
25165 for (const SDValue &Op : N->ops()) {
25166 if (Op.isUndef()) {
25167 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25168 continue;
25169 }
25170 ConcatOps.append(Op->op_begin(), Op->op_end());
25171 }
25172 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25173}
25174
25175// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25176// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25177// most two distinct vectors the same size as the result, attempt to turn this
25178// into a legal shuffle.
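// Illustrative example:
//   concat_vectors (v4i32 extract_subvector v8i32:x, 4),
//                  (v4i32 extract_subvector v8i32:x, 0)
//     -> vector_shuffle<4,5,6,7,0,1,2,3> x, undef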
25179 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25180 EVT VT = N->getValueType(0);
25181 EVT OpVT = N->getOperand(0).getValueType();
25182
25183 // We currently can't generate an appropriate shuffle for a scalable vector.
25184 if (VT.isScalableVector())
25185 return SDValue();
25186
25187 int NumElts = VT.getVectorNumElements();
25188 int NumOpElts = OpVT.getVectorNumElements();
25189
25190 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25191 SmallVector<int, 8> Mask;
25192
25193 for (SDValue Op : N->ops()) {
25195
25196 // UNDEF nodes convert to UNDEF shuffle mask values.
25197 if (Op.isUndef()) {
25198 Mask.append((unsigned)NumOpElts, -1);
25199 continue;
25200 }
25201
25202 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25203 return SDValue();
25204
25205 // What vector are we extracting the subvector from and at what index?
25206 SDValue ExtVec = Op.getOperand(0);
25207 int ExtIdx = Op.getConstantOperandVal(1);
25208
25209 // We want the EVT of the original extraction to correctly scale the
25210 // extraction index.
25211 EVT ExtVT = ExtVec.getValueType();
25212 ExtVec = peekThroughBitcasts(ExtVec);
25213
25214 // UNDEF nodes convert to UNDEF shuffle mask values.
25215 if (ExtVec.isUndef()) {
25216 Mask.append((unsigned)NumOpElts, -1);
25217 continue;
25218 }
25219
25220 // Ensure that we are extracting a subvector from a vector the same
25221 // size as the result.
25222 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25223 return SDValue();
25224
25225 // Scale the subvector index to account for any bitcast.
25226 int NumExtElts = ExtVT.getVectorNumElements();
25227 if (0 == (NumExtElts % NumElts))
25228 ExtIdx /= (NumExtElts / NumElts);
25229 else if (0 == (NumElts % NumExtElts))
25230 ExtIdx *= (NumElts / NumExtElts);
25231 else
25232 return SDValue();
25233
25234 // At most we can reference 2 inputs in the final shuffle.
25235 if (SV0.isUndef() || SV0 == ExtVec) {
25236 SV0 = ExtVec;
25237 for (int i = 0; i != NumOpElts; ++i)
25238 Mask.push_back(i + ExtIdx);
25239 } else if (SV1.isUndef() || SV1 == ExtVec) {
25240 SV1 = ExtVec;
25241 for (int i = 0; i != NumOpElts; ++i)
25242 Mask.push_back(i + ExtIdx + NumElts);
25243 } else {
25244 return SDValue();
25245 }
25246 }
25247
25248 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25249 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25250 DAG.getBitcast(VT, SV1), Mask, DAG);
25251}
25252
25253 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25254 unsigned CastOpcode = N->getOperand(0).getOpcode();
25255 switch (CastOpcode) {
25256 case ISD::SINT_TO_FP:
25257 case ISD::UINT_TO_FP:
25258 case ISD::FP_TO_SINT:
25259 case ISD::FP_TO_UINT:
25260 // TODO: Allow more opcodes?
25261 // case ISD::BITCAST:
25262 // case ISD::TRUNCATE:
25263 // case ISD::ZERO_EXTEND:
25264 // case ISD::SIGN_EXTEND:
25265 // case ISD::FP_EXTEND:
25266 break;
25267 default:
25268 return SDValue();
25269 }
25270
25271 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25272 if (!SrcVT.isVector())
25273 return SDValue();
25274
25275 // All operands of the concat must be the same kind of cast from the same
25276 // source type.
25277 SmallVector<SDValue, 4> SrcOps;
25278 for (SDValue Op : N->ops()) {
25279 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25280 Op.getOperand(0).getValueType() != SrcVT)
25281 return SDValue();
25282 SrcOps.push_back(Op.getOperand(0));
25283 }
25284
25285 // The wider cast must be supported by the target. This is unusual because
25286 // the operation support type parameter depends on the opcode. In addition,
25287 // check the other type in the cast to make sure this is really legal.
25288 EVT VT = N->getValueType(0);
25289 EVT SrcEltVT = SrcVT.getVectorElementType();
25290 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25291 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25292 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25293 switch (CastOpcode) {
25294 case ISD::SINT_TO_FP:
25295 case ISD::UINT_TO_FP:
25296 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25297 !TLI.isTypeLegal(VT))
25298 return SDValue();
25299 break;
25300 case ISD::FP_TO_SINT:
25301 case ISD::FP_TO_UINT:
25302 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25303 !TLI.isTypeLegal(ConcatSrcVT))
25304 return SDValue();
25305 break;
25306 default:
25307 llvm_unreachable("Unexpected cast opcode");
25308 }
25309
25310 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
25311 SDLoc DL(N);
25312 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25313 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25314}
25315
25316// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
25317// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
25318// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
25319static SDValue combineConcatVectorOfShuffleAndItsOperands(
25320 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25321 bool LegalOperations) {
25322 EVT VT = N->getValueType(0);
25323 EVT OpVT = N->getOperand(0).getValueType();
25324 if (VT.isScalableVector())
25325 return SDValue();
25326
25327 // For now, only allow simple 2-operand concatenations.
25328 if (N->getNumOperands() != 2)
25329 return SDValue();
25330
25331 // Don't create illegal types/shuffles when not allowed to.
25332 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25333 (LegalOperations &&
25334       !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)))
25335 return SDValue();
25336
25337 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25338 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25339 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25340 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25341 // (4) and for now, the SHUFFLE_VECTOR must be unary.
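  // Illustrative sketch with v4 operands:
  //   concat_vectors (shuffle A, undef, <3,2,1,0>), A
  //     --> shuffle (concat_vectors A, undef), undef, <3,2,1,0,0,1,2,3>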
25342 ShuffleVectorSDNode *SVN = nullptr;
25343 for (SDValue Op : N->ops()) {
25344 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25345 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25346 all_of(N->ops(), [CurSVN](SDValue Op) {
25347 // FIXME: can we allow UNDEF operands?
25348 return !Op.isUndef() &&
25349 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25350 })) {
25351 SVN = CurSVN;
25352 break;
25353 }
25354 }
25355 if (!SVN)
25356 return SDValue();
25357
25358 // We are going to pad the shuffle operands, so any index that was picking
25359 // from the second operand must be adjusted.
25360 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25361 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25362
25363 // Identity masks for the operands of the (padded) shuffle.
25364 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25365 MutableArrayRef<int> FirstShufOpIdentityMask =
25366     MutableArrayRef<int>(IdentityMask)
25367         .take_front(OpVT.getVectorNumElements());
25368 MutableArrayRef<int> SecondShufOpIdentityMask =
25369     MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
25370 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25371 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25372           OpVT.getVectorNumElements());
25373
25374 // New combined shuffle mask.
25375 SmallVector<int, 32> Mask;
25376 Mask.reserve(VT.getVectorNumElements());
25377 for (SDValue Op : N->ops()) {
25378 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25379 if (Op.getNode() == SVN) {
25380 append_range(Mask, AdjustedMask);
25381 continue;
25382 }
25383 if (Op == SVN->getOperand(0)) {
25384 append_range(Mask, FirstShufOpIdentityMask);
25385 continue;
25386 }
25387 if (Op == SVN->getOperand(1)) {
25388 append_range(Mask, SecondShufOpIdentityMask);
25389 continue;
25390 }
25391 llvm_unreachable("Unexpected operand!");
25392 }
25393
25394 // Don't create illegal shuffle masks.
25395 if (!TLI.isShuffleMaskLegal(Mask, VT))
25396 return SDValue();
25397
25398 // Pad the shuffle operands with UNDEF.
25399 SDLoc dl(N);
25400 std::array<SDValue, 2> ShufOps;
25401 for (auto I : zip(SVN->ops(), ShufOps)) {
25402 SDValue ShufOp = std::get<0>(I);
25403 SDValue &NewShufOp = std::get<1>(I);
25404 if (ShufOp.isUndef())
25405 NewShufOp = DAG.getUNDEF(VT);
25406 else {
25407 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25408 DAG.getUNDEF(OpVT));
25409 ShufOpParts[0] = ShufOp;
25410 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25411 }
25412 }
25413 // Finally, create the new wide shuffle.
25414 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25415}
25416
25417static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25418 const TargetLowering &TLI,
25419 bool LegalTypes,
25420 bool LegalOperations) {
25421 EVT VT = N->getValueType(0);
25422
25423 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25424 // the type and operation is legal. The Hexagon target has custom
25425 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25426 // concatenates them. Therefore, custom lowering must also be rejected in
25427 // order to avoid an infinite loop.
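  // Schematically, the fold below is:
  //   concat_vectors (splat_vector X), (splat_vector X), ...
  //     --> splat_vector X, at the wider result type.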
25428 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25429 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25430 return SDValue();
25431
25432 SDValue Op0 = N->getOperand(0);
25433 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25434 return SDValue();
25435
25436 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25437}
25438
25439SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25440 // If we only have one input vector, we don't need to do any concatenation.
25441 if (N->getNumOperands() == 1)
25442 return N->getOperand(0);
25443
25444 // Check if all of the operands are undefs.
25445 EVT VT = N->getValueType(0);
25446 if (ISD::allOperandsUndef(N))
25447 return DAG.getUNDEF(VT);
25448
25449 // Optimize concat_vectors where all but the first of the vectors are undef.
25450 if (all_of(drop_begin(N->ops()),
25451 [](const SDValue &Op) { return Op.isUndef(); })) {
25452 SDValue In = N->getOperand(0);
25453 assert(In.getValueType().isVector() && "Must concat vectors");
25454
25455 // If the input is a concat_vectors, just make a larger concat by padding
25456 // with smaller undefs.
25457 //
25458 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25459 // here could cause an infinite loop. That legalizing happens when LegalDAG
25460 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25461 // scalable.
25462 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25463 !(LegalDAG && In.getValueType().isScalableVector())) {
25464 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25465      SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
25466 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25467 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25468 }
25469
25470    SDValue Scalar = peekThroughOneUseBitcasts(In);
25471
25472 // concat_vectors(scalar_to_vector(scalar), undef) ->
25473 // scalar_to_vector(scalar)
25474 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25475 Scalar.hasOneUse()) {
25476 EVT SVT = Scalar.getValueType().getVectorElementType();
25477 if (SVT == Scalar.getOperand(0).getValueType())
25478 Scalar = Scalar.getOperand(0);
25479 }
25480
25481 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25482 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25483 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25484 // look through the trunc so we can still do the transform:
25485 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25486 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25487 !TLI.isTypeLegal(Scalar.getValueType()) &&
25488 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25489 Scalar = Scalar->getOperand(0);
25490
25491 EVT SclTy = Scalar.getValueType();
25492
25493 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
25494 return SDValue();
25495
25496 // Bail out if the vector size is not a multiple of the scalar size.
25497 if (VT.getSizeInBits() % SclTy.getSizeInBits())
25498 return SDValue();
25499
25500 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
25501 if (VNTNumElms < 2)
25502 return SDValue();
25503
25504 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
25505 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
25506 return SDValue();
25507
25508 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
25509 return DAG.getBitcast(VT, Res);
25510 }
25511 }
25512
25513 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
25514 // We have already tested above for an UNDEF only concatenation.
25515 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
25516 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
25517 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
25518 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
25519 };
25520 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
25521    SmallVector<SDValue, 8> Opnds;
25522 EVT SVT = VT.getScalarType();
25523
25524 EVT MinVT = SVT;
25525 if (!SVT.isFloatingPoint()) {
25526 // If the BUILD_VECTORs are built from integers, they may have different
25527 // operand types. Get the smallest type and truncate all operands to it.
25528 bool FoundMinVT = false;
25529 for (const SDValue &Op : N->ops())
25530 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25531 EVT OpSVT = Op.getOperand(0).getValueType();
25532 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
25533 FoundMinVT = true;
25534 }
25535 assert(FoundMinVT && "Concat vector type mismatch");
25536 }
25537
25538 for (const SDValue &Op : N->ops()) {
25539 EVT OpVT = Op.getValueType();
25540 unsigned NumElts = OpVT.getVectorNumElements();
25541
25542 if (Op.isUndef())
25543 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
25544
25545 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25546 if (SVT.isFloatingPoint()) {
25547 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
25548 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
25549 } else {
25550 for (unsigned i = 0; i != NumElts; ++i)
25551 Opnds.push_back(
25552 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
25553 }
25554 }
25555 }
25556
25557 assert(VT.getVectorNumElements() == Opnds.size() &&
25558 "Concat vector type mismatch");
25559 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
25560 }
25561
25562 if (SDValue V =
25563 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25564 return V;
25565
25566 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
25567 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
25568 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
25569 return V;
25570
25571 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
25572 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
25573    if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
25574 return V;
25575
25576 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
25577    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
25578 return V;
25579 }
25580
25581 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
25582 return V;
25583
25584 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
25585 N, DAG, TLI, LegalTypes, LegalOperations))
25586 return V;
25587
25588 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
25589 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
25590 // operands and look for CONCAT operations that place the incoming vectors
25591 // at the exact same location.
25592 //
25593 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
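  // Illustrative no-op case (4-element parts):
  //   concat (extract_subvector X, 0), (extract_subvector X, 4) --> X
  // when X already has the same type as the concat result.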
25594 SDValue SingleSource = SDValue();
25595 unsigned PartNumElem =
25596 N->getOperand(0).getValueType().getVectorMinNumElements();
25597
25598 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25599 SDValue Op = N->getOperand(i);
25600
25601 if (Op.isUndef())
25602 continue;
25603
25604 // Check if this is the identity extract:
25605 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25606 return SDValue();
25607
25608 // Find the single incoming vector for the extract_subvector.
25609 if (SingleSource.getNode()) {
25610 if (Op.getOperand(0) != SingleSource)
25611 return SDValue();
25612 } else {
25613 SingleSource = Op.getOperand(0);
25614
25615 // Check the source type is the same as the type of the result.
25616 // If not, this concat may extend the vector, so we can not
25617 // optimize it away.
25618 if (SingleSource.getValueType() != N->getValueType(0))
25619 return SDValue();
25620 }
25621
25622 // Check that we are reading from the identity index.
25623 unsigned IdentityIndex = i * PartNumElem;
25624 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
25625 return SDValue();
25626 }
25627
25628 if (SingleSource.getNode())
25629 return SingleSource;
25630
25631 return SDValue();
25632}
25633
25634SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25635 // Check to see if all operands are identical.
25636 if (!llvm::all_equal(N->op_values()))
25637 return SDValue();
25638
25639 // Check to see if the identical operand is a splat.
25640 if (!DAG.isSplatValue(N->getOperand(0)))
25641 return SDValue();
25642
25643 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
25644 SmallVector<SDValue, 4> Ops;
25645 Ops.append(N->op_values().begin(), N->op_values().end());
25646 return CombineTo(N, &Ops);
25647}
25648
25649// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
25650// if the subvector can be sourced for free.
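// Illustrative, with SubVT = v4i32 and Index = 4:
//   getSubVectorSrc(insert_subvector ?, Y, 4)          returns Y
//   getSubVectorSrc(concat_vectors A:v4i32, B:v4i32)   returns B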
25651static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
25652 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25653 V.getOperand(1).getValueType() == SubVT &&
25654 V.getConstantOperandAPInt(2) == Index) {
25655 return V.getOperand(1);
25656 }
25657 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
25658 V.getOperand(0).getValueType() == SubVT &&
25659 (Index % SubVT.getVectorMinNumElements()) == 0) {
25660 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
25661 return V.getOperand(SubIdx);
25662 }
25663 return SDValue();
25664}
25665
25666static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25667 unsigned Index, const SDLoc &DL,
25668 SelectionDAG &DAG,
25669 bool LegalOperations) {
25670 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25671 unsigned BinOpcode = BinOp.getOpcode();
25672 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
25673 return SDValue();
25674
25675 EVT VecVT = BinOp.getValueType();
25676 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
25677 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
25678 return SDValue();
25679 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
25680 return SDValue();
25681
25682 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
25683 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
25684
25685 // TODO: We could handle the case where only 1 operand is being inserted by
25686 // creating an extract of the other operand, but that requires checking
25687 // number of uses and/or costs.
25688 if (!Sub0 || !Sub1)
25689 return SDValue();
25690
25691 // We are inserting both operands of the wide binop only to extract back
25692 // to the narrow vector size. Eliminate all of the insert/extract:
25693 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25694 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
25695}
25696
25697/// If we are extracting a subvector produced by a wide binary operator try
25698/// to use a narrow binary operator and/or avoid concatenation and extraction.
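/// Illustrative sketch (the AVX1-style motivation mentioned below):
///   v2i64 extract_subvector (v4i64 and (concat X1, X2), (concat Y1, Y2)), 2
///     --> v2i64 and X2, Y2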
25699static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25700 const SDLoc &DL, SelectionDAG &DAG,
25701 bool LegalOperations) {
25702 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
25703 // some of these bailouts with other transforms.
25704
25705 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
25706 LegalOperations))
25707 return V;
25708
25709 // We are looking for an optionally bitcasted wide vector binary operator
25710 // feeding an extract subvector.
25711 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25712 SDValue BinOp = peekThroughBitcasts(Src);
25713 unsigned BOpcode = BinOp.getOpcode();
25714 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
25715 return SDValue();
25716
25717 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
25718 // reduced to the unary fneg when it is visited, and we probably want to deal
25719 // with fneg in a target-specific way.
25720 if (BOpcode == ISD::FSUB) {
25721 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
25722 if (C && C->getValueAPF().isNegZero())
25723 return SDValue();
25724 }
25725
25726 // The binop must be a vector type, so we can extract some fraction of it.
25727 EVT WideBVT = BinOp.getValueType();
25728 // The optimisations below currently assume we are dealing with fixed length
25729 // vectors. It is possible to add support for scalable vectors, but at the
25730 // moment we've done no analysis to prove whether they are profitable or not.
25731 if (!WideBVT.isFixedLengthVector())
25732 return SDValue();
25733
25734 assert((Index % VT.getVectorNumElements()) == 0 &&
25735 "Extract index is not a multiple of the vector length.");
25736
25737 // Bail out if this is not a proper multiple width extraction.
25738 unsigned WideWidth = WideBVT.getSizeInBits();
25739 unsigned NarrowWidth = VT.getSizeInBits();
25740 if (WideWidth % NarrowWidth != 0)
25741 return SDValue();
25742
25743 // Bail out if we are extracting a fraction of a single operation. This can
25744 // occur because we potentially looked through a bitcast of the binop.
25745 unsigned NarrowingRatio = WideWidth / NarrowWidth;
25746 unsigned WideNumElts = WideBVT.getVectorNumElements();
25747 if (WideNumElts % NarrowingRatio != 0)
25748 return SDValue();
25749
25750 // Bail out if the target does not support a narrower version of the binop.
25751 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
25752 WideNumElts / NarrowingRatio);
25753 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
25754 LegalOperations))
25755 return SDValue();
25756
25757 // If extraction is cheap, we don't need to look at the binop operands
25758 // for concat ops. The narrow binop alone makes this transform profitable.
25759 // We can't just reuse the original extract index operand because we may have
25760 // bitcasted.
25761 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
25762 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
25763 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
25764 BinOp.hasOneUse() && Src->hasOneUse()) {
25765 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25766 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25767 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25768 BinOp.getOperand(0), NewExtIndex);
25769 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25770 BinOp.getOperand(1), NewExtIndex);
25771 SDValue NarrowBinOp =
25772 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25773 return DAG.getBitcast(VT, NarrowBinOp);
25774 }
25775
25776 // Only handle the case where we are doubling and then halving. A larger ratio
25777 // may require more than two narrow binops to replace the wide binop.
25778 if (NarrowingRatio != 2)
25779 return SDValue();
25780
25781 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25782 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25783 // flavors, but no other 256-bit integer support. This could be extended to
25784 // handle any binop, but that may require fixing/adding other folds to avoid
25785 // codegen regressions.
25786 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25787 return SDValue();
25788
25789 // We need at least one concatenation operation of a binop operand to make
25790 // this transform worthwhile. The concat must double the input vector sizes.
25791 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25792 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25793 return V.getOperand(ConcatOpNum);
25794 return SDValue();
25795 };
25796 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25797 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25798
25799 if (SubVecL || SubVecR) {
25800 // If a binop operand was not the result of a concat, we must extract a
25801 // half-sized operand for our new narrow binop:
25802 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25803 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25804 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25805 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25806 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25807 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25808 BinOp.getOperand(0), IndexC);
25809
25810 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25811 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25812 BinOp.getOperand(1), IndexC);
25813
25814 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25815 return DAG.getBitcast(VT, NarrowBinOp);
25816 }
25817
25818 return SDValue();
25819}
25820
25821/// If we are extracting a subvector from a wide vector load, convert to a
25822/// narrow load to eliminate the extraction:
25823/// (extract_subvector (load wide vector)) --> (load narrow vector)
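/// Illustrative (little-endian): extracting elements [2,3] of a v4f32 load from
/// a base pointer becomes a v2f32 load at base + 8 bytes, with the chain kept
/// equivalent to the original load's ordering.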
25824static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
25825 const SDLoc &DL, SelectionDAG &DAG) {
25826 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25827 if (DAG.getDataLayout().isBigEndian())
25828 return SDValue();
25829
25830 auto *Ld = dyn_cast<LoadSDNode>(Src);
25831 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
25832 return SDValue();
25833
25834 // We can only create byte sized loads.
25835 if (!VT.isByteSized())
25836 return SDValue();
25837
25838 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25839 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, VT))
25840 return SDValue();
25841
25842 unsigned NumElts = VT.getVectorMinNumElements();
25843 // A fixed length vector being extracted from a scalable vector
25844 // may not be any *smaller* than the scalable one.
25845 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25846 return SDValue();
25847
25848 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25849 // multiple of the minimum number of elements in the result type.
25850 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25851 "multiple of the result's element count");
25852
25853 // It's fine to use TypeSize here as we know the offset will not be negative.
25854 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25855 std::optional<unsigned> ByteOffset;
25856 if (Offset.isFixed())
25857 ByteOffset = Offset.getFixedValue();
25858
25859 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
25860 return SDValue();
25861
25862 // The narrow load will be offset from the base address of the old load if
25863 // we are extracting from something besides index 0 (little-endian).
25864 // TODO: Use "BaseIndexOffset" to make this more effective.
25865 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25866
25867 MachineFunction &MF = DAG.getMachineFunction();
25868 MachineMemOperand *MMO;
25869 if (Offset.isScalable()) {
25870 MachinePointerInfo MPI =
25871        MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25872 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
25873 } else
25874 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25875 VT.getStoreSize());
25876
25877 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25878 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25879 return NewLd;
25880}
25881
25882/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25883/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25884/// EXTRACT_SUBVECTOR(Op?, ?),
25885/// Mask'))
25886/// iff it is legal and profitable to do so. Notably, the trimmed mask
25887/// (containing only the elements that are extracted)
25888/// must reference at most two subvectors.
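/// Illustrative sketch (v8i32 source, v4i32 result):
///   extract_subvector (shuffle X, Y, <0,1,2,3,12,13,14,15>), 4
///     --> shuffle (extract_subvector Y, 4), undef, <0,1,2,3>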
25889static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
25890 unsigned Index,
25891 const SDLoc &DL,
25892 SelectionDAG &DAG,
25893 bool LegalOperations) {
25894 // Only deal with non-scalable vectors.
25895 EVT WideVT = Src.getValueType();
25896 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25897 return SDValue();
25898
25899 // The operand must be a shufflevector.
25900 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
25901 if (!WideShuffleVector)
25902 return SDValue();
25903
25904 // The old shuffle needs to go away.
25905 if (!WideShuffleVector->hasOneUse())
25906 return SDValue();
25907
25908 // And the narrow shufflevector that we'll form must be legal.
25909 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25910 if (LegalOperations &&
25911      !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, NarrowVT))
25912 return SDValue();
25913
25914 int NumEltsExtracted = NarrowVT.getVectorNumElements();
25915 assert((Index % NumEltsExtracted) == 0 &&
25916 "Extract index is not a multiple of the output vector length.");
25917
25918 int WideNumElts = WideVT.getVectorNumElements();
25919
25920 SmallVector<int, 16> NewMask;
25921 NewMask.reserve(NumEltsExtracted);
25922 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
25923 DemandedSubvectors;
25924
25925 // Try to decode the wide mask into narrow mask from at most two subvectors.
25926 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
25927 assert((M >= -1) && (M < (2 * WideNumElts)) &&
25928 "Out-of-bounds shuffle mask?");
25929
25930 if (M < 0) {
25931 // Does not depend on operands, does not require adjustment.
25932 NewMask.emplace_back(M);
25933 continue;
25934 }
25935
25936 // From which operand of the shuffle does this shuffle mask element pick?
25937 int WideShufOpIdx = M / WideNumElts;
25938 // Which element of that operand is picked?
25939 int OpEltIdx = M % WideNumElts;
25940
25941 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
25942 "Shuffle mask vector decomposition failure.");
25943
25944 // And which NumEltsExtracted-sized subvector of that operand is that?
25945 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
25946 // And which element within that subvector of that operand is that?
25947 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
25948
25949 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
25950 "Shuffle mask subvector decomposition failure.");
25951
25952 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
25953 WideShufOpIdx * WideNumElts) == M &&
25954 "Shuffle mask full decomposition failure.");
25955
25956 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
25957
25958 if (Op.isUndef()) {
25959 // Picking from an undef operand. Let's adjust mask instead.
25960 NewMask.emplace_back(-1);
25961 continue;
25962 }
25963
25964 const std::pair<SDValue, int> DemandedSubvector =
25965 std::make_pair(Op, OpSubvecIdx);
25966
25967 if (DemandedSubvectors.insert(DemandedSubvector)) {
25968 if (DemandedSubvectors.size() > 2)
25969 return SDValue(); // We can't handle more than two subvectors.
25970 // How many elements into the WideVT does this subvector start?
25971 int Index = NumEltsExtracted * OpSubvecIdx;
25972 // Bail out if the extraction isn't going to be cheap.
25973 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
25974 return SDValue();
25975 }
25976
25977 // Ok, but from which operand of the new shuffle will this element pick?
25978 int NewOpIdx =
25979 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
25980 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
25981
25982 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
25983 NewMask.emplace_back(AdjM);
25984 }
25985 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
25986 assert(DemandedSubvectors.size() <= 2 &&
25987 "Should have ended up demanding at most two subvectors.");
25988
25989 // Did we discover that the shuffle does not actually depend on operands?
25990 if (DemandedSubvectors.empty())
25991 return DAG.getUNDEF(NarrowVT);
25992
25993 // Profitability check: only deal with extractions from the first subvector
25994 // unless the mask becomes an identity mask.
25995 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
25996 any_of(NewMask, [](int M) { return M < 0; }))
25997 for (auto &DemandedSubvector : DemandedSubvectors)
25998 if (DemandedSubvector.second != 0)
25999 return SDValue();
26000
26001 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26002 // operand[s]/index[es], so there is no point in checking for its legality.
26003
26004 // Do not turn a legal shuffle into an illegal one.
26005 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26006 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26007 return SDValue();
26008
26009 SmallVector<SDValue, 2> NewOps;
26010 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26011 &DemandedSubvector : DemandedSubvectors) {
26012 // How many elements into the WideVT does this subvector start?
26013 int Index = NumEltsExtracted * DemandedSubvector.second;
26014 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26015 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26016 DemandedSubvector.first, IndexC));
26017 }
26018 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26019 "Should end up with either one or two ops");
26020
26021 // If we ended up with only one operand, pad with an undef.
26022 if (NewOps.size() == 1)
26023 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26024
26025 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26026}
26027
26028SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26029 EVT NVT = N->getValueType(0);
26030 SDValue V = N->getOperand(0);
26031 uint64_t ExtIdx = N->getConstantOperandVal(1);
26032 SDLoc DL(N);
26033
26034 // Extract from UNDEF is UNDEF.
26035 if (V.isUndef())
26036 return DAG.getUNDEF(NVT);
26037
26038 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26039 return NarrowLoad;
26040
26041 // Combine an extract of an extract into a single extract_subvector.
26042 // ext (ext X, C), 0 --> ext X, C
26043 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26044 // The index has to be a multiple of the new result type's known minimum
26045 // vector length.
26046 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26047 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26048 V.getConstantOperandVal(1)) &&
26050 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26051 V.getOperand(1));
26052 }
26053 }
26054
26055 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
26056 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26057 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26058 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26059 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26060
26061 // extract_subvector(insert_subvector(x,y,c1),c2)
26062 // --> extract_subvector(y,c2-c1)
26063 // iff we're just extracting from the inserted subvector.
26064 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26065 SDValue InsSub = V.getOperand(1);
26066 EVT InsSubVT = InsSub.getValueType();
26067 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26068 unsigned InsIdx = V.getConstantOperandVal(2);
26069 unsigned NumSubElts = NVT.getVectorMinNumElements();
26070 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26071 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26072 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26073 V.getValueType().isFixedLengthVector())
26074 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26075 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26076 }
26077
26078 // Try to move vector bitcast after extract_subv by scaling extraction index:
26079 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
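  // Illustrative: v2i32 extract_subvector (v4i32 bitcast (v8i16 X)), 2
  //   --> v2i32 bitcast (v4i16 extract_subvector X, 4)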
26080 if (V.getOpcode() == ISD::BITCAST &&
26081 V.getOperand(0).getValueType().isVector() &&
26082 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26083 SDValue SrcOp = V.getOperand(0);
26084 EVT SrcVT = SrcOp.getValueType();
26085 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26086 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26087 if ((SrcNumElts % DestNumElts) == 0) {
26088 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26089 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26090 EVT NewExtVT =
26091 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26092      if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26093 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26094 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26095 V.getOperand(0), NewIndex);
26096 return DAG.getBitcast(NVT, NewExtract);
26097 }
26098 }
26099 if ((DestNumElts % SrcNumElts) == 0) {
26100 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26101 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26102 ElementCount NewExtEC =
26103 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26104 EVT ScalarVT = SrcVT.getScalarType();
26105 if ((ExtIdx % DestSrcRatio) == 0) {
26106 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26107 EVT NewExtVT =
26108 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26109          if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26110 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26111 SDValue NewExtract =
26112 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26113 V.getOperand(0), NewIndex);
26114 return DAG.getBitcast(NVT, NewExtract);
26115 }
26116 if (NewExtEC.isScalar() &&
26117            TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
26118 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26119 SDValue NewExtract =
26120 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26121 V.getOperand(0), NewIndex);
26122 return DAG.getBitcast(NVT, NewExtract);
26123 }
26124 }
26125 }
26126 }
26127 }
26128
26129 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26130 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26131 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26132 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26133 "Concat and extract subvector do not change element type");
26134
26135 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26136 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26137
26138 // If the concatenated source types match this extract, it's a direct
26139 // simplification:
26140 // extract_subvec (concat V1, V2, ...), i --> Vi
26141 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26142 return V.getOperand(ConcatOpIdx);
26143
26144 // If the concatenated source vectors are a multiple length of this extract,
26145 // then extract a fraction of one of those source vectors directly from a
26146 // concat operand. Example:
26147 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26148 // v2i8 extract_subvec v8i8 Y, 6
26149 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26150 ConcatSrcNumElts % ExtNumElts == 0) {
26151 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26152 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26153 "Trying to extract from >1 concat operand?");
26154 assert(NewExtIdx % ExtNumElts == 0 &&
26155 "Extract index is not a multiple of the input vector length.");
26156 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26157 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26158 V.getOperand(ConcatOpIdx), NewIndexC);
26159 }
26160 }
26161
26162 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26163 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26164 return Shuffle;
26165
26166 if (SDValue NarrowBOp =
26167 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26168 return NarrowBOp;
26169
26170 V = peekThroughBitcasts(V);
26171
26172 // If the input is a build vector, try to make a smaller build vector.
26173 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26174 EVT InVT = V.getValueType();
26175 unsigned ExtractSize = NVT.getSizeInBits();
26176 unsigned EltSize = InVT.getScalarSizeInBits();
26177 // Only do this if we won't split any elements.
26178 if (ExtractSize % EltSize == 0) {
26179 unsigned NumElems = ExtractSize / EltSize;
26180 EVT EltVT = InVT.getVectorElementType();
26181 EVT ExtractVT =
26182 NumElems == 1 ? EltVT
26183 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26184 if ((Level < AfterLegalizeDAG ||
26185 (NumElems == 1 ||
26186 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26187 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26188 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26189
26190 if (NumElems == 1) {
26191 SDValue Src = V->getOperand(IdxVal);
26192 if (EltVT != Src.getValueType())
26193 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26194 return DAG.getBitcast(NVT, Src);
26195 }
26196
26197 // Extract the pieces from the original build_vector.
26198 SDValue BuildVec =
26199 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26200 return DAG.getBitcast(NVT, BuildVec);
26201 }
26202 }
26203 }
26204
26205 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26206 // Handle only simple case where vector being inserted and vector
26207 // being extracted are of same size.
26208 EVT SmallVT = V.getOperand(1).getValueType();
26209 if (NVT.bitsEq(SmallVT)) {
26210 // Combine:
26211 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26212 // Into:
26213 // indices are equal or bit offsets are equal => V1
26214 // otherwise => (extract_subvec V1, ExtIdx)
26215 uint64_t InsIdx = V.getConstantOperandVal(2);
26216 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26217 ExtIdx * NVT.getScalarSizeInBits()) {
26218 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26219 return DAG.getBitcast(NVT, V.getOperand(1));
26220 } else {
26221 return DAG.getNode(
26222            ISD::EXTRACT_SUBVECTOR, DL, NVT,
26223 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26224 N->getOperand(1));
26225 }
26226 }
26227 }
26228
26229 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26230 // simplify it based on the (valid) extractions.
26231 if (!V.getValueType().isScalableVector() &&
26232 llvm::all_of(V->users(), [&](SDNode *Use) {
26233 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26234 Use->getOperand(0) == V;
26235 })) {
26236 unsigned NumElts = V.getValueType().getVectorNumElements();
26237 APInt DemandedElts = APInt::getZero(NumElts);
26238 for (SDNode *User : V->users()) {
26239 unsigned ExtIdx = User->getConstantOperandVal(1);
26240 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26241 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26242 }
26243 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26244 // We simplified the vector operand of this extract subvector. If this
26245 // extract is not dead, visit it again so it is folded properly.
26246 if (N->getOpcode() != ISD::DELETED_NODE)
26247 AddToWorklist(N);
26248 return SDValue(N, 0);
26249 }
26250 } else {
26251    if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26252 return SDValue(N, 0);
26253 }
26254
26255 return SDValue();
26256}
26257
26258/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26259/// followed by concatenation. Narrow vector ops may have better performance
26260/// than wide ops, and this can unlock further narrowing of other vector ops.
26261/// Targets can invert this transform later if it is not profitable.
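/// Illustrative sketch (v8 result with v4 halves):
///   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,2,10,3,11>
///     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)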
26262static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26263 SelectionDAG &DAG) {
26264 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26265 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26266 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26267 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26268 return SDValue();
26269
26270 // Split the wide shuffle mask into halves. Any mask element that is accessing
26271 // operand 1 is offset down to account for narrowing of the vectors.
26272 ArrayRef<int> Mask = Shuf->getMask();
26273 EVT VT = Shuf->getValueType(0);
26274 unsigned NumElts = VT.getVectorNumElements();
26275 unsigned HalfNumElts = NumElts / 2;
26276 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26277 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26278 for (unsigned i = 0; i != NumElts; ++i) {
26279 if (Mask[i] == -1)
26280 continue;
26281 // If we reference the upper (undef) subvector then the element is undef.
26282 if ((Mask[i] % NumElts) >= HalfNumElts)
26283 continue;
26284 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26285 if (i < HalfNumElts)
26286 Mask0[i] = M;
26287 else
26288 Mask1[i - HalfNumElts] = M;
26289 }
26290
26291 // Ask the target if this is a valid transform.
26292 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26293 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26294 HalfNumElts);
26295 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26296 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26297 return SDValue();
26298
26299 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26300 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26301 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26302 SDLoc DL(Shuf);
26303 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26304 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26305 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26306}
26307
26308// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26309// or turn a shuffle of a single concat into simpler shuffle then concat.
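// Illustrative (v8 shuffle of two v4+v4 concats):
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11> --> concat B, C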
26310static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26311 EVT VT = N->getValueType(0);
26312 unsigned NumElts = VT.getVectorNumElements();
26313
26314 SDValue N0 = N->getOperand(0);
26315 SDValue N1 = N->getOperand(1);
26316 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26317 ArrayRef<int> Mask = SVN->getMask();
26318
26319 SmallVector<SDValue, 4> Ops;
26320 EVT ConcatVT = N0.getOperand(0).getValueType();
26321 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26322 unsigned NumConcats = NumElts / NumElemsPerConcat;
26323
26324 auto IsUndefMaskElt = [](int i) { return i == -1; };
26325
26326 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26327 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26328 // half vector elements.
26329 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26330 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26331 IsUndefMaskElt)) {
26332 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26333 N0.getOperand(1),
26334 Mask.slice(0, NumElemsPerConcat));
26335 N1 = DAG.getUNDEF(ConcatVT);
26336 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26337 }
26338
26339 // Look at every vector that's inserted. We're looking for exact
26340 // subvector-sized copies from a concatenated vector
26341 for (unsigned I = 0; I != NumConcats; ++I) {
26342 unsigned Begin = I * NumElemsPerConcat;
26343 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26344
26345 // Make sure we're dealing with a copy.
26346 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26347 Ops.push_back(DAG.getUNDEF(ConcatVT));
26348 continue;
26349 }
26350
26351 int OpIdx = -1;
26352 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26353 if (IsUndefMaskElt(SubMask[i]))
26354 continue;
26355 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26356 return SDValue();
26357 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26358 if (0 <= OpIdx && EltOpIdx != OpIdx)
26359 return SDValue();
26360 OpIdx = EltOpIdx;
26361 }
26362 assert(0 <= OpIdx && "Unknown concat_vectors op");
26363
26364 if (OpIdx < (int)N0.getNumOperands())
26365 Ops.push_back(N0.getOperand(OpIdx));
26366 else
26367 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26368 }
26369
26370 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26371}
26372
26373// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26374// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26375//
26376// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26377// a simplification in some sense, but it isn't appropriate in general: some
26378// BUILD_VECTORs are substantially cheaper than others. The general case
26379// of a BUILD_VECTOR requires inserting each element individually (or
26380// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26381// all constants is a single constant pool load. A BUILD_VECTOR where each
26382// element is identical is a splat. A BUILD_VECTOR where most of the operands
26383// are undef lowers to a small number of element insertions.
26384//
26385// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26386// We don't fold shuffles where one side is a non-zero constant, and we don't
26387// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26388// non-constant operands. This seems to work out reasonably well in practice.
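// Illustrative sketch: shuffle (build_vector A, B, C, D), undef, <3,-1,1,0>
//   --> build_vector D, undef, B, A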
26389static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26390 SelectionDAG &DAG,
26391 const TargetLowering &TLI) {
26392 EVT VT = SVN->getValueType(0);
26393 unsigned NumElts = VT.getVectorNumElements();
26394 SDValue N0 = SVN->getOperand(0);
26395 SDValue N1 = SVN->getOperand(1);
26396
26397 if (!N0->hasOneUse())
26398 return SDValue();
26399
26400 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
26401 // discussed above.
26402 if (!N1.isUndef()) {
26403 if (!N1->hasOneUse())
26404 return SDValue();
26405
26406 bool N0AnyConst = isAnyConstantBuildVector(N0);
26407 bool N1AnyConst = isAnyConstantBuildVector(N1);
26408 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26409 return SDValue();
26410 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26411 return SDValue();
26412 }
26413
26414 // If both inputs are splats of the same value then we can safely merge this
26415 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26416 bool IsSplat = false;
26417 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26418 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26419 if (BV0 && BV1)
26420 if (SDValue Splat0 = BV0->getSplatValue())
26421 IsSplat = (Splat0 == BV1->getSplatValue());
26422
26424 SmallSet<SDValue, 16> DuplicateOps;
26425 for (int M : SVN->getMask()) {
26426 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26427 if (M >= 0) {
26428 int Idx = M < (int)NumElts ? M : M - NumElts;
26429 SDValue &S = (M < (int)NumElts ? N0 : N1);
26430 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26431 Op = S.getOperand(Idx);
26432 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26433 SDValue Op0 = S.getOperand(0);
26434 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26435 } else {
26436 // Operand can't be combined - bail out.
26437 return SDValue();
26438 }
26439 }
26440
26441 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26442 // generating a splat; semantically, this is fine, but it's likely to
26443 // generate low-quality code if the target can't reconstruct an appropriate
26444 // shuffle.
26445 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26446 if (!IsSplat && !DuplicateOps.insert(Op).second)
26447 return SDValue();
26448
26449 Ops.push_back(Op);
26450 }
26451
26452 // BUILD_VECTOR requires all inputs to be of the same type, find the
26453 // maximum type and extend them all.
26454 EVT SVT = VT.getScalarType();
26455 if (SVT.isInteger())
26456 for (SDValue &Op : Ops)
26457 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26458 if (SVT != VT.getScalarType())
26459 for (SDValue &Op : Ops)
26460 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26461 : (TLI.isZExtFree(Op.getValueType(), SVT)
26462 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26463 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26464 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26465}
26466
26467// Match shuffles that can be converted to *_vector_extend_in_reg.
26468// This is often generated during legalization.
26469// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26470// and returns the EVT to which the extension should be performed.
26471// NOTE: this assumes that the src is the first operand of the shuffle.
26472static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26473 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26474 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26475 bool LegalOperations) {
26476 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26477
26478 // TODO Add support for big-endian when we have a test case.
26479 if (!VT.isInteger() || IsBigEndian)
26480 return std::nullopt;
26481
26482 unsigned NumElts = VT.getVectorNumElements();
26483 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26484
26485 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
26486 // power-of-2 extensions as they are the most likely.
26487 // FIXME: should try Scale == NumElts case too,
26488 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26489 // The vector width must be a multiple of Scale.
26490 if (NumElts % Scale != 0)
26491 continue;
26492
26493 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
26494 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
26495
26496 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
26497 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
26498 continue;
26499
26500 if (Match(Scale))
26501 return OutVT;
26502 }
26503
26504 return std::nullopt;
26505}
26506
26507// Match shuffles that can be converted to any_vector_extend_in_reg.
26508// This is often generated during legalization.
26509// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
26510static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
26511 SelectionDAG &DAG,
26512 const TargetLowering &TLI,
26513 bool LegalOperations) {
26514 EVT VT = SVN->getValueType(0);
26515 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26516
26517 // TODO Add support for big-endian when we have a test case.
26518 if (!VT.isInteger() || IsBigEndian)
26519 return SDValue();
26520
26521 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
26522 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
26523 Mask = SVN->getMask()](unsigned Scale) {
26524 for (unsigned i = 0; i != NumElts; ++i) {
26525 if (Mask[i] < 0)
26526 continue;
26527 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
26528 continue;
26529 return false;
26530 }
26531 return true;
26532 };
26533
26534 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
26535 SDValue N0 = SVN->getOperand(0);
26536 // Never create an illegal type. Only create unsupported operations if we
26537 // are pre-legalization.
26538 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26539 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
26540 if (!OutVT)
26541 return SDValue();
26542 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
26543}
26544
26545// Match shuffles that can be converted to zero_extend_vector_inreg.
26546// This is often generated during legalization.
26547// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
26548static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
26549 SelectionDAG &DAG,
26550 const TargetLowering &TLI,
26551 bool LegalOperations) {
26552 bool LegalTypes = true;
26553 EVT VT = SVN->getValueType(0);
26554 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
26555 unsigned NumElts = VT.getVectorNumElements();
26556 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26557
26558 // TODO: add support for big-endian when we have a test case.
26559 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26560 if (!VT.isInteger() || IsBigEndian)
26561 return SDValue();
26562
26563 SmallVector<int, 16> Mask(SVN->getMask());
26564 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
26565 for (int &Indice : Mask) {
26566 if (Indice < 0)
26567 continue;
26568 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
26569 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
26570 Fn(Indice, OpIdx, OpEltIdx);
26571 }
26572 };
26573
26574 // Which elements of which operand does this shuffle demand?
26575 std::array<APInt, 2> OpsDemandedElts;
26576 for (APInt &OpDemandedElts : OpsDemandedElts)
26577 OpDemandedElts = APInt::getZero(NumElts);
26578 ForEachDecomposedIndice(
26579 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
26580 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
26581 });
26582
26583 // Element-wise(!), which of these demanded elements are known to be zero?
26584 std::array<APInt, 2> OpsKnownZeroElts;
26585 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
26586 std::get<2>(I) =
26587 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
26588
26589 // Manifest zeroable element knowledge in the shuffle mask.
26590 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
26591 // this is a local invention, but it won't leak into DAG.
26592 // FIXME: should we not manifest them, but just check when matching?
26593 bool HadZeroableElts = false;
26594 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
26595 int &Indice, int OpIdx, int OpEltIdx) {
26596 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
26597 Indice = -2; // Zeroable element.
26598 HadZeroableElts = true;
26599 }
26600 });
26601
26602 // Don't proceed unless we've refined at least one zeroable mask index.
26603 // If we didn't, then we are still trying to match the same shuffle mask
26604 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
26605 // and evidently failed. Proceeding will lead to endless combine loops.
26606 if (!HadZeroableElts)
26607 return SDValue();
26608
26609 // The shuffle may be more fine-grained than we want. Widen elements first.
26610 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
26611 SmallVector<int, 16> ScaledMask;
26612 getShuffleMaskWithWidestElts(Mask, ScaledMask);
26613 assert(Mask.size() >= ScaledMask.size() &&
26614 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
26615 int Prescale = Mask.size() / ScaledMask.size();
26616
26617 NumElts = ScaledMask.size();
26618 EltSizeInBits *= Prescale;
26619
26620 EVT PrescaledVT = EVT::getVectorVT(
26621 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
26622 NumElts);
26623
26624 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
26625 return SDValue();
26626
26627 // For example,
26628 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
26629 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
26630 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
26631 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
26632 "Unexpected mask scaling factor.");
26633 ArrayRef<int> Mask = ScaledMask;
26634 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
26635 SrcElt != NumSrcElts; ++SrcElt) {
26636 // Analyze the shuffle mask in Scale-sized chunks.
26637 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
26638 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
26639 Mask = Mask.drop_front(MaskChunk.size());
26640 // The first indice in this chunk must be SrcElt, but not zero!
26641 // FIXME: undef should be fine, but that results in more-defined result.
26642 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
26643 return false;
26644 // The rest of the indices in this chunk must be zeros.
26645 // FIXME: undef should be fine, but that results in more-defined result.
26646 if (!all_of(MaskChunk.drop_front(1),
26647 [](int Indice) { return Indice == -2; }))
26648 return false;
26649 }
26650 assert(Mask.empty() && "Did not process the whole mask?");
26651 return true;
26652 };
26653
26654 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
26655 for (bool Commuted : {false, true}) {
26656 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
26657 if (Commuted)
26658      ShuffleVectorSDNode::commuteMask(ScaledMask);
26659 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26660 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
26661 LegalOperations);
26662 if (OutVT)
26663 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
26664 DAG.getBitcast(PrescaledVT, Op)));
26665 }
26666 return SDValue();
26667}
26668
26669// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
26670// each source element of a large type into the lowest elements of a smaller
26671// destination type. This is often generated during legalization.
26672// If the source node itself was a '*_extend_vector_inreg' node then we should
26673// then be able to remove it.
26674static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
26675 SelectionDAG &DAG) {
26676 EVT VT = SVN->getValueType(0);
26677 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26678
26679 // TODO Add support for big-endian when we have a test case.
26680 if (!VT.isInteger() || IsBigEndian)
26681 return SDValue();
26682
26684
26685 unsigned Opcode = N0.getOpcode();
26686 if (!ISD::isExtVecInRegOpcode(Opcode))
26687 return SDValue();
26688
26689 SDValue N00 = N0.getOperand(0);
26690 ArrayRef<int> Mask = SVN->getMask();
26691 unsigned NumElts = VT.getVectorNumElements();
26692 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26693 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
26694 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
26695
26696 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
26697 return SDValue();
26698 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
26699
26700 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
26701 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
26702 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
26703 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
26704 for (unsigned i = 0; i != NumElts; ++i) {
26705 if (Mask[i] < 0)
26706 continue;
26707 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
26708 continue;
26709 return false;
26710 }
26711 return true;
26712 };
26713
26714 // At the moment we just handle the case where we've truncated back to the
26715 // same size as before the extension.
26716 // TODO: handle more extension/truncation cases as cases arise.
26717 if (EltSizeInBits != ExtSrcSizeInBits)
26718 return SDValue();
26719
26720 // We can remove *extend_vector_inreg only if the truncation happens at
26721 // the same scale as the extension.
26722 if (isTruncate(ExtScale))
26723 return DAG.getBitcast(VT, N00);
26724
26725 return SDValue();
26726}
26727
26728// Combine shuffles of splat-shuffles of the form:
26729// shuffle (shuffle V, undef, splat-mask), undef, M
26730// If splat-mask contains undef elements, we need to be careful about
26731// introducing undef's in the folded mask which are not the result of composing
26732// the masks of the shuffles.
26733 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
26734                                         SelectionDAG &DAG) {
26735 EVT VT = Shuf->getValueType(0);
26736 unsigned NumElts = VT.getVectorNumElements();
26737
26738 if (!Shuf->getOperand(1).isUndef())
26739 return SDValue();
26740
26741 // See if this unary non-splat shuffle actually *is* a splat shuffle,
26742 // in disguise, with all demanded elements being identical.
26743 // FIXME: this can be done per-operand.
26744 if (!Shuf->isSplat()) {
26745 APInt DemandedElts(NumElts, 0);
26746 for (int Idx : Shuf->getMask()) {
26747 if (Idx < 0)
26748 continue; // Ignore sentinel indices.
26749 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
26750 DemandedElts.setBit(Idx);
26751 }
26752 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
26753 APInt UndefElts;
26754 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
26755 // Even if all demanded elements are splat, some of them could be undef.
26756 // Which lowest demanded element is *not* known-undef?
26757 std::optional<unsigned> MinNonUndefIdx;
26758 for (int Idx : Shuf->getMask()) {
26759 if (Idx < 0 || UndefElts[Idx])
26760 continue; // Ignore sentinel indices, and undef elements.
26761 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
26762 }
26763 if (!MinNonUndefIdx)
26764 return DAG.getUNDEF(VT); // All undef - result is undef.
26765 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
26766 SmallVector<int, 8> SplatMask(Shuf->getMask());
26767 for (int &Idx : SplatMask) {
26768 if (Idx < 0)
26769 continue; // Passthrough sentinel indices.
26770 // Otherwise, just pick the lowest demanded non-undef element.
26771 // Or sentinel undef, if we know we'd pick a known-undef element.
26772 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
26773 }
26774 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
26775 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
26776 Shuf->getOperand(1), SplatMask);
26777 }
26778 }
26779
26780 // If the inner operand is a known splat with no undefs, just return that directly.
26781 // TODO: Create DemandedElts mask from Shuf's mask.
26782 // TODO: Allow undef elements and merge with the shuffle code below.
26783 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26784 return Shuf->getOperand(0);
26785
26786   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26787   if (!Splat || !Splat->isSplat())
26788 return SDValue();
26789
26790 ArrayRef<int> ShufMask = Shuf->getMask();
26791 ArrayRef<int> SplatMask = Splat->getMask();
26792 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26793
26794 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26795 // every undef mask element in the splat-shuffle has a corresponding undef
26796 // element in the user-shuffle's mask or if the composition of mask elements
26797 // would result in undef.
26798 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26799 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26800 // In this case it is not legal to simplify to the splat-shuffle because we
26801   // may be exposing to the users of the shuffle an undef element at index 1
26802 // which was not there before the combine.
26803 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26804 // In this case the composition of masks yields SplatMask, so it's ok to
26805 // simplify to the splat-shuffle.
26806 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26807 // In this case the composed mask includes all undef elements of SplatMask
26808 // and in addition sets element zero to undef. It is safe to simplify to
26809 // the splat-shuffle.
26810 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26811 ArrayRef<int> SplatMask) {
26812 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26813 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26814 SplatMask[UserMask[i]] != -1)
26815 return false;
26816 return true;
26817 };
26818 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26819 return Shuf->getOperand(0);
26820
26821 // Create a new shuffle with a mask that is composed of the two shuffles'
26822 // masks.
26823 SmallVector<int, 32> NewMask;
26824 for (int Idx : ShufMask)
26825 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26826
26827 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26828 Splat->getOperand(0), Splat->getOperand(1),
26829 NewMask);
26830}
26831
26832// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26833// the mask can be treated as a larger type.
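// For example (illustrative): with a v8i16 result whose operands are bitcast
// from v4i32 sources,
//   shuffle<0,1,2,3,12,13,14,15>(bitcast v4i32 X, bitcast v4i32 Y)
//     --> bitcast (shuffle<0,1,6,7>(v4i32 X, v4i32 Y))
// because each pair of narrow lanes maps onto one whole wide lane.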
26834 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26835                                        SelectionDAG &DAG,
26836 const TargetLowering &TLI,
26837 bool LegalOperations) {
26838 SDValue Op0 = SVN->getOperand(0);
26839 SDValue Op1 = SVN->getOperand(1);
26840 EVT VT = SVN->getValueType(0);
26841 if (Op0.getOpcode() != ISD::BITCAST)
26842 return SDValue();
26843 EVT InVT = Op0.getOperand(0).getValueType();
26844 if (!InVT.isVector() ||
26845 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26846 Op1.getOperand(0).getValueType() != InVT)))
26847 return SDValue();
26848   if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26849       (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26850 return SDValue();
26851
26852 int VTLanes = VT.getVectorNumElements();
26853 int InLanes = InVT.getVectorNumElements();
26854 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26855 (LegalOperations &&
26856        !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
26857     return SDValue();
26858 int Factor = VTLanes / InLanes;
26859
26860   // Check that each group of lanes in the mask is either undef or makes a valid
26861 // mask for the wider lane type.
26862 ArrayRef<int> Mask = SVN->getMask();
26863 SmallVector<int> NewMask;
26864 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26865 return SDValue();
26866
26867 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26868 return SDValue();
26869
26870 // Create the new shuffle with the new mask and bitcast it back to the
26871 // original type.
26872 SDLoc DL(SVN);
26873 Op0 = Op0.getOperand(0);
26874 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26875 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26876 return DAG.getBitcast(VT, NewShuf);
26877}
26878
26879/// Combine shuffle of shuffle of the form:
26880/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
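/// e.g. (illustrative) with InnerMask = <1,1,1,u> and OuterMask = <0,2,u,3>,
/// every demanded lane resolves to inner element 1, so the combined mask is
/// the splat <1,1,u,u> of X.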
26881 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26882                                      SelectionDAG &DAG) {
26883 if (!OuterShuf->getOperand(1).isUndef())
26884 return SDValue();
26885 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26886 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26887 return SDValue();
26888
26889 ArrayRef<int> OuterMask = OuterShuf->getMask();
26890 ArrayRef<int> InnerMask = InnerShuf->getMask();
26891 unsigned NumElts = OuterMask.size();
26892 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26893 SmallVector<int, 32> CombinedMask(NumElts, -1);
26894 int SplatIndex = -1;
26895 for (unsigned i = 0; i != NumElts; ++i) {
26896 // Undef lanes remain undef.
26897 int OuterMaskElt = OuterMask[i];
26898 if (OuterMaskElt == -1)
26899 continue;
26900
26901 // Peek through the shuffle masks to get the underlying source element.
26902 int InnerMaskElt = InnerMask[OuterMaskElt];
26903 if (InnerMaskElt == -1)
26904 continue;
26905
26906 // Initialize the splatted element.
26907 if (SplatIndex == -1)
26908 SplatIndex = InnerMaskElt;
26909
26910 // Non-matching index - this is not a splat.
26911 if (SplatIndex != InnerMaskElt)
26912 return SDValue();
26913
26914 CombinedMask[i] = InnerMaskElt;
26915 }
26916 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
26917 getSplatIndex(CombinedMask) != -1) &&
26918 "Expected a splat mask");
26919
26920 // TODO: The transform may be a win even if the mask is not legal.
26921 EVT VT = OuterShuf->getValueType(0);
26922 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
26923 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
26924 return SDValue();
26925
26926 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
26927 InnerShuf->getOperand(1), CombinedMask);
26928}
26929
26930/// If the shuffle mask is taking exactly one element from the first vector
26931/// operand and passing through all other elements from the second vector
26932/// operand, return the index of the mask element that is choosing an element
26933/// from the first operand. Otherwise, return -1.
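/// e.g. (illustrative) for 4 elements, Mask = <4,5,1,7> returns 2 (only lane 2
/// reads operand 0), while Mask = <4,1,2,7> returns -1 (two lanes read operand
/// 0) and Mask = <5,4,1,7> returns -1 (operand 1's elements change lanes).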
26934 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
26935   int MaskSize = Mask.size();
26936 int EltFromOp0 = -1;
26937 // TODO: This does not match if there are undef elements in the shuffle mask.
26938 // Should we ignore undefs in the shuffle mask instead? The trade-off is
26939 // removing an instruction (a shuffle), but losing the knowledge that some
26940 // vector lanes are not needed.
26941 for (int i = 0; i != MaskSize; ++i) {
26942 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
26943 // We're looking for a shuffle of exactly one element from operand 0.
26944 if (EltFromOp0 != -1)
26945 return -1;
26946 EltFromOp0 = i;
26947 } else if (Mask[i] != i + MaskSize) {
26948 // Nothing from operand 1 can change lanes.
26949 return -1;
26950 }
26951 }
26952 return EltFromOp0;
26953}
26954
26955/// If a shuffle inserts exactly one element from a source vector operand into
26956/// another vector operand and we can access the specified element as a scalar,
26957/// then we can eliminate the shuffle.
26958SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
26959 // First, check if we are taking one element of a vector and shuffling that
26960 // element into another vector.
26961 ArrayRef<int> Mask = Shuf->getMask();
26962 SmallVector<int, 16> CommutedMask(Mask);
26963 SDValue Op0 = Shuf->getOperand(0);
26964 SDValue Op1 = Shuf->getOperand(1);
26965 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
26966 if (ShufOp0Index == -1) {
26967 // Commute mask and check again.
26968     ShuffleVectorSDNode::commuteMask(CommutedMask);
26969     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
26970 if (ShufOp0Index == -1)
26971 return SDValue();
26972 // Commute operands to match the commuted shuffle mask.
26973 std::swap(Op0, Op1);
26974 Mask = CommutedMask;
26975 }
26976
26977 // The shuffle inserts exactly one element from operand 0 into operand 1.
26978 // Now see if we can access that element as a scalar via a real insert element
26979 // instruction.
26980 // TODO: We can try harder to locate the element as a scalar. Examples: it
26981 // could be an operand of BUILD_VECTOR, or a constant.
26982 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
26983 "Shuffle mask value must be from operand 0");
26984
26985 SDValue Elt;
26986 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
26987 m_SpecificInt(Mask[ShufOp0Index])))) {
26988 // There's an existing insertelement with constant insertion index, so we
26989 // don't need to check the legality/profitability of a replacement operation
26990 // that differs at most in the constant value. The target should be able to
26991 // lower any of those in a similar way. If not, legalization will expand
26992 // this to a scalar-to-vector plus shuffle.
26993 //
26994 // Note that the shuffle may move the scalar from the position that the
26995 // insert element used. Therefore, our new insert element occurs at the
26996 // shuffle's mask index value, not the insert's index value.
26997 //
26998 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
26999 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27000 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27001 Op1, Elt, NewInsIndex);
27002 }
27003
27004 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
27005 return SDValue();
27006
27008 Mask[ShufOp0Index] == 0) {
27009 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27010 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27011 Op1, Elt, NewInsIndex);
27012 }
27013
27014 return SDValue();
27015}
27016
27017/// If we have a unary shuffle of a shuffle, see if it can be folded away
27018/// completely. This has the potential to lose undef knowledge because the first
27019/// shuffle may not have an undef mask element where the second one does. So
27020/// only call this after doing simplifications based on demanded elements.
27021 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27022   // shuf (shuf0 X, Y, Mask0), undef, Mask
27023 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27024 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27025 return SDValue();
27026
27027 ArrayRef<int> Mask = Shuf->getMask();
27028 ArrayRef<int> Mask0 = Shuf0->getMask();
27029 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27030 // Ignore undef elements.
27031 if (Mask[i] == -1)
27032 continue;
27033 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27034
27035 // Is the element of the shuffle operand chosen by this shuffle the same as
27036 // the element chosen by the shuffle operand itself?
27037 if (Mask0[Mask[i]] != Mask0[i])
27038 return SDValue();
27039 }
27040 // Every element of this shuffle is identical to the result of the previous
27041 // shuffle, so we can replace this value.
27042 return Shuf->getOperand(0);
27043}
27044
27045SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27046 EVT VT = N->getValueType(0);
27047 unsigned NumElts = VT.getVectorNumElements();
27048
27049 SDValue N0 = N->getOperand(0);
27050 SDValue N1 = N->getOperand(1);
27051
27052 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27053
27054 // Canonicalize shuffle undef, undef -> undef
27055 if (N0.isUndef() && N1.isUndef())
27056 return DAG.getUNDEF(VT);
27057
27058 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27059
27060 // Canonicalize shuffle v, v -> v, undef
27061 if (N0 == N1)
27062 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27063 createUnaryMask(SVN->getMask(), NumElts));
27064
27065 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27066 if (N0.isUndef())
27067 return DAG.getCommutedVectorShuffle(*SVN);
27068
27069 // Remove references to rhs if it is undef
27070 if (N1.isUndef()) {
27071 bool Changed = false;
27072 SmallVector<int, 8> NewMask;
27073 for (unsigned i = 0; i != NumElts; ++i) {
27074 int Idx = SVN->getMaskElt(i);
27075 if (Idx >= (int)NumElts) {
27076 Idx = -1;
27077 Changed = true;
27078 }
27079 NewMask.push_back(Idx);
27080 }
27081 if (Changed)
27082 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27083 }
27084
27085 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27086 return InsElt;
27087
27088 // A shuffle of a single vector that is a splatted value can always be folded.
27089 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27090 return V;
27091
27092 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27093 return V;
27094
27095 // If it is a splat, check if the argument vector is another splat or a
27096 // build_vector.
27097 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27098 int SplatIndex = SVN->getSplatIndex();
27099 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27100 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27101 // splat (vector_bo L, R), Index -->
27102 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27103 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27104 SDLoc DL(N);
27105 EVT EltVT = VT.getScalarType();
27106 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27107 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27108 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27109 SDValue NewBO =
27110 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27111 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27112 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27113 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27114 }
27115
27116 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27117 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27118 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27119 N0.hasOneUse()) {
27120 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27121 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27122
27123       if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27124         if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27125 if (Idx->getAPIntValue() == SplatIndex)
27126 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27127
27128 // Look through a bitcast if LE and splatting lane 0, through to a
27129 // scalar_to_vector or a build_vector.
27130 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27131 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27132         (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27133          N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27134       EVT N00VT = N0.getOperand(0).getValueType();
27135 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27136 VT.isInteger() && N00VT.isInteger()) {
27137 EVT InVT =
27138             TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
27139         SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
27140                                         SDLoc(N), InVT);
27141 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27142 }
27143 }
27144 }
27145
27146 // If this is a bit convert that changes the element type of the vector but
27147 // not the number of vector elements, look through it. Be careful not to
27148   // look through conversions that change things like v4f32 to v2f64.
27149 SDNode *V = N0.getNode();
27150 if (V->getOpcode() == ISD::BITCAST) {
27151 SDValue ConvInput = V->getOperand(0);
27152 if (ConvInput.getValueType().isVector() &&
27153 ConvInput.getValueType().getVectorNumElements() == NumElts)
27154 V = ConvInput.getNode();
27155 }
27156
27157 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27158 assert(V->getNumOperands() == NumElts &&
27159 "BUILD_VECTOR has wrong number of operands");
27160 SDValue Base;
27161 bool AllSame = true;
27162 for (unsigned i = 0; i != NumElts; ++i) {
27163 if (!V->getOperand(i).isUndef()) {
27164 Base = V->getOperand(i);
27165 break;
27166 }
27167 }
27168 // Splat of <u, u, u, u>, return <u, u, u, u>
27169 if (!Base.getNode())
27170 return N0;
27171 for (unsigned i = 0; i != NumElts; ++i) {
27172 if (V->getOperand(i) != Base) {
27173 AllSame = false;
27174 break;
27175 }
27176 }
27177 // Splat of <x, x, x, x>, return <x, x, x, x>
27178 if (AllSame)
27179 return N0;
27180
27181 // Canonicalize any other splat as a build_vector, but avoid defining any
27182 // undefined elements in the mask.
27183 SDValue Splatted = V->getOperand(SplatIndex);
27184 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27185 EVT EltVT = Splatted.getValueType();
27186
27187 for (unsigned i = 0; i != NumElts; ++i) {
27188 if (SVN->getMaskElt(i) < 0)
27189 Ops[i] = DAG.getUNDEF(EltVT);
27190 }
27191
27192 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27193
27194 // We may have jumped through bitcasts, so the type of the
27195 // BUILD_VECTOR may not match the type of the shuffle.
27196 if (V->getValueType(0) != VT)
27197 NewBV = DAG.getBitcast(VT, NewBV);
27198 return NewBV;
27199 }
27200 }
27201
27202 // Simplify source operands based on shuffle mask.
27203   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27204     return SDValue(N, 0);
27205
27206 // This is intentionally placed after demanded elements simplification because
27207 // it could eliminate knowledge of undef elements created by this shuffle.
27208 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27209 return ShufOp;
27210
27211 // Match shuffles that can be converted to any_vector_extend_in_reg.
27212 if (SDValue V =
27213 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27214 return V;
27215
27216 // Combine "truncate_vector_in_reg" style shuffles.
27217 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27218 return V;
27219
27220 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27221 Level < AfterLegalizeVectorOps &&
27222 (N1.isUndef() ||
27223 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27224 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27225 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27226 return V;
27227 }
27228
27229 // A shuffle of a concat of the same narrow vector can be reduced to use
27230 // only low-half elements of a concat with undef:
27231 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
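  // e.g. (illustrative, X = v2i32): shuf (concat X, X), undef, <3,1,0,2>
  //      --> shuf (concat X, undef), undef, <1,1,0,0>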
27232 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27233 N0.getNumOperands() == 2 &&
27234 N0.getOperand(0) == N0.getOperand(1)) {
27235 int HalfNumElts = (int)NumElts / 2;
27236 SmallVector<int, 8> NewMask;
27237 for (unsigned i = 0; i != NumElts; ++i) {
27238 int Idx = SVN->getMaskElt(i);
27239 if (Idx >= HalfNumElts) {
27240 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27241 Idx -= HalfNumElts;
27242 }
27243 NewMask.push_back(Idx);
27244 }
27245 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27246 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27247 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27248 N0.getOperand(0), UndefVec);
27249 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27250 }
27251 }
27252
27253 // See if we can replace a shuffle with an insert_subvector.
27254 // e.g. v2i32 into v8i32:
27255 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27256 // --> insert_subvector(lhs,rhs1,4).
27257 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27258       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
27259     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27260 // Ensure RHS subvectors are legal.
27261 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27262 EVT SubVT = RHS.getOperand(0).getValueType();
27263 int NumSubVecs = RHS.getNumOperands();
27264 int NumSubElts = SubVT.getVectorNumElements();
27265 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27266 if (!TLI.isTypeLegal(SubVT))
27267 return SDValue();
27268
27269       // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27270 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27271 return SDValue();
27272
27273 // Search [NumSubElts] spans for RHS sequence.
27274 // TODO: Can we avoid nested loops to increase performance?
27275 SmallVector<int> InsertionMask(NumElts);
27276 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27277 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27278 // Reset mask to identity.
27279 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27280
27281 // Add subvector insertion.
27282 std::iota(InsertionMask.begin() + SubIdx,
27283 InsertionMask.begin() + SubIdx + NumSubElts,
27284 NumElts + (SubVec * NumSubElts));
27285
27286 // See if the shuffle mask matches the reference insertion mask.
27287 bool MatchingShuffle = true;
27288 for (int i = 0; i != (int)NumElts; ++i) {
27289 int ExpectIdx = InsertionMask[i];
27290 int ActualIdx = Mask[i];
27291 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27292 MatchingShuffle = false;
27293 break;
27294 }
27295 }
27296
27297 if (MatchingShuffle)
27298 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27299 SubIdx);
27300 }
27301 }
27302 return SDValue();
27303 };
27304 ArrayRef<int> Mask = SVN->getMask();
27305 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27306 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27307 return InsertN1;
27308 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27309 SmallVector<int> CommuteMask(Mask);
27310       ShuffleVectorSDNode::commuteMask(CommuteMask);
27311       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27312 return InsertN0;
27313 }
27314 }
27315
27316   // If we're not performing a select/blend shuffle, see if we can convert the
27317   // shuffle into an AND node, where all out-of-lane elements are known zero.
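  // e.g. (illustrative): shuffle<0,6,2,7> X, Y --> and X, <-1,0,-1,0> when
  // lanes 2 and 3 of Y are known zero, since every result lane then either
  // keeps the in-place element of X or is known to be zero.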
27318 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27319 bool IsInLaneMask = true;
27320 ArrayRef<int> Mask = SVN->getMask();
27321 SmallVector<int, 16> ClearMask(NumElts, -1);
27322 APInt DemandedLHS = APInt::getZero(NumElts);
27323 APInt DemandedRHS = APInt::getZero(NumElts);
27324 for (int I = 0; I != (int)NumElts; ++I) {
27325 int M = Mask[I];
27326 if (M < 0)
27327 continue;
27328 ClearMask[I] = M == I ? I : (I + NumElts);
27329 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27330 if (M != I) {
27331 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27332 Demanded.setBit(M % NumElts);
27333 }
27334 }
27335 // TODO: Should we try to mask with N1 as well?
27336 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27337 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27338 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27339 SDLoc DL(N);
27340 EVT IntVT = VT.changeVectorElementTypeToInteger();
27341 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27342 // Transform the type to a legal type so that the buildvector constant
27343 // elements are not illegal. Make sure that the result is larger than the
27344     // original type, in case the value is split into two (e.g. i64->i32).
27345 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27346 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27347 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27348 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27349 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27350 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27351 for (int I = 0; I != (int)NumElts; ++I)
27352 if (0 <= Mask[I])
27353 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27354
27355 // See if a clear mask is legal instead of going via
27356 // XformToShuffleWithZero which loses UNDEF mask elements.
27357 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27358 return DAG.getBitcast(
27359 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27360 DAG.getConstant(0, DL, IntVT), ClearMask));
27361
27362 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27363 return DAG.getBitcast(
27364 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27365 DAG.getBuildVector(IntVT, DL, AndMask)));
27366 }
27367 }
27368 }
27369
27370 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27371 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27372 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27373 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27374 return Res;
27375
27376 // If this shuffle only has a single input that is a bitcasted shuffle,
27377 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27378 // back to their original types.
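  // e.g. (illustrative): shuffle<1,0> (v2i64 bitcast (shuffle<2,3,0,1> v4i32 A, B))
  // rescales both masks to v4i32 lanes (<2,3,0,1> each) and merges them into
  // bitcast (shuffle<0,1,2,3> A, B), i.e. the two lane swaps cancel out.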
27379 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27380 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27381 TLI.isTypeLegal(VT)) {
27382
27383     SDValue BC0 = peekThroughOneUseBitcasts(N0);
27384     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27385 EVT SVT = VT.getScalarType();
27386 EVT InnerVT = BC0->getValueType(0);
27387 EVT InnerSVT = InnerVT.getScalarType();
27388
27389 // Determine which shuffle works with the smaller scalar type.
27390 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27391 EVT ScaleSVT = ScaleVT.getScalarType();
27392
27393 if (TLI.isTypeLegal(ScaleVT) &&
27394 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27395 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27396 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27397 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27398
27399 // Scale the shuffle masks to the smaller scalar type.
27400 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27401 SmallVector<int, 8> InnerMask;
27402 SmallVector<int, 8> OuterMask;
27403 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27404 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27405
27406 // Merge the shuffle masks.
27407 SmallVector<int, 8> NewMask;
27408 for (int M : OuterMask)
27409 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27410
27411 // Test for shuffle mask legality over both commutations.
27412 SDValue SV0 = BC0->getOperand(0);
27413 SDValue SV1 = BC0->getOperand(1);
27414 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27415 if (!LegalMask) {
27416 std::swap(SV0, SV1);
27417             ShuffleVectorSDNode::commuteMask(NewMask);
27418             LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27419 }
27420
27421 if (LegalMask) {
27422 SV0 = DAG.getBitcast(ScaleVT, SV0);
27423 SV1 = DAG.getBitcast(ScaleVT, SV1);
27424 return DAG.getBitcast(
27425 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27426 }
27427 }
27428 }
27429 }
27430
27431 // Match shuffles of bitcasts, so long as the mask can be treated as the
27432 // larger type.
27433 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27434 return V;
27435
27436 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27437 // operand, and SV1 as the second operand.
27438 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27439 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
27440 auto MergeInnerShuffle =
27441 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27442 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27443 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27444 SmallVectorImpl<int> &Mask) -> bool {
27445 // Don't try to fold splats; they're likely to simplify somehow, or they
27446 // might be free.
27447 if (OtherSVN->isSplat())
27448 return false;
27449
27450 SV0 = SV1 = SDValue();
27451 Mask.clear();
27452
27453 for (unsigned i = 0; i != NumElts; ++i) {
27454 int Idx = SVN->getMaskElt(i);
27455 if (Idx < 0) {
27456 // Propagate Undef.
27457 Mask.push_back(Idx);
27458 continue;
27459 }
27460
27461 if (Commute)
27462 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27463
27464 SDValue CurrentVec;
27465 if (Idx < (int)NumElts) {
27466 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27467 // shuffle mask to identify which vector is actually referenced.
27468 Idx = OtherSVN->getMaskElt(Idx);
27469 if (Idx < 0) {
27470 // Propagate Undef.
27471 Mask.push_back(Idx);
27472 continue;
27473 }
27474 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27475 : OtherSVN->getOperand(1);
27476 } else {
27477 // This shuffle index references an element within N1.
27478 CurrentVec = N1;
27479 }
27480
27481 // Simple case where 'CurrentVec' is UNDEF.
27482 if (CurrentVec.isUndef()) {
27483 Mask.push_back(-1);
27484 continue;
27485 }
27486
27487 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27488 // will be the first or second operand of the combined shuffle.
27489 Idx = Idx % NumElts;
27490 if (!SV0.getNode() || SV0 == CurrentVec) {
27491 // Ok. CurrentVec is the left hand side.
27492 // Update the mask accordingly.
27493 SV0 = CurrentVec;
27494 Mask.push_back(Idx);
27495 continue;
27496 }
27497 if (!SV1.getNode() || SV1 == CurrentVec) {
27498 // Ok. CurrentVec is the right hand side.
27499 // Update the mask accordingly.
27500 SV1 = CurrentVec;
27501 Mask.push_back(Idx + NumElts);
27502 continue;
27503 }
27504
27505 // Last chance - see if the vector is another shuffle and if it
27506 // uses one of the existing candidate shuffle ops.
27507 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
27508 int InnerIdx = CurrentSVN->getMaskElt(Idx);
27509 if (InnerIdx < 0) {
27510 Mask.push_back(-1);
27511 continue;
27512 }
27513 SDValue InnerVec = (InnerIdx < (int)NumElts)
27514 ? CurrentSVN->getOperand(0)
27515 : CurrentSVN->getOperand(1);
27516 if (InnerVec.isUndef()) {
27517 Mask.push_back(-1);
27518 continue;
27519 }
27520 InnerIdx %= NumElts;
27521 if (InnerVec == SV0) {
27522 Mask.push_back(InnerIdx);
27523 continue;
27524 }
27525 if (InnerVec == SV1) {
27526 Mask.push_back(InnerIdx + NumElts);
27527 continue;
27528 }
27529 }
27530
27531 // Bail out if we cannot convert the shuffle pair into a single shuffle.
27532 return false;
27533 }
27534
27535 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27536 return true;
27537
27538 // Avoid introducing shuffles with illegal mask.
27539 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27540 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27541 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27542 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
27543 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
27544 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
27545 if (TLI.isShuffleMaskLegal(Mask, VT))
27546 return true;
27547
27548 std::swap(SV0, SV1);
27549     ShuffleVectorSDNode::commuteMask(Mask);
27550     return TLI.isShuffleMaskLegal(Mask, VT);
27551 };
27552
27553 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27554 // Canonicalize shuffles according to rules:
27555 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
27556 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
27557 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
27558 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27559         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
27560       // The incoming shuffle must be of the same type as the result of the
27561 // current shuffle.
27562 assert(N1->getOperand(0).getValueType() == VT &&
27563 "Shuffle types don't match");
27564
27565 SDValue SV0 = N1->getOperand(0);
27566 SDValue SV1 = N1->getOperand(1);
27567 bool HasSameOp0 = N0 == SV0;
27568 bool IsSV1Undef = SV1.isUndef();
27569 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
27570 // Commute the operands of this shuffle so merging below will trigger.
27571 return DAG.getCommutedVectorShuffle(*SVN);
27572 }
27573
27574 // Canonicalize splat shuffles to the RHS to improve merging below.
27575 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
27576 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
27577 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27578 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
27579 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
27580 return DAG.getCommutedVectorShuffle(*SVN);
27581 }
27582
27583 // Try to fold according to rules:
27584 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27585 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27586 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27587 // Don't try to fold shuffles with illegal type.
27588 // Only fold if this shuffle is the only user of the other shuffle.
27589     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
27590 for (int i = 0; i != 2; ++i) {
27591 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
27592 N->isOnlyUserOf(N->getOperand(i).getNode())) {
27593 // The incoming shuffle must be of the same type as the result of the
27594 // current shuffle.
27595 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
27596 assert(OtherSV->getOperand(0).getValueType() == VT &&
27597 "Shuffle types don't match");
27598
27599 SDValue SV0, SV1;
27600 SmallVector<int, 4> Mask;
27601 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
27602 SV0, SV1, Mask)) {
27603 // Check if all indices in Mask are Undef. In case, propagate Undef.
27604 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27605 return DAG.getUNDEF(VT);
27606
27607 return DAG.getVectorShuffle(VT, SDLoc(N),
27608 SV0 ? SV0 : DAG.getUNDEF(VT),
27609 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
27610 }
27611 }
27612 }
27613
27614   // Merge shuffles through binops if we are able to merge them with at
27615   // least one other shuffle.
27616 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
27617 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
27618 unsigned SrcOpcode = N0.getOpcode();
27619 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
27620 (N1.isUndef() ||
27621 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
27622 // Get binop source ops, or just pass on the undef.
27623 SDValue Op00 = N0.getOperand(0);
27624 SDValue Op01 = N0.getOperand(1);
27625 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
27626 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
27627 // TODO: We might be able to relax the VT check but we don't currently
27628 // have any isBinOp() that has different result/ops VTs so play safe until
27629 // we have test coverage.
27630 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
27631 Op01.getValueType() == VT && Op11.getValueType() == VT &&
27632 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
27633 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
27634 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
27635 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
27636 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
27637 SmallVectorImpl<int> &Mask, bool LeftOp,
27638 bool Commute) {
27639 SDValue InnerN = Commute ? N1 : N0;
27640 SDValue Op0 = LeftOp ? Op00 : Op01;
27641 SDValue Op1 = LeftOp ? Op10 : Op11;
27642 if (Commute)
27643 std::swap(Op0, Op1);
27644 // Only accept the merged shuffle if we don't introduce undef elements,
27645 // or the inner shuffle already contained undef elements.
27646 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
27647 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
27648 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
27649 Mask) &&
27650 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
27651 llvm::none_of(Mask, [](int M) { return M < 0; }));
27652 };
27653
27654 // Ensure we don't increase the number of shuffles - we must merge a
27655 // shuffle from at least one of the LHS and RHS ops.
27656 bool MergedLeft = false;
27657 SDValue LeftSV0, LeftSV1;
27658 SmallVector<int, 4> LeftMask;
27659 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
27660 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
27661 MergedLeft = true;
27662 } else {
27663 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27664 LeftSV0 = Op00, LeftSV1 = Op10;
27665 }
27666
27667 bool MergedRight = false;
27668 SDValue RightSV0, RightSV1;
27669 SmallVector<int, 4> RightMask;
27670 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
27671 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
27672 MergedRight = true;
27673 } else {
27674 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27675 RightSV0 = Op01, RightSV1 = Op11;
27676 }
27677
27678 if (MergedLeft || MergedRight) {
27679 SDLoc DL(N);
27680           SDValue LHS = DAG.getVectorShuffle(
27681               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
27682 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
27683           SDValue RHS = DAG.getVectorShuffle(
27684               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
27685 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
27686 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
27687 }
27688 }
27689 }
27690 }
27691
27692 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
27693 return V;
27694
27695 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
27696 // Perform this really late, because it could eliminate knowledge
27697 // of undef elements created by this shuffle.
27698 if (Level < AfterLegalizeTypes)
27699 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
27700 LegalOperations))
27701 return V;
27702
27703 return SDValue();
27704}
27705
27706SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
27707 EVT VT = N->getValueType(0);
27708 if (!VT.isFixedLengthVector())
27709 return SDValue();
27710
27711 // Try to convert a scalar binop with an extracted vector element to a vector
27712 // binop. This is intended to reduce potentially expensive register moves.
27713 // TODO: Check if both operands are extracted.
27714   // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
27715 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
27716 SDValue Scalar = N->getOperand(0);
27717 unsigned Opcode = Scalar.getOpcode();
27718 EVT VecEltVT = VT.getScalarType();
27719 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
27720 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
27721 Scalar.getOperand(0).getValueType() == VecEltVT &&
27722 Scalar.getOperand(1).getValueType() == VecEltVT &&
27723 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
27724 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
27725 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
27726 // Match an extract element and get a shuffle mask equivalent.
27727 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
27728
27729 for (int i : {0, 1}) {
27730 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
27731 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
27732 SDValue EE = Scalar.getOperand(i);
27733 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
27734 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
27735 EE.getOperand(0).getValueType() == VT &&
27736           isa<ConstantSDNode>(EE.getOperand(1))) {
27737         // Mask = {ExtractIndex, undef, undef....}
27738 ShufMask[0] = EE.getConstantOperandVal(1);
27739 // Make sure the shuffle is legal if we are crossing lanes.
27740 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
27741 SDLoc DL(N);
27742 SDValue V[] = {EE.getOperand(0),
27743 DAG.getConstant(C->getAPIntValue(), DL, VT)};
27744 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
27745 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
27746 ShufMask);
27747 }
27748 }
27749 }
27750 }
27751
27752 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
27753 // with a VECTOR_SHUFFLE and possible truncate.
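  // e.g. (illustrative): scalar_to_vector (extractelt (v4i32 V), 2)
  //      --> v4i32 shuffle<2,u,u,u> V, undef
  //      (possibly followed by an extract_subvector if fewer result elements
  //      are needed).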
27754 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
27755 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
27756 return SDValue();
27757
27758 // If we have an implicit truncate, truncate here if it is legal.
27759 if (VecEltVT != Scalar.getValueType() &&
27760 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
27761 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
27762 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
27763 }
27764
27765 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
27766 if (!ExtIndexC)
27767 return SDValue();
27768
27769 SDValue SrcVec = Scalar.getOperand(0);
27770 EVT SrcVT = SrcVec.getValueType();
27771 unsigned SrcNumElts = SrcVT.getVectorNumElements();
27772 unsigned VTNumElts = VT.getVectorNumElements();
27773 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
27774 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
27775 SmallVector<int, 8> Mask(SrcNumElts, -1);
27776 Mask[0] = ExtIndexC->getZExtValue();
27777 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
27778 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
27779 if (!LegalShuffle)
27780 return SDValue();
27781
27782 // If the initial vector is the same size, the shuffle is the result.
27783 if (VT == SrcVT)
27784 return LegalShuffle;
27785
27786 // If not, shorten the shuffled vector.
27787 if (VTNumElts != SrcNumElts) {
27788 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
27789 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27790 SrcVT.getVectorElementType(), VTNumElts);
27791 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27792 ZeroIdx);
27793 }
27794 }
27795
27796 return SDValue();
27797}
27798
27799SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27800 EVT VT = N->getValueType(0);
27801 SDValue N0 = N->getOperand(0);
27802 SDValue N1 = N->getOperand(1);
27803 SDValue N2 = N->getOperand(2);
27804 uint64_t InsIdx = N->getConstantOperandVal(2);
27805
27806 // If inserting an UNDEF, just return the original vector.
27807 if (N1.isUndef())
27808 return N0;
27809
27810 // If this is an insert of an extracted vector into an undef vector, we can
27811 // just use the input to the extract if the types match, and can simplify
27812 // in some cases even if they don't.
27813 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27814 N1.getOperand(1) == N2) {
27815 EVT SrcVT = N1.getOperand(0).getValueType();
27816 if (SrcVT == VT)
27817 return N1.getOperand(0);
27818 // TODO: To remove the zero check, need to adjust the offset to
27819 // a multiple of the new src type.
27820 if (isNullConstant(N2)) {
27821 if (VT.knownBitsGE(SrcVT) &&
27822 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27823 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27824 VT, N0, N1.getOperand(0), N2);
27825 else if (VT.knownBitsLE(SrcVT) &&
27826 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27827 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27828 VT, N1.getOperand(0), N2);
27829 }
27830 }
27831
27832 // Handle case where we've ended up inserting back into the source vector
27833 // we extracted the subvector from.
27834 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27835 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27836 N1.getOperand(1) == N2)
27837 return N0;
27838
27839 // Simplify scalar inserts into an undef vector:
27840 // insert_subvector undef, (splat X), N2 -> splat X
27841 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27842 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27843 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27844
27845 // insert_subvector (splat X), (splat X), N2 -> splat X
27846 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27847 N0.getOperand(0) == N1.getOperand(0))
27848 return N0;
27849
27850 // If we are inserting a bitcast value into an undef, with the same
27851 // number of elements, just use the bitcast input of the extract.
27852 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27853 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27854 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27855       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27856       N1.getOperand(0).getOperand(1) == N2 &&
27857       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27858           VT.getVectorElementCount() &&
27859       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27860           VT.getSizeInBits()) {
27861 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27862 }
27863
27864   // If both N0 and N1 are bitcast values on which insert_subvector
27865   // would make sense, pull the bitcast through.
27866 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27867 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27868 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27869 SDValue CN0 = N0.getOperand(0);
27870 SDValue CN1 = N1.getOperand(0);
27871 EVT CN0VT = CN0.getValueType();
27872 EVT CN1VT = CN1.getValueType();
27873 if (CN0VT.isVector() && CN1VT.isVector() &&
27874 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27875       CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
27876     SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27877 CN0.getValueType(), CN0, CN1, N2);
27878 return DAG.getBitcast(VT, NewINSERT);
27879 }
27880 }
27881
27882 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27883 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27884 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27885 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27886 N0.getOperand(1).getValueType() == N1.getValueType() &&
27887 N0.getOperand(2) == N2)
27888 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27889 N1, N2);
27890
27891 // Eliminate an intermediate insert into an undef vector:
27892 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
27893 // insert_subvector undef, X, 0
27894 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
27895 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
27896 isNullConstant(N2))
27897 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
27898 N1.getOperand(1), N2);
27899
27900 // Push subvector bitcasts to the output, adjusting the index as we go.
27901 // insert_subvector(bitcast(v), bitcast(s), c1)
27902 // -> bitcast(insert_subvector(v, s, c2))
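  // e.g. (illustrative): inserting a v2i32 subvector (bitcast from v4i16) at
  // element index 2 of a v8i32 vector (bitcast from v16i16) rescales by
  // Scale = 32/16 = 2, giving insert_subvector(v16i16 v, v4i16 s, 4).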
27903 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
27904 N1.getOpcode() == ISD::BITCAST) {
27905 SDValue N0Src = peekThroughBitcasts(N0);
27906 SDValue N1Src = peekThroughBitcasts(N1);
27907 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
27908 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
27909 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
27910 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
27911 EVT NewVT;
27912 SDLoc DL(N);
27913 SDValue NewIdx;
27914 LLVMContext &Ctx = *DAG.getContext();
27915 ElementCount NumElts = VT.getVectorElementCount();
27916 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27917 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
27918 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
27919 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
27920 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
27921 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
27922 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
27923 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
27924 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
27925 NumElts.divideCoefficientBy(Scale));
27926 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
27927 }
27928 }
27929 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
27930 SDValue Res = DAG.getBitcast(NewVT, N0Src);
27931 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
27932 return DAG.getBitcast(VT, Res);
27933 }
27934 }
27935 }
27936
27937 // Canonicalize insert_subvector dag nodes.
27938 // Example:
27939   // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
27940   // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), if Idx1 < Idx0
27941 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
27942 N1.getValueType() == N0.getOperand(1).getValueType()) {
27943 unsigned OtherIdx = N0.getConstantOperandVal(2);
27944 if (InsIdx < OtherIdx) {
27945 // Swap nodes.
27946 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
27947 N0.getOperand(0), N1, N2);
27948 AddToWorklist(NewOp.getNode());
27949 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
27950 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
27951 }
27952 }
27953
27954 // If the input vector is a concatenation, and the insert replaces
27955 // one of the pieces, we can optimize into a single concat_vectors.
27956 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
27957 N0.getOperand(0).getValueType() == N1.getValueType() &&
27960 unsigned Factor = N1.getValueType().getVectorMinNumElements();
27961     SmallVector<SDValue, 8> Ops(N0->ops());
27962     Ops[InsIdx / Factor] = N1;
27963 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27964 }
27965
27966 // Simplify source operands based on insertion.
27967   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27968     return SDValue(N, 0);
27969
27970 return SDValue();
27971}
27972
27973SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
27974 SDValue N0 = N->getOperand(0);
27975
27976 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
27977 if (N0->getOpcode() == ISD::FP16_TO_FP)
27978 return N0->getOperand(0);
27979
27980 return SDValue();
27981}
27982
27983SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
27984 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
27985 auto Op = N->getOpcode();
27986 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
27987 "opcode should be FP16_TO_FP or BF16_TO_FP.");
27988 SDValue N0 = N->getOperand(0);
27989
27990 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
27991 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
27992 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
27993 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
27994 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
27995 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
27996 }
27997 }
27998
27999 if (SDValue CastEliminated = eliminateFPCastPair(N))
28000 return CastEliminated;
28001
28002 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28003 // because they are wrapped inside the <1 x f16> type. Try one last time to
28004 // get rid of them.
28005 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
28006 N->getValueType(0), {N0});
28007 return Folded;
28008}
28009
28010SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28011 SDValue N0 = N->getOperand(0);
28012
28013 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28014 if (N0->getOpcode() == ISD::BF16_TO_FP)
28015 return N0->getOperand(0);
28016
28017 return SDValue();
28018}
28019
28020SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28021 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28022 return visitFP16_TO_FP(N);
28023}
28024
28025SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28026 SDValue N0 = N->getOperand(0);
28027 EVT VT = N0.getValueType();
28028 unsigned Opcode = N->getOpcode();
28029
28030 // VECREDUCE over 1-element vector is just an extract.
28031 if (VT.getVectorElementCount().isScalar()) {
28032 SDLoc dl(N);
28033 SDValue Res =
28034         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28035                     DAG.getVectorIdxConstant(0, dl));
28036 if (Res.getValueType() != N->getValueType(0))
28037 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28038 return Res;
28039 }
28040
28041   // On a boolean vector an and/or reduction is the same as a umin/umax
28042 // reduction. Convert them if the latter is legal while the former isn't.
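  // (For 0/-1 boolean lanes, AND selects the minimum lane value and OR the
  // maximum, which is why the umin/umax reductions compute the same result.)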
28043 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28044 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28045 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28046 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28047 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28048         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28049       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28050 }
28051
28052 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28053 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28054 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28055 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28056 SDValue Vec = N0.getOperand(0);
28057 SDValue Subvec = N0.getOperand(1);
28058 if ((Opcode == ISD::VECREDUCE_OR &&
28059 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28060 (Opcode == ISD::VECREDUCE_AND &&
28061 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28062 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28063 }
28064
28065 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28066 // Same for zext and anyext, and for and/or/xor reductions.
28067 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28068 Opcode == ISD::VECREDUCE_XOR) &&
28069 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28070 N0.getOpcode() == ISD::ZERO_EXTEND ||
28071 N0.getOpcode() == ISD::ANY_EXTEND) &&
28072 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28073 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28074                               N0.getOperand(0).getValueType().getVectorElementType(),
28075                               N0.getOperand(0));
28076 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28077 }
28078 return SDValue();
28079}
28080
28081SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28082 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28083
28084 // FSUB -> FMA combines:
28085 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28086 AddToWorklist(Fused.getNode());
28087 return Fused;
28088 }
28089 return SDValue();
28090}
28091
28092SDValue DAGCombiner::visitVPOp(SDNode *N) {
28093
28094 if (N->getOpcode() == ISD::VP_GATHER)
28095 if (SDValue SD = visitVPGATHER(N))
28096 return SD;
28097
28098 if (N->getOpcode() == ISD::VP_SCATTER)
28099 if (SDValue SD = visitVPSCATTER(N))
28100 return SD;
28101
28102 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28103 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28104 return SD;
28105
28106 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28107 if (SDValue SD = visitVP_STRIDED_STORE(N))
28108 return SD;
28109
28110 // VP operations in which all vector elements are disabled - either by
28111 // determining that the mask is all false or that the EVL is 0 - can be
28112 // eliminated.
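  // e.g. (illustrative): a vp_add whose mask is a splat of false (or whose
  // EVL is 0) produces no defined lanes, so it is replaced by UNDEF below,
  // while a vp_store in the same situation is replaced by just its chain.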
28113 bool AreAllEltsDisabled = false;
28114 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28115 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28116 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28117 AreAllEltsDisabled |=
28118 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28119
28120 // This is the only generic VP combine we support for now.
28121 if (!AreAllEltsDisabled) {
28122 switch (N->getOpcode()) {
28123 case ISD::VP_FADD:
28124 return visitVP_FADD(N);
28125 case ISD::VP_FSUB:
28126 return visitVP_FSUB(N);
28127 case ISD::VP_FMA:
28128 return visitFMA<VPMatchContext>(N);
28129 case ISD::VP_SELECT:
28130 return visitVP_SELECT(N);
28131 case ISD::VP_MUL:
28132 return visitMUL<VPMatchContext>(N);
28133 case ISD::VP_SUB:
28134 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28135 default:
28136 break;
28137 }
28138 return SDValue();
28139 }
28140
28141 // Binary operations can be replaced by UNDEF.
28142 if (ISD::isVPBinaryOp(N->getOpcode()))
28143 return DAG.getUNDEF(N->getValueType(0));
28144
28145 // VP Memory operations can be replaced by either the chain (stores) or the
28146 // chain + undef (loads).
28147 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28148 if (MemSD->writeMem())
28149 return MemSD->getChain();
28150 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28151 }
28152
28153 // Reduction operations return the start operand when no elements are active.
28154 if (ISD::isVPReduction(N->getOpcode()))
28155 return N->getOperand(0);
28156
28157 return SDValue();
28158}
28159
28160SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28161 SDValue Chain = N->getOperand(0);
28162 SDValue Ptr = N->getOperand(1);
28163 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28164
28165 // Check if the memory where the FP state is written is used only in a single
28166 // load operation.
28167 LoadSDNode *LdNode = nullptr;
28168 for (auto *U : Ptr->users()) {
28169 if (U == N)
28170 continue;
28171 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28172 if (LdNode && LdNode != Ld)
28173 return SDValue();
28174 LdNode = Ld;
28175 continue;
28176 }
28177 return SDValue();
28178 }
28179 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28180 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28181 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28182 return SDValue();
28183
28184 // Check if the loaded value is used only in a store operation.
28185 StoreSDNode *StNode = nullptr;
28186 for (SDUse &U : LdNode->uses()) {
28187 if (U.getResNo() == 0) {
28188 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28189 if (StNode)
28190 return SDValue();
28191 StNode = St;
28192 } else {
28193 return SDValue();
28194 }
28195 }
28196 }
28197 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28198 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28199 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28200 return SDValue();
28201
28202 // Create new node GET_FPENV_MEM, which uses the store address to write FP
28203 // environment.
28204 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28205 StNode->getMemOperand());
28206 CombineTo(StNode, Res, false);
28207 return Res;
28208}
28209
28210SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28211 SDValue Chain = N->getOperand(0);
28212 SDValue Ptr = N->getOperand(1);
28213 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28214
28215 // Check if the address of the FP state is used only in a single store operation.
28216 StoreSDNode *StNode = nullptr;
28217 for (auto *U : Ptr->users()) {
28218 if (U == N)
28219 continue;
28220 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28221 if (StNode && StNode != St)
28222 return SDValue();
28223 StNode = St;
28224 continue;
28225 }
28226 return SDValue();
28227 }
28228 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28229 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28230 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28231 return SDValue();
28232
28233 // Check if the stored value is loaded from some location and the loaded
28234 // value is used only in the store operation.
28235 SDValue StValue = StNode->getValue();
28236 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28237 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28238 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28239 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28240 return SDValue();
28241
28242 // Create new node SET_FPENV_MEM, which uses the load address to read FP
28243 // environment.
28244 SDValue Res =
28245 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28246 LdNode->getMemOperand());
28247 return Res;
28248}
28249
28250 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
28251/// with the destination vector and a zero vector.
28252/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28253/// vector_shuffle V, Zero, <0, 4, 2, 4>
28254SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28255 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28256
28257 EVT VT = N->getValueType(0);
28258 SDValue LHS = N->getOperand(0);
28259 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28260 SDLoc DL(N);
28261
28262 // Make sure we're not running after operation legalization where it
28263 // may have custom lowered the vector shuffles.
28264 if (LegalOperations)
28265 return SDValue();
28266
28267 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28268 return SDValue();
28269
28270 EVT RVT = RHS.getValueType();
28271 unsigned NumElts = RHS.getNumOperands();
28272
28273 // Attempt to create a valid clear mask, splitting the mask into
28274 // sub elements and checking to see if each is
28275 // all zeros or all ones - suitable for shuffle masking.
28276 auto BuildClearMask = [&](int Split) {
28277 int NumSubElts = NumElts * Split;
28278 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28279
28280 SmallVector<int, 8> Indices;
28281 for (int i = 0; i != NumSubElts; ++i) {
28282 int EltIdx = i / Split;
28283 int SubIdx = i % Split;
28284 SDValue Elt = RHS.getOperand(EltIdx);
28285 // X & undef --> 0 (not undef). So this lane must be converted to choose
28286 // from the zero constant vector (same as if the element had all 0-bits).
28287 if (Elt.isUndef()) {
28288 Indices.push_back(i + NumSubElts);
28289 continue;
28290 }
28291
28292 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28293 if (!Bits)
28294 return SDValue();
28295
28296 // Extract the sub element from the constant bit mask.
28297 if (DAG.getDataLayout().isBigEndian())
28298 *Bits =
28299 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28300 else
28301 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28302
28303 if (Bits->isAllOnes())
28304 Indices.push_back(i);
28305 else if (*Bits == 0)
28306 Indices.push_back(i + NumSubElts);
28307 else
28308 return SDValue();
28309 }
28310
28311 // Let's see if the target supports this vector_shuffle.
28312 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28313 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28314 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28315 return SDValue();
28316
28317 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28318 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28319 DAG.getBitcast(ClearVT, LHS),
28320 Zero, Indices));
28321 };
28322
28323 // Determine maximum split level (byte level masking).
28324 int MaxSplit = 1;
28325 if (RVT.getScalarSizeInBits() % 8 == 0)
28326 MaxSplit = RVT.getScalarSizeInBits() / 8;
28327
28328 for (int Split = 1; Split <= MaxSplit; ++Split)
28329 if (RVT.getScalarSizeInBits() % Split == 0)
28330 if (SDValue S = BuildClearMask(Split))
28331 return S;
28332
28333 return SDValue();
28334}
28335
28336/// If a vector binop is performed on splat values, it may be profitable to
28337/// extract, scalarize, and insert/splat.
28338 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28339 const SDLoc &DL, bool LegalTypes) {
28340 SDValue N0 = N->getOperand(0);
28341 SDValue N1 = N->getOperand(1);
28342 unsigned Opcode = N->getOpcode();
28343 EVT VT = N->getValueType(0);
28344 EVT EltVT = VT.getVectorElementType();
28345 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28346
28347 // TODO: Remove/replace the extract cost check? If the elements are available
28348 // as scalars, then there may be no extract cost. Should we ask if
28349 // inserting a scalar back into a vector is cheap instead?
28350 int Index0, Index1;
28351 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28352 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28353 // Extract element from splat_vector should be free.
28354 // TODO: use DAG.isSplatValue instead?
28355 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28356 N1.getOpcode() == ISD::SPLAT_VECTOR;
28357 if (!Src0 || !Src1 || Index0 != Index1 ||
28358 Src0.getValueType().getVectorElementType() != EltVT ||
28359 Src1.getValueType().getVectorElementType() != EltVT ||
28360 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28361 // If before type legalization, allow scalar types that will eventually be
28362 // made legal.
28363 !TLI.isOperationLegalOrCustom(
28364 Opcode, LegalTypes
28365 ? EltVT
28366 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28367 return SDValue();
28368
28369 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28370 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28371 return SDValue();
28372
28373 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28374 // All but one element should have an undef input, which will fold to a
28375 // constant or undef. Avoid splatting which would over-define potentially
28376 // undefined elements.
28377
28378 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28379 // build_vec ..undef, (bo X, Y), undef...
28380 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28381 DAG.ExtractVectorElements(Src0, EltsX);
28382 DAG.ExtractVectorElements(Src1, EltsY);
28383
28384 for (auto [X, Y] : zip(EltsX, EltsY))
28385 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28386 return DAG.getBuildVector(VT, DL, EltsResult);
28387 }
28388
28389 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28390 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28391 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28392 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28393
28394 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28395 return DAG.getSplat(VT, DL, ScalarBO);
28396}
28397
28398/// Visit a vector cast operation, like FP_EXTEND.
28399SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28400 EVT VT = N->getValueType(0);
28401 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28402 EVT EltVT = VT.getVectorElementType();
28403 unsigned Opcode = N->getOpcode();
28404
28405 SDValue N0 = N->getOperand(0);
28406 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28407
28408 // TODO: promoting the operation might also be good here?
28409 int Index0;
28410 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28411 if (Src0 &&
28412 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28413 TLI.isExtractVecEltCheap(VT, Index0)) &&
28414 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28415 TLI.preferScalarizeSplat(N)) {
28416 EVT SrcVT = N0.getValueType();
28417 EVT SrcEltVT = SrcVT.getVectorElementType();
28418 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28419 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28420 SDValue Elt =
28421 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28422 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28423 if (VT.isScalableVector())
28424 return DAG.getSplatVector(VT, DL, ScalarBO);
28425 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28426 return DAG.getBuildVector(VT, DL, Ops);
28427 }
28428 }
28429
28430 return SDValue();
28431}
28432
28433/// Visit a binary vector operation, like ADD.
28434SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28435 EVT VT = N->getValueType(0);
28436 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28437
28438 SDValue LHS = N->getOperand(0);
28439 SDValue RHS = N->getOperand(1);
28440 unsigned Opcode = N->getOpcode();
28441 SDNodeFlags Flags = N->getFlags();
28442
28443 // Move unary shuffles with identical masks after a vector binop:
28444 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28445 // --> shuffle (VBinOp A, B), Undef, Mask
28446 // This does not require type legality checks because we are creating the
28447 // same types of operations that are in the original sequence. We do have to
28448 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28449 // though. This code is adapted from the identical transform in instcombine.
28450 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28451 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28452 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28453 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28454 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28455 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28456 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28457 RHS.getOperand(0), Flags);
28458 SDValue UndefV = LHS.getOperand(1);
28459 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28460 }
28461
28462 // Try to sink a splat shuffle after a binop with a uniform constant.
28463 // This is limited to cases where neither the shuffle nor the constant have
28464 // undefined elements because that could be poison-unsafe or inhibit
28465 // demanded elements analysis. It is further limited to not change a splat
28466 // of an inserted scalar because that may be optimized better by
28467 // load-folding or other target-specific behaviors.
28468 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28469 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28470 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28471 // binop (splat X), (splat C) --> splat (binop X, C)
28472 SDValue X = Shuf0->getOperand(0);
28473 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28474 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28475 Shuf0->getMask());
28476 }
28477 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
28478 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
28479 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28480 // binop (splat C), (splat X) --> splat (binop C, X)
28481 SDValue X = Shuf1->getOperand(0);
28482 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
28483 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28484 Shuf1->getMask());
28485 }
28486 }
28487
28488 // The following pattern is likely to emerge with vector reduction ops. Moving
28489 // the binary operation ahead of insertion may allow using a narrower vector
28490 // instruction that has better performance than the wide version of the op:
28491 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
28492 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
28493 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
28494 LHS.getOperand(2) == RHS.getOperand(2) &&
28495 (LHS.hasOneUse() || RHS.hasOneUse())) {
28496 SDValue X = LHS.getOperand(1);
28497 SDValue Y = RHS.getOperand(1);
28498 SDValue Z = LHS.getOperand(2);
28499 EVT NarrowVT = X.getValueType();
28500 if (NarrowVT == Y.getValueType() &&
28501 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
28502 LegalOperations)) {
28503 // (binop undef, undef) may not return undef, so compute that result.
28504 SDValue VecC =
28505 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
28506 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
28507 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
28508 }
28509 }
28510
28511 // Make sure all but the first op are undef or constant.
28512 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
28513 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
28514 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
28515 return Op.isUndef() ||
28516 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
28517 });
28518 };
28519
28520 // The following pattern is likely to emerge with vector reduction ops. Moving
28521 // the binary operation ahead of the concat may allow using a narrower vector
28522 // instruction that has better performance than the wide version of the op:
28523 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
28524 // concat (VBinOp X, Y), VecC
28525 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
28526 (LHS.hasOneUse() || RHS.hasOneUse())) {
28527 EVT NarrowVT = LHS.getOperand(0).getValueType();
28528 if (NarrowVT == RHS.getOperand(0).getValueType() &&
28529 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
28530 unsigned NumOperands = LHS.getNumOperands();
28531 SmallVector<SDValue, 4> ConcatOps;
28532 for (unsigned i = 0; i != NumOperands; ++i) {
28533 // This constant folds for operands 1 and up.
28534 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
28535 RHS.getOperand(i)));
28536 }
28537
28538 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
28539 }
28540 }
28541
28542 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
28543 return V;
28544
28545 return SDValue();
28546}
28547
28548SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
28549 SDValue N2) {
28550 assert(N0.getOpcode() == ISD::SETCC &&
28551 "First argument must be a SetCC node!");
28552
28553 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
28554 cast<CondCodeSDNode>(N0.getOperand(2))->get());
28555
28556 // If we got a simplified select_cc node back from SimplifySelectCC, then
28557 // break it down into a new SETCC node, and a new SELECT node, and then return
28558 // the SELECT node, since we were called with a SELECT node.
28559 if (SCC.getNode()) {
28560 // Check to see if we got a select_cc back (to turn into setcc/select).
28561 // Otherwise, just return whatever node we got back, like fabs.
28562 if (SCC.getOpcode() == ISD::SELECT_CC) {
28563 const SDNodeFlags Flags = N0->getFlags();
28564 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
28565 N0.getValueType(),
28566 SCC.getOperand(0), SCC.getOperand(1),
28567 SCC.getOperand(4), Flags);
28568 AddToWorklist(SETCC.getNode());
28569 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
28570 SCC.getOperand(2), SCC.getOperand(3), Flags);
28571 }
28572
28573 return SCC;
28574 }
28575 return SDValue();
28576}
28577
28578/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
28579/// being selected between, see if we can simplify the select. Callers of this
28580/// should assume that TheSelect is deleted if this returns true. As such, they
28581/// should return the appropriate thing (e.g. the node) back to the top-level of
28582/// the DAG combiner loop to avoid it being looked at.
28583bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
28584 SDValue RHS) {
28585 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28586 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
28587 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
28588 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
28589 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
28590 SDValue Sqrt = RHS;
28591 ISD::CondCode CC;
28592 SDValue CmpLHS;
28593 const ConstantFPSDNode *Zero = nullptr;
28594
28595 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
28596 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
28597 CmpLHS = TheSelect->getOperand(0);
28598 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
28599 } else {
28600 // SELECT or VSELECT
28601 SDValue Cmp = TheSelect->getOperand(0);
28602 if (Cmp.getOpcode() == ISD::SETCC) {
28603 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
28604 CmpLHS = Cmp.getOperand(0);
28605 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
28606 }
28607 }
28608 if (Zero && Zero->isZero() &&
28609 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
28610 CC == ISD::SETULT || CC == ISD::SETLT)) {
28611 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28612 CombineTo(TheSelect, Sqrt);
28613 return true;
28614 }
28615 }
28616 }
28617 // Cannot simplify select with vector condition
28618 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
28619
28620 // If this is a select from two identical things, try to pull the operation
28621 // through the select.
28622 if (LHS.getOpcode() != RHS.getOpcode() ||
28623 !LHS.hasOneUse() || !RHS.hasOneUse())
28624 return false;
28625
28626 // If this is a load and the token chain is identical, replace the select
28627 // of two loads with a load through a select of the address to load from.
28628 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
28629 // constants have been dropped into the constant pool.
28630 if (LHS.getOpcode() == ISD::LOAD) {
28631 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
28632 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
28633
28634 // Token chains must be identical.
28635 if (LHS.getOperand(0) != RHS.getOperand(0) ||
28636 // Do not let this transformation reduce the number of volatile loads.
28637 // Be conservative for atomics for the moment
28638 // TODO: This does appear to be legal for unordered atomics (see D66309)
28639 !LLD->isSimple() || !RLD->isSimple() ||
28640 // FIXME: If either is a pre/post inc/dec load,
28641 // we'd need to split out the address adjustment.
28642 LLD->isIndexed() || RLD->isIndexed() ||
28643 // If this is an EXTLOAD, the VT's must match.
28644 LLD->getMemoryVT() != RLD->getMemoryVT() ||
28645 // If this is an EXTLOAD, the kind of extension must match.
28646 (LLD->getExtensionType() != RLD->getExtensionType() &&
28647 // The only exception is if one of the extensions is anyext.
28648 LLD->getExtensionType() != ISD::EXTLOAD &&
28649 RLD->getExtensionType() != ISD::EXTLOAD) ||
28650 // FIXME: this discards src value information. This is
28651 // over-conservative. It would be beneficial to be able to remember
28652 // both potential memory locations. Since we are discarding
28653 // src value info, don't do the transformation if the memory
28654 // locations are not in the default address space.
28655 LLD->getPointerInfo().getAddrSpace() != 0 ||
28656 RLD->getPointerInfo().getAddrSpace() != 0 ||
28657 // We can't produce a CMOV of a TargetFrameIndex since we won't
28658 // generate the address generation required.
28659 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28660 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28661 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
28662 LLD->getBasePtr().getValueType()))
28663 return false;
28664
28665 // The loads must not depend on one another.
28666 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
28667 return false;
28668
28669 // Check that the select condition doesn't reach either load. If so,
28670 // folding this will induce a cycle into the DAG. If not, this is safe to
28671 // xform, so create a select of the addresses.
28672
28673 SmallPtrSet<const SDNode *, 32> Visited;
28674 SmallVector<const SDNode *, 16> Worklist;
28675
28676 // Always fail if LLD and RLD are not independent. TheSelect is a
28677 // predecessor to all Nodes in question so we need not search past it.
28678
28679 Visited.insert(TheSelect);
28680 Worklist.push_back(LLD);
28681 Worklist.push_back(RLD);
28682
28683 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
28684 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
28685 return false;
28686
28687 SDValue Addr;
28688 if (TheSelect->getOpcode() == ISD::SELECT) {
28689 // We cannot do this optimization if any pair of {RLD, LLD} is a
28690 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
28691 // Loads, we only need to check if CondNode is a successor to one of the
28692 // loads. We can further avoid this if there's no use of their chain
28693 // value.
28694 SDNode *CondNode = TheSelect->getOperand(0).getNode();
28695 Worklist.push_back(CondNode);
28696
28697 if ((LLD->hasAnyUseOfValue(1) &&
28698 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28699 (RLD->hasAnyUseOfValue(1) &&
28700 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28701 return false;
28702
28703 Addr = DAG.getSelect(SDLoc(TheSelect),
28704 LLD->getBasePtr().getValueType(),
28705 TheSelect->getOperand(0), LLD->getBasePtr(),
28706 RLD->getBasePtr());
28707 } else { // Otherwise SELECT_CC
28708 // We cannot do this optimization if any pair of {RLD, LLD} is a
28709 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
28710 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
28711 // one of the loads. We can further avoid this if there's no use of their
28712 // chain value.
28713
28714 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
28715 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
28716 Worklist.push_back(CondLHS);
28717 Worklist.push_back(CondRHS);
28718
28719 if ((LLD->hasAnyUseOfValue(1) &&
28720 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28721 (RLD->hasAnyUseOfValue(1) &&
28722 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28723 return false;
28724
28725 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
28726 LLD->getBasePtr().getValueType(),
28727 TheSelect->getOperand(0),
28728 TheSelect->getOperand(1),
28729 LLD->getBasePtr(), RLD->getBasePtr(),
28730 TheSelect->getOperand(4));
28731 }
28732
28733 SDValue Load;
28734 // It is safe to replace the two loads if they have different alignments,
28735 // but the new load must be the minimum (most restrictive) alignment of the
28736 // inputs.
28737 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
28738 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
28739 if (!RLD->isInvariant())
28740 MMOFlags &= ~MachineMemOperand::MOInvariant;
28741 if (!RLD->isDereferenceable())
28742 MMOFlags &= ~MachineMemOperand::MODereferenceable;
28743 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
28744 // FIXME: Discards pointer and AA info.
28745 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
28746 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
28747 MMOFlags);
28748 } else {
28749 // FIXME: Discards pointer and AA info.
28750 Load = DAG.getExtLoad(
28751 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
28752 : LLD->getExtensionType(),
28753 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
28754 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
28755 }
28756
28757 // Users of the select now use the result of the load.
28758 CombineTo(TheSelect, Load);
28759
28760 // Users of the old loads now use the new load's chain. We know the
28761 // old-load value is dead now.
28762 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
28763 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
28764 return true;
28765 }
28766
28767 return false;
28768}
28769
28770/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
28771/// bitwise 'and'.
28772SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
28773 SDValue N1, SDValue N2, SDValue N3,
28774 ISD::CondCode CC) {
28775 // If this is a select where the false operand is zero and the compare is a
28776 // check of the sign bit, see if we can perform the "gzip trick":
28777 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
28778 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
28779 EVT XType = N0.getValueType();
28780 EVT AType = N2.getValueType();
28781 if (!isNullConstant(N3) || !XType.bitsGE(AType))
28782 return SDValue();
28783
28784 // If the comparison is testing for a positive value, we have to invert
28785 // the sign bit mask, so only do that transform if the target has a bitwise
28786 // 'and not' instruction (the invert is free).
28787 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
28788 // (X > -1) ? A : 0
28789 // (X > 0) ? X : 0 <-- This is canonical signed max.
28790 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
28791 return SDValue();
28792 } else if (CC == ISD::SETLT) {
28793 // (X < 0) ? A : 0
28794 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
28795 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28796 return SDValue();
28797 } else {
28798 return SDValue();
28799 }
28800
28801 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28802 // constant.
28803 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28804 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28805 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28806 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28807 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28808 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28809 AddToWorklist(Shift.getNode());
28810
28811 if (XType.bitsGT(AType)) {
28812 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28813 AddToWorklist(Shift.getNode());
28814 }
28815
28816 if (CC == ISD::SETGT)
28817 Shift = DAG.getNOT(DL, Shift, AType);
28818
28819 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28820 }
28821 }
28822
28823 unsigned ShCt = XType.getSizeInBits() - 1;
28824 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28825 return SDValue();
28826
28827 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28828 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28829 AddToWorklist(Shift.getNode());
28830
28831 if (XType.bitsGT(AType)) {
28832 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28833 AddToWorklist(Shift.getNode());
28834 }
28835
28836 if (CC == ISD::SETGT)
28837 Shift = DAG.getNOT(DL, Shift, AType);
28838
28839 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28840}
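// Illustrative example of the "gzip trick" above, assuming i32 operands:
//   select_cc setlt X, 0, A, 0  -->  and (sra X, 31), A
// (sra X, 31) is all ones exactly when X is negative and all zeros otherwise,
// so the AND yields A for X < 0 and 0 for X >= 0. For SETGT the shifted value
// is additionally inverted, which is why the target needs a cheap 'and not'.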
28841
28842// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28843SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28844 SDValue N0 = N->getOperand(0);
28845 SDValue N1 = N->getOperand(1);
28846 SDValue N2 = N->getOperand(2);
28847 SDLoc DL(N);
28848
28849 unsigned BinOpc = N1.getOpcode();
28850 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28851 (N1.getResNo() != N2.getResNo()))
28852 return SDValue();
28853
28854 // The use checks are intentionally on SDNode because we may be dealing
28855 // with opcodes that produce more than one SDValue.
28856 // TODO: Do we really need to check N0 (the condition operand of the select)?
28857 // But removing that clause could cause an infinite loop...
28858 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28859 return SDValue();
28860
28861 // Binops may include opcodes that return multiple values, so all values
28862 // must be created/propagated from the newly created binops below.
28863 SDVTList OpVTs = N1->getVTList();
28864
28865 // Fold select(cond, binop(x, y), binop(z, y))
28866 // --> binop(select(cond, x, z), y)
28867 if (N1.getOperand(1) == N2.getOperand(1)) {
28868 SDValue N10 = N1.getOperand(0);
28869 SDValue N20 = N2.getOperand(0);
28870 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28871 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28872 SDValue NewBinOp =
28873 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
28874 return SDValue(NewBinOp.getNode(), N1.getResNo());
28875 }
28876
28877 // Fold select(cond, binop(x, y), binop(x, z))
28878 // --> binop(x, select(cond, y, z))
28879 if (N1.getOperand(0) == N2.getOperand(0)) {
28880 SDValue N11 = N1.getOperand(1);
28881 SDValue N21 = N2.getOperand(1);
28882 // Second op VT might be different (e.g. shift amount type)
28883 if (N11.getValueType() == N21.getValueType()) {
28884 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28885 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28886 SDValue NewBinOp =
28887 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
28888 return SDValue(NewBinOp.getNode(), N1.getResNo());
28889 }
28890 }
28891
28892 // TODO: Handle isCommutativeBinOp patterns as well?
28893 return SDValue();
28894}
28895
28896// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
28897SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
28898 SDValue N0 = N->getOperand(0);
28899 EVT VT = N->getValueType(0);
28900 bool IsFabs = N->getOpcode() == ISD::FABS;
28901 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
28902
28903 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
28904 return SDValue();
28905
28906 SDValue Int = N0.getOperand(0);
28907 EVT IntVT = Int.getValueType();
28908
28909 // The operand to cast should be integer.
28910 if (!IntVT.isInteger() || IntVT.isVector())
28911 return SDValue();
28912
28913 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
28914 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
28915 APInt SignMask;
28916 if (N0.getValueType().isVector()) {
28917 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
28918 // 0x7f...) per element and splat it.
28919 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
28920 if (IsFabs)
28921 SignMask = ~SignMask;
28922 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
28923 } else {
28924 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
28925 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
28926 if (IsFabs)
28927 SignMask = ~SignMask;
28928 }
28929 SDLoc DL(N0);
28930 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
28931 DAG.getConstant(SignMask, DL, IntVT));
28932 AddToWorklist(Int.getNode());
28933 return DAG.getBitcast(VT, Int);
28934}
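// Illustrative example of the sign-change fold above, assuming f32/i32:
//   (fneg (bitconvert x)) --> (bitconvert (xor x, 0x80000000))
//   (fabs (bitconvert x)) --> (bitconvert (and x, 0x7FFFFFFF))
// i.e. the sign bit is flipped or cleared in the integer domain, avoiding an
// FP constant-pool load.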
28935
28936 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
28937/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
28938/// in it. This may be a win when the constant is not otherwise available
28939/// because it replaces two constant pool loads with one.
28940SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
28941 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
28942 ISD::CondCode CC) {
28943 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
28944 return SDValue();
28945
28946 // If we are before legalize types, we want the other legalization to happen
28947 // first (for example, to avoid messing with soft float).
28948 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
28949 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
28950 EVT VT = N2.getValueType();
28951 if (!TV || !FV || !TLI.isTypeLegal(VT))
28952 return SDValue();
28953
28954 // If a constant can be materialized without loads, this does not make sense.
28955 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
28956 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
28957 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
28958 return SDValue();
28959
28960 // If both constants have multiple uses, then we won't need to do an extra
28961 // load. The values are likely around in registers for other users.
28962 if (!TV->hasOneUse() && !FV->hasOneUse())
28963 return SDValue();
28964
28965 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
28966 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
28967 Type *FPTy = Elts[0]->getType();
28968 const DataLayout &TD = DAG.getDataLayout();
28969
28970 // Create a ConstantArray of the two constants.
28971 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
28972 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
28973 TD.getPrefTypeAlign(FPTy));
28974 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
28975
28976 // Get offsets to the 0 and 1 elements of the array, so we can select between
28977 // them.
28978 SDValue Zero = DAG.getIntPtrConstant(0, DL);
28979 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
28980 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
28981 SDValue Cond =
28982 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
28983 AddToWorklist(Cond.getNode());
28984 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
28985 AddToWorklist(CstOffset.getNode());
28986 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
28987 AddToWorklist(CPIdx.getNode());
28988 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
28989 MachinePointerInfo::getConstantPool(
28990 DAG.getMachineFunction()), Alignment);
28991}
28992
28993/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
28994/// where 'cond' is the comparison specified by CC.
28995SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
28996 SDValue N2, SDValue N3, ISD::CondCode CC,
28997 bool NotExtCompare) {
28998 // (x ? y : y) -> y.
28999 if (N2 == N3) return N2;
29000
29001 EVT CmpOpVT = N0.getValueType();
29002 EVT CmpResVT = getSetCCResultType(CmpOpVT);
29003 EVT VT = N2.getValueType();
29004 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
29005 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29006 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
29007
29008 // Determine if the condition we're dealing with is constant.
29009 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
29010 AddToWorklist(SCC.getNode());
29011 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29012 // fold select_cc true, x, y -> x
29013 // fold select_cc false, x, y -> y
29014 return !(SCCC->isZero()) ? N2 : N3;
29015 }
29016 }
29017
29018 if (SDValue V =
29019 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29020 return V;
29021
29022 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29023 return V;
29024
29025 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29026 // where y has a single bit set.
29027 // In plain terms, we can turn the SELECT_CC into an AND
29028 // when the condition can be materialized as an all-ones register. Any
29029 // single bit-test can be materialized as an all-ones register with
29030 // shift-left and shift-right-arith.
29031 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29032 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29033 SDValue AndLHS = N0->getOperand(0);
29034 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29035 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29036 // Shift the tested bit over the sign bit.
29037 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29038 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29039 unsigned ShCt = AndMask.getBitWidth() - 1;
29040 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29041 SDLoc(AndLHS));
29042 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29043
29044 // Now arithmetic right shift it all the way over, so the result is
29045 // either all-ones, or zero.
29046 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29047 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29048
29049 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29050 }
29051 }
29052 }
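// Illustrative example of the single-bit-test fold above, assuming i32 and
// AndMask = 4:
//   select_cc seteq (and x, 4), 0, 0, A  -->  and (sra (shl x, 29), 31), A
// shl by countl_zero(4) = 29 moves bit 2 into the sign bit; sra by 31 then
// yields all ones when that bit was set and zero otherwise.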
29053
29054 // fold select C, 16, 0 -> shl C, 4
29055 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29056 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29057
29058 if ((Fold || Swap) &&
29059 TLI.getBooleanContents(CmpOpVT) ==
29060 TargetLowering::ZeroOrOneBooleanContent &&
29061 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29063
29064 if (Swap) {
29065 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29066 std::swap(N2C, N3C);
29067 }
29068
29069 // If the caller doesn't want us to simplify this into a zext of a compare,
29070 // don't do it.
29071 if (NotExtCompare && N2C->isOne())
29072 return SDValue();
29073
29074 SDValue Temp, SCC;
29075 // zext (setcc n0, n1)
29076 if (LegalTypes) {
29077 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29078 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29079 } else {
29080 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29081 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29082 }
29083
29084 AddToWorklist(SCC.getNode());
29085 AddToWorklist(Temp.getNode());
29086
29087 if (N2C->isOne())
29088 return Temp;
29089
29090 unsigned ShCt = N2C->getAPIntValue().logBase2();
29091 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29092 return SDValue();
29093
29094 // shl setcc result by log2 n2c
29095 return DAG.getNode(
29096 ISD::SHL, DL, N2.getValueType(), Temp,
29097 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29098 }
29099
29100 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29101 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29102 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29103 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29104 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29105 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29106 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29107 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29108 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29109 SDValue ValueOnZero = N2;
29110 SDValue Count = N3;
29111 // If the condition is NE instead of E, swap the operands.
29112 if (CC == ISD::SETNE)
29113 std::swap(ValueOnZero, Count);
29114 // Check if the value on zero is a constant equal to the bits in the type.
29115 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29116 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29117 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29118 // legal, combine to just cttz.
29119 if ((Count.getOpcode() == ISD::CTTZ ||
29120 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29121 N0 == Count.getOperand(0) &&
29122 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29123 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29124 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29125 // legal, combine to just ctlz.
29126 if ((Count.getOpcode() == ISD::CTLZ ||
29127 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29128 N0 == Count.getOperand(0) &&
29129 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29130 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29131 }
29132 }
29133 }
29134
29135 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29136 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29137 if (!NotExtCompare && N1C && N2C && N3C &&
29138 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29139 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29140 (N1C->isZero() && CC == ISD::SETLT)) &&
29141 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29142 SDValue ASHR =
29143 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29144 DAG.getShiftAmountConstant(
29145 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29146 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29147 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29148 }
29149
29150 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29151 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29152 N2C->isOne() && N3C->isAllOnes() &&
29153 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29154 CmpOpVT.getScalarSizeInBits() - 1)) {
29155 SDValue ASHR =
29156 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29157 DAG.getShiftAmountConstant(
29158 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29159 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29160 DAG.getConstant(1, DL, VT));
29161 }
29162
29163 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29164 return S;
29165 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29166 return S;
29167 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29168 return ABD;
29169
29170 return SDValue();
29171}
29172
29173 static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29174 const TargetLowering &TLI) {
29175 // Match a pattern such as:
29176 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29177 // This extracts contiguous parts of X and ORs them together before comparing.
29178 // We can optimize this so that we directly check (X & SomeMask) instead,
29179 // eliminating the shifts.
29180
29181 EVT VT = Root.getValueType();
29182
29183 // TODO: Support vectors?
29184 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29185 return SDValue();
29186
29187 SDValue N0 = Root.getOperand(0);
29188 SDValue N1 = Root.getOperand(1);
29189
29190 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29191 return SDValue();
29192
29193 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29194
29195 SDValue Src;
29196 const auto IsSrc = [&](SDValue V) {
29197 if (!Src) {
29198 Src = V;
29199 return true;
29200 }
29201
29202 return Src == V;
29203 };
29204
29205 SmallVector<SDValue> Worklist = {N0};
29206 APInt PartsMask(VT.getSizeInBits(), 0);
29207 while (!Worklist.empty()) {
29208 SDValue V = Worklist.pop_back_val();
29209 if (!V.hasOneUse() && (Src && Src != V))
29210 return SDValue();
29211
29212 if (V.getOpcode() == ISD::OR) {
29213 Worklist.push_back(V.getOperand(0));
29214 Worklist.push_back(V.getOperand(1));
29215 continue;
29216 }
29217
29218 if (V.getOpcode() == ISD::SRL) {
29219 SDValue ShiftSrc = V.getOperand(0);
29220 SDValue ShiftAmt = V.getOperand(1);
29221
29222 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29223 return SDValue();
29224
29225 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29226 if (ShiftAmtVal > RootMask.getBitWidth())
29227 return SDValue();
29228
29229 PartsMask |= (RootMask << ShiftAmtVal);
29230 continue;
29231 }
29232
29233 if (IsSrc(V)) {
29234 PartsMask |= RootMask;
29235 continue;
29236 }
29237
29238 return SDValue();
29239 }
29240
29241 if (!Src)
29242 return SDValue();
29243
29244 SDLoc DL(Root);
29245 return DAG.getNode(ISD::AND, DL, VT,
29246 {Src, DAG.getConstant(PartsMask, DL, VT)});
29247}
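// Illustrative example of the merged bit-field check above: for
//   (X | (X >> 8)) & 0xFF
// the accumulated PartsMask is 0xFF | (0xFF << 8) = 0xFFFF, so an eq/ne
// comparison against zero can instead test (X & 0xFFFF), removing the shift
// and the OR.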
29248
29249/// This is a stub for TargetLowering::SimplifySetCC.
29250SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29251 ISD::CondCode Cond, const SDLoc &DL,
29252 bool foldBooleans) {
29253 TargetLowering::DAGCombinerInfo
29254 DagCombineInfo(DAG, Level, false, this);
29255 if (SDValue C =
29256 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29257 return C;
29258
29259 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
29260 isNullConstant(N1)) {
29261
29262 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29263 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29264 }
29265
29266 return SDValue();
29267}
29268
29269/// Given an ISD::SDIV node expressing a divide by constant, return
29270/// a DAG expression to select that will generate the same value by multiplying
29271/// by a magic number.
29272/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29273SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29274 // when optimising for minimum size, we don't want to expand a div to a mul
29275 // and a shift.
29276 if (DAG.getMachineFunction().getFunction().hasMinSize())
29277 return SDValue();
29278
29279 SmallVector<SDNode *, 8> Built;
29280 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29281 for (SDNode *N : Built)
29282 AddToWorklist(N);
29283 return S;
29284 }
29285
29286 return SDValue();
29287}
29288
29289/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29290/// DAG expression that will generate the same value by right shifting.
29291SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29292 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29293 if (!C)
29294 return SDValue();
29295
29296 // Avoid division by zero.
29297 if (C->isZero())
29298 return SDValue();
29299
29300 SmallVector<SDNode *, 8> Built;
29301 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29302 for (SDNode *N : Built)
29303 AddToWorklist(N);
29304 return S;
29305 }
29306
29307 return SDValue();
29308}
29309
29310/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29311/// expression that will generate the same value by multiplying by a magic
29312/// number.
29313/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29314SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29315 // when optimising for minimum size, we don't want to expand a div to a mul
29316 // and a shift.
29317 if (DAG.getMachineFunction().getFunction().hasMinSize())
29318 return SDValue();
29319
29320 SmallVector<SDNode *, 8> Built;
29321 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29322 for (SDNode *N : Built)
29323 AddToWorklist(N);
29324 return S;
29325 }
29326
29327 return SDValue();
29328}
29329
29330/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29331/// return a DAG expression that will generate the same value.
29332SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29333 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29334 if (!C)
29335 return SDValue();
29336
29337 // Avoid division by zero.
29338 if (C->isZero())
29339 return SDValue();
29340
29341 SmallVector<SDNode *, 8> Built;
29342 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29343 for (SDNode *N : Built)
29344 AddToWorklist(N);
29345 return S;
29346 }
29347
29348 return SDValue();
29349}
29350
29351// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29352//
29353// Returns the node that represents `Log2(Op)`. This may create a new node. If
29354 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
29355//
29356// All nodes will be created at `DL` and the output will be of type `VT`.
29357//
29358// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29359 // `AssumeNonZero` if this function should simply assume (rather than require
29360 // proof) that `Op` is non-zero.
29361 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29362 SDValue Op, unsigned Depth,
29363 bool AssumeNonZero) {
29364 assert(VT.isInteger() && "Only integer types are supported!");
29365
29366 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29367 while (true) {
29368 switch (V.getOpcode()) {
29369 case ISD::TRUNCATE:
29370 case ISD::ZERO_EXTEND:
29371 V = V.getOperand(0);
29372 break;
29373 default:
29374 return V;
29375 }
29376 }
29377 };
29378
29379 if (VT.isScalableVector())
29380 return SDValue();
29381
29382 Op = PeekThroughCastsAndTrunc(Op);
29383
29384 // Helper for determining whether a value is a power-2 constant scalar or a
29385 // vector of such elements.
29386 SmallVector<APInt> Pow2Constants;
29387 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29388 if (C->isZero() || C->isOpaque())
29389 return false;
29390 // TODO: We may also be able to support negative powers of 2 here.
29391 if (C->getAPIntValue().isPowerOf2()) {
29392 Pow2Constants.emplace_back(C->getAPIntValue());
29393 return true;
29394 }
29395 return false;
29396 };
29397
29398 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29399 if (!VT.isVector())
29400 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29401 // We need to create a build vector
29402 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29403 return DAG.getSplat(VT, DL,
29404 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29405 VT.getScalarType()));
29406 SmallVector<SDValue> Log2Ops;
29407 for (const APInt &Pow2 : Pow2Constants)
29408 Log2Ops.emplace_back(
29409 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29410 return DAG.getBuildVector(VT, DL, Log2Ops);
29411 }
29412
29413 if (Depth >= DAG.MaxRecursionDepth)
29414 return SDValue();
29415
29416 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29417 // Peek through zero extend. We can't peek through truncates since this
29418 // function is called on a shift amount. We must ensure that all of the bits
29419 // above the original shift amount are zeroed by this function.
29420 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29421 ToCast = ToCast.getOperand(0);
29422 EVT CurVT = ToCast.getValueType();
29423 if (NewVT == CurVT)
29424 return ToCast;
29425
29426 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29427 return DAG.getBitcast(NewVT, ToCast);
29428
29429 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29430 };
29431
29432 // log2(X << Y) -> log2(X) + Y
29433 if (Op.getOpcode() == ISD::SHL) {
29434 // 1 << Y and X nuw/nsw << Y are all non-zero.
29435 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29436 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29437 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29438 Depth + 1, AssumeNonZero))
29439 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29440 CastToVT(VT, Op.getOperand(1)));
29441 }
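// For instance, the SHL rule above turns log2(8 << n) into log2(8) + n = 3 + n;
// the non-zero guard matters because the rewrite only holds when the shift
// cannot wrap the value to zero.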
29442
29443 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29444 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
29445 Op.hasOneUse()) {
29446 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
29447 Depth + 1, AssumeNonZero))
29448 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
29449 Depth + 1, AssumeNonZero))
29450 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
29451 }
29452
29453 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29454 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29455 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29456 Op.hasOneUse()) {
29457 // Use AssumeNonZero as false here. Otherwise we can hit case where
29458 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because overflow).
29459 if (SDValue LogX =
29460 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29461 /*AssumeNonZero*/ false))
29462 if (SDValue LogY =
29463 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29464 /*AssumeNonZero*/ false))
29465 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29466 }
29467
29468 return SDValue();
29469}
29470
29471/// Determines the LogBase2 value for a non-null input value using the
29472/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
29473SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
29474 bool KnownNonZero, bool InexpensiveOnly,
29475 std::optional<EVT> OutVT) {
29476 EVT VT = OutVT ? *OutVT : V.getValueType();
29477 SDValue InexpensiveLogBase2 =
29478 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
29479 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
29480 return InexpensiveLogBase2;
29481
29482 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
29483 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
29484 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
29485 return LogBase2;
29486}
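// Worked instance of the ctlz-based expansion above, assuming a known
// power-of-two i32 value V = 16: ctlz(16) = 27, so
//   LogBase2(V) = (32 - 1) - 27 = 4 = log2(16).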
29487
29488/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29489/// For the reciprocal, we need to find the zero of the function:
29490/// F(X) = 1/X - A [which has a zero at X = 1/A]
29491/// =>
29492/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
29493/// does not require additional intermediate precision]
29494/// For the last iteration, put numerator N into it to gain more precision:
29495/// Result = N X_i + X_i (N - N A X_i)
29496SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
29497 SDNodeFlags Flags) {
29498 if (LegalDAG)
29499 return SDValue();
29500
29501 // TODO: Handle extended types?
29502 EVT VT = Op.getValueType();
29503 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29504 VT.getScalarType() != MVT::f64)
29505 return SDValue();
29506
29507 // If estimates are explicitly disabled for this function, we're done.
29508 MachineFunction &MF = DAG.getMachineFunction();
29509 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
29510 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29511 return SDValue();
29512
29513 // Estimates may be explicitly enabled for this type with a custom number of
29514 // refinement steps.
29515 int Iterations = TLI.getDivRefinementSteps(VT, MF);
29516 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
29517 AddToWorklist(Est.getNode());
29518
29519 SDLoc DL(Op);
29520 if (Iterations) {
29521 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
29522
29523 // Newton iterations: Est = Est + Est (N - Arg * Est)
29524 // If this is the last iteration, also multiply by the numerator.
29525 for (int i = 0; i < Iterations; ++i) {
29526 SDValue MulEst = Est;
29527
29528 if (i == Iterations - 1) {
29529 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
29530 AddToWorklist(MulEst.getNode());
29531 }
29532
29533 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
29534 AddToWorklist(NewEst.getNode());
29535
29536 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
29537 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
29538 AddToWorklist(NewEst.getNode());
29539
29540 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29541 AddToWorklist(NewEst.getNode());
29542
29543 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
29544 AddToWorklist(Est.getNode());
29545 }
29546 } else {
29547 // If no iterations are available, multiply with N.
29548 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
29549 AddToWorklist(Est.getNode());
29550 }
29551
29552 return Est;
29553 }
29554
29555 return SDValue();
29556}
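// Numeric sketch of the refinement above (illustrative values only): for
// A = 3.0 and an initial estimate X0 = 0.3,
//   X1 = X0 * (2 - A*X0) = 0.3  * 1.10 = 0.33
//   X2 = X1 * (2 - A*X1) = 0.33 * 1.01 = 0.3333
// converging quadratically towards 1/3. On the final step the loop folds the
// numerator in, computing N*Xi + Xi*(N - N*A*Xi) = N * Xi*(2 - A*Xi), so the
// caller gets N/A directly without a separate multiply by N.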
29557
29558/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29559/// For the reciprocal sqrt, we need to find the zero of the function:
29560/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29561/// =>
29562/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
29563/// As a result, we precompute A/2 prior to the iteration loop.
29564SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
29565 unsigned Iterations,
29566 SDNodeFlags Flags, bool Reciprocal) {
29567 EVT VT = Arg.getValueType();
29568 SDLoc DL(Arg);
29569 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
29570
29571 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
29572 // this entire sequence requires only one FP constant.
29573 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
29574 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
29575
29576 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
29577 for (unsigned i = 0; i < Iterations; ++i) {
29578 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
29579 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
29580 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
29581 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29582 }
29583
29584 // If non-reciprocal square root is requested, multiply the result by Arg.
29585 if (!Reciprocal)
29586 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
29587
29588 return Est;
29589}
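// Illustrative numbers for the one-constant form above: for A = 4.0 the code
// computes HalfArg = 1.5*4.0 - 4.0 = 2.0 (i.e. A/2). Starting from an estimate
// X0 = 0.48,
//   X1 = X0 * (1.5 - HalfArg*X0*X0) = 0.48 * (1.5 - 0.4608) = 0.498816
// which approaches rsqrt(4) = 0.5; the trailing FMUL by Arg (when !Reciprocal)
// then gives ~1.9953, approaching sqrt(4) = 2.0.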
29590
29591/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29592/// For the reciprocal sqrt, we need to find the zero of the function:
29593/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29594/// =>
29595/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
29596SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
29597 unsigned Iterations,
29598 SDNodeFlags Flags, bool Reciprocal) {
29599 EVT VT = Arg.getValueType();
29600 SDLoc DL(Arg);
29601 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
29602 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
29603
29604 // This routine must enter the loop below to work correctly
29605 // when (Reciprocal == false).
29606 assert(Iterations > 0);
29607
29608 // Newton iterations for reciprocal square root:
29609 // E = (E * -0.5) * ((A * E) * E + -3.0)
29610 for (unsigned i = 0; i < Iterations; ++i) {
29611 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
29612 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
29613 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
29614
29615 // When calculating a square root at the last iteration build:
29616 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
29617 // (notice a common subexpression)
29618 SDValue LHS;
29619 if (Reciprocal || (i + 1) < Iterations) {
29620 // RSQRT: LHS = (E * -0.5)
29621 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
29622 } else {
29623 // SQRT: LHS = (A * E) * -0.5
29624 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
29625 }
29626
29627 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
29628 }
29629
29630 return Est;
29631}
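// The same illustrative input traced through the two-constant form: for
// A = 4.0 and X0 = 0.48,
//   AE  = 4.0 * 0.48  = 1.92
//   AEE = 1.92 * 0.48 = 0.9216,   RHS = 0.9216 + (-3.0) = -2.0784
//   rsqrt step:     (0.48 * -0.5) * -2.0784 = 0.498816   (matches the
//                   one-constant form, since X*(1.5 - A/2*X^2) ==
//                   (-0.5*X)*(A*X^2 - 3))
//   sqrt last step: (1.92 * -0.5) * -2.0784 = 1.995264, approaching sqrt(4),
// which is why the non-reciprocal path reuses AE on the last iteration and the
// assert above requires at least one iteration.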
29632
29633/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
29634/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
29635/// Op can be zero.
29636SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
29637 bool Reciprocal) {
29638 if (LegalDAG)
29639 return SDValue();
29640
29641 // TODO: Handle extended types?
29642 EVT VT = Op.getValueType();
29643 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29644 VT.getScalarType() != MVT::f64)
29645 return SDValue();
29646
29647 // If estimates are explicitly disabled for this function, we're done.
29648 MachineFunction &MF = DAG.getMachineFunction();
29649 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
29650 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29651 return SDValue();
29652
29653 // Estimates may be explicitly enabled for this type with a custom number of
29654 // refinement steps.
29655 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
29656
29657 bool UseOneConstNR = false;
29658 if (SDValue Est =
29659 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
29660 Reciprocal)) {
29661 AddToWorklist(Est.getNode());
29662
29663 if (Iterations > 0)
29664 Est = UseOneConstNR
29665 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
29666 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
29667 if (!Reciprocal) {
29668 SDLoc DL(Op);
29669 // Try the target specific test first.
29670 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
29671
29672 // The estimate is now completely wrong if the input was exactly 0.0 or
29673 // possibly a denormal. Force the answer to 0.0 or value provided by
29674 // target for those cases.
29675 Est = DAG.getSelect(DL, VT, Test,
29676 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
29677 }
29678 return Est;
29679 }
29680
29681 return SDValue();
29682}
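// Why the select above is needed, sketched with a concrete input: for a plain
// square root the sequence computes Op * rsqrt(Op), and for Op == +0.0 the
// reciprocal-square-root estimate is +inf, so the product is 0.0 * inf = NaN
// rather than the expected 0.0 (denormal inputs that get flushed misbehave the
// same way); the select therefore substitutes the target-provided result for
// those inputs.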
29683
29684SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29685 return buildSqrtEstimateImpl(Op, Flags, true);
29686}
29687
29688SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29689 return buildSqrtEstimateImpl(Op, Flags, false);
29690}
29691
29692/// Return true if there is any possibility that the two addresses overlap.
29693bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
29694
29695 struct MemUseCharacteristics {
29696 bool IsVolatile;
29697 bool IsAtomic;
29698 SDValue BasePtr;
29699 int64_t Offset;
29700 LocationSize NumBytes;
29701 MachineMemOperand *MMO;
29702 };
29703
29704 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
29705 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
29706 int64_t Offset = 0;
29707 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
29708 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
29709 : (LSN->getAddressingMode() == ISD::PRE_DEC)
29710 ? -1 * C->getSExtValue()
29711 : 0;
29712 TypeSize Size = LSN->getMemoryVT().getStoreSize();
29713 return {LSN->isVolatile(), LSN->isAtomic(),
29714 LSN->getBasePtr(), Offset /*base offset*/,
29715 LocationSize::precise(Size), LSN->getMemOperand()};
29716 }
29717 if (const auto *LN = cast<LifetimeSDNode>(N)) {
29718 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
29719 return {false /*isVolatile*/,
29720 /*isAtomic*/ false,
29721 LN->getOperand(1),
29722 0,
29723 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
29724 (MachineMemOperand *)nullptr};
29725 }
29726 // Default.
29727 return {false /*isvolatile*/,
29728 /*isAtomic*/ false,
29729 SDValue(),
29730 (int64_t)0 /*offset*/,
29731 LocationSize::beforeOrAfterPointer() /*size*/,
29732 (MachineMemOperand *)nullptr};
29733 };
29734
29735 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
29736 MUC1 = getCharacteristics(Op1);
29737
29738 // If they are to the same address, then they must be aliases.
29739 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
29740 MUC0.Offset == MUC1.Offset)
29741 return true;
29742
29743 // If they are both volatile then they cannot be reordered.
29744 if (MUC0.IsVolatile && MUC1.IsVolatile)
29745 return true;
29746
29747 // Be conservative about atomics for the moment
29748 // TODO: This is way overconservative for unordered atomics (see D66309)
29749 if (MUC0.IsAtomic && MUC1.IsAtomic)
29750 return true;
29751
29752 if (MUC0.MMO && MUC1.MMO) {
29753 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29754 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29755 return false;
29756 }
29757
29758 // If NumBytes is scalable and offset is not 0, conservatively return may
29759 // alias
29760 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
29761 MUC0.Offset != 0) ||
29762 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
29763 MUC1.Offset != 0))
29764 return true;
29765 // Try to prove that there is aliasing, or that there is no aliasing. Either
29766 // way, we can return now. If nothing can be proved, proceed with more tests.
29767 bool IsAlias;
29768 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
29769 DAG, IsAlias))
29770 return IsAlias;
29771
29772 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
29773 // either is not known.
29774 if (!MUC0.MMO || !MUC1.MMO)
29775 return true;
29776
29777 // If one operation reads from invariant memory, and the other may store, they
29778 // cannot alias. These should really be checking the equivalent of mayWrite,
29779 // but it only matters for memory nodes other than load / store.
29780 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29781 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29782 return false;
29783
29784 // If we know required SrcValue1 and SrcValue2 have relatively large
29785 // alignment compared to the size and offset of the access, we may be able
29786 // to prove they do not alias. This check is conservative for now to catch
29787 // cases created by splitting vector types, it only works when the offsets are
29788 // multiples of the size of the data.
29789 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
29790 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
29791 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
29792 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
29793 LocationSize Size0 = MUC0.NumBytes;
29794 LocationSize Size1 = MUC1.NumBytes;
29795
29796 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
29797 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
29798 !Size1.isScalable() && Size0 == Size1 &&
29799 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
29800 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
29801 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
29802 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
29803 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
29804
29805 // There is no overlap between these relatively aligned accesses of
29806 // similar size. Return no alias.
29807 if ((OffAlign0 + static_cast<int64_t>(
29808 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
29809 (OffAlign1 + static_cast<int64_t>(
29810 Size1.getValue().getKnownMinValue())) <= OffAlign0)
29811 return false;
29812 }
29813
29814 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
29815                                ? CombinerGlobalAA
29816 : DAG.getSubtarget().useAA();
29817#ifndef NDEBUG
29818 if (CombinerAAOnlyFunc.getNumOccurrences() &&
29819     CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
29820 UseAA = false;
29821#endif
29822
29823 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
29824 Size0.hasValue() && Size1.hasValue() &&
29825 // Can't represent a scalable size + fixed offset in LocationSize
29826 (!Size0.isScalable() || SrcValOffset0 == 0) &&
29827 (!Size1.isScalable() || SrcValOffset1 == 0)) {
29828 // Use alias analysis information.
29829 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
29830 int64_t Overlap0 =
29831 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
29832 int64_t Overlap1 =
29833 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
29834 LocationSize Loc0 =
29835 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
29836 LocationSize Loc1 =
29837 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
29838 if (BatchAA->isNoAlias(
29839 MemoryLocation(MUC0.MMO->getValue(), Loc0,
29840 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
29841 MemoryLocation(MUC1.MMO->getValue(), Loc1,
29842 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
29843 return false;
29844 }
29845
29846 // Otherwise we have to assume they alias.
29847 return true;
29848}
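// A concrete instance of the alignment-based disjointness check above
// (hypothetical accesses, for illustration): two 4-byte accesses whose memory
// operands share a 16-byte base alignment with offsets 0 and 4 give
// OffAlign0 = 0 and OffAlign1 = 4; since OffAlign0 + 4 <= OffAlign1, these
// relatively aligned accesses cannot overlap and mayAlias returns false. This
// is the pattern produced when type legalization splits one wide vector access
// into adjacent narrower ones.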
29849
29850/// Walk up chain skipping non-aliasing memory nodes,
29851/// looking for aliasing nodes and adding them to the Aliases vector.
29852void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
29853 SmallVectorImpl<SDValue> &Aliases) {
29854 SmallVector<SDValue, 8> Chains; // List of chains to visit.
29855 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
29856
29857 // Get alias information for node.
29858 // TODO: relax aliasing for unordered atomics (see D66309)
29859 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
29860
29861 // Starting off.
29862 Chains.push_back(OriginalChain);
29863 unsigned Depth = 0;
29864
29865 // Attempt to improve chain by a single step
29866 auto ImproveChain = [&](SDValue &C) -> bool {
29867 switch (C.getOpcode()) {
29868 case ISD::EntryToken:
29869 // No need to mark EntryToken.
29870 C = SDValue();
29871 return true;
29872 case ISD::LOAD:
29873 case ISD::STORE: {
29874 // Get alias information for C.
29875 // TODO: Relax aliasing for unordered atomics (see D66309)
29876 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
29877 cast<LSBaseSDNode>(C.getNode())->isSimple();
29878 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
29879 // Look further up the chain.
29880 C = C.getOperand(0);
29881 return true;
29882 }
29883 // Alias, so stop here.
29884 return false;
29885 }
29886
29887 case ISD::CopyFromReg:
29888 // Always forward past CopyFromReg.
29889 C = C.getOperand(0);
29890 return true;
29891
29892 case ISD::LIFETIME_START:
29893 case ISD::LIFETIME_END: {
29894 // We can forward past any lifetime start/end that can be proven not to
29895 // alias the memory access.
29896 if (!mayAlias(N, C.getNode())) {
29897 // Look further up the chain.
29898 C = C.getOperand(0);
29899 return true;
29900 }
29901 return false;
29902 }
29903 default:
29904 return false;
29905 }
29906 };
29907
29908 // Look at each chain and determine if it is an alias. If so, add it to the
29909 // aliases list. If not, then continue up the chain looking for the next
29910 // candidate.
29911 while (!Chains.empty()) {
29912 SDValue Chain = Chains.pop_back_val();
29913
29914 // Don't bother if we've seen Chain before.
29915 if (!Visited.insert(Chain.getNode()).second)
29916 continue;
29917
29918 // For TokenFactor nodes, look at each operand and only continue up the
29919 // chain until we reach the depth limit.
29920 //
29921 // FIXME: The depth check could be made to return the last non-aliasing
29922 // chain we found before we hit a tokenfactor rather than the original
29923 // chain.
29924 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
29925 Aliases.clear();
29926 Aliases.push_back(OriginalChain);
29927 return;
29928 }
29929
29930 if (Chain.getOpcode() == ISD::TokenFactor) {
29931 // We have to check each of the operands of the token factor for "small"
29932 // token factors, so we queue them up. Adding the operands to the queue
29933 // (stack) in reverse order maintains the original order and increases the
29934 // likelihood that getNode will find a matching token factor (CSE.)
29935 if (Chain.getNumOperands() > 16) {
29936 Aliases.push_back(Chain);
29937 continue;
29938 }
29939 for (unsigned n = Chain.getNumOperands(); n;)
29940 Chains.push_back(Chain.getOperand(--n));
29941 ++Depth;
29942 continue;
29943 }
29944 // Everything else
29945 if (ImproveChain(Chain)) {
29946 // Updated Chain Found, Consider new chain if one exists.
29947 if (Chain.getNode())
29948 Chains.push_back(Chain);
29949 ++Depth;
29950 continue;
29951 }
29952 // No Improved Chain Possible, treat as Alias.
29953 Aliases.push_back(Chain);
29954 }
29955}
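// A small example of the walk above with hypothetical nodes: if a simple load
// L has chain S1 (a store to an address provably disjoint from L) and S1's
// chain is S2 (a store that may overlap L), ImproveChain steps past S1 to S2,
// S2 fails the mayAlias test, and Aliases ends up as {S2}. FindBetterChain
// below then rewires L's chain straight to S2, so L no longer has an
// artificial ordering dependence on S1.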
29956
29957/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
29958/// (aliasing node.)
29959SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
29960 if (OptLevel == CodeGenOptLevel::None)
29961 return OldChain;
29962
29963 // Ops for replacing token factor.
29964 SmallVector<SDValue, 8> Aliases;
29965
29966 // Accumulate all the aliases to this node.
29967 GatherAllAliases(N, OldChain, Aliases);
29968
29969 // If no operands then chain to entry token.
29970 if (Aliases.empty())
29971 return DAG.getEntryNode();
29972
29973 // If a single operand then chain to it. We don't need to revisit it.
29974 if (Aliases.size() == 1)
29975 return Aliases[0];
29976
29977 // Construct a custom tailored token factor.
29978 return DAG.getTokenFactor(SDLoc(N), Aliases);
29979}
29980
29981// This function tries to collect a bunch of potentially interesting
29982// nodes to improve the chains of, all at once. This might seem
29983// redundant, as this function gets called when visiting every store
29984// node, so why not let the work be done on each store as it's visited?
29985//
29986// I believe this is mainly important because mergeConsecutiveStores
29987// is unable to deal with merging stores of different sizes, so unless
29988// we improve the chains of all the potential candidates up-front
29989// before running mergeConsecutiveStores, it might only see some of
29990// the nodes that will eventually be candidates, and then not be able
29991// to go from a partially-merged state to the desired final
29992// fully-merged state.
29993
29994bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
29995 SmallVector<StoreSDNode *, 8> ChainedStores;
29996 StoreSDNode *STChain = St;
29997 // Intervals records which offsets from BaseIndex have been covered. In
29998 // the common case, every store writes to the immediately preceding
29999 // addresses and is thus merged with the previous interval at insertion time.
30000
30001 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30002 IntervalMapHalfOpenInfo<int64_t>>;
30003 IMap::Allocator A;
30004 IMap Intervals(A);
30005
30006 // This holds the base pointer, index, and the offset in bytes from the base
30007 // pointer.
30008 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30009
30010 // We must have a base and an offset.
30011 if (!BasePtr.getBase().getNode())
30012 return false;
30013
30014 // Do not handle stores to undef base pointers.
30015 if (BasePtr.getBase().isUndef())
30016 return false;
30017
30018 // Do not handle stores to opaque types
30019 if (St->getMemoryVT().isZeroSized())
30020 return false;
30021
30022 // BaseIndexOffset assumes that offsets are fixed-size, which
30023 // is not valid for scalable vectors where the offsets are
30024 // scaled by `vscale`, so bail out early.
30025 if (St->getMemoryVT().isScalableVT())
30026 return false;
30027
30028 // Add ST's interval.
30029 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30030 std::monostate{});
30031
30032 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30033 if (Chain->getMemoryVT().isScalableVector())
30034 return false;
30035
30036 // If the chain has more than one use, then we can't reorder the mem ops.
30037 if (!SDValue(Chain, 0)->hasOneUse())
30038 break;
30039 // TODO: Relax for unordered atomics (see D66309)
30040 if (!Chain->isSimple() || Chain->isIndexed())
30041 break;
30042
30043 // Find the base pointer and offset for this memory node.
30044 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30045 // Check that the base pointer is the same as the original one.
30046 int64_t Offset;
30047 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30048 break;
30049 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30050 // Make sure we don't overlap with other intervals by checking the ones to
30051 // the left or right before inserting.
30052 auto I = Intervals.find(Offset);
30053 // If there's a next interval, we should end before it.
30054 if (I != Intervals.end() && I.start() < (Offset + Length))
30055 break;
30056 // If there's a previous interval, we should start after it.
30057 if (I != Intervals.begin() && (--I).stop() <= Offset)
30058 break;
30059 Intervals.insert(Offset, Offset + Length, std::monostate{});
30060
30061 ChainedStores.push_back(Chain);
30062 STChain = Chain;
30063 }
30064
30065 // If we didn't find a chained store, exit.
30066 if (ChainedStores.empty())
30067 return false;
30068
30069 // Improve all chained stores (St and ChainedStores members) starting from
30070 // where the store chain ended and return single TokenFactor.
30071 SDValue NewChain = STChain->getChain();
30072 SmallVector<SDValue, 8> TFOps;
30073 for (unsigned I = ChainedStores.size(); I;) {
30074 StoreSDNode *S = ChainedStores[--I];
30075 SDValue BetterChain = FindBetterChain(S, NewChain);
30076 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30077     S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30078 TFOps.push_back(SDValue(S, 0));
30079 ChainedStores[I] = S;
30080 }
30081
30082 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30083 SDValue BetterChain = FindBetterChain(St, NewChain);
30084 SDValue NewST;
30085 if (St->isTruncatingStore())
30086 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30087 St->getBasePtr(), St->getMemoryVT(),
30088 St->getMemOperand());
30089 else
30090 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30091 St->getBasePtr(), St->getMemOperand());
30092
30093 TFOps.push_back(NewST);
30094
30095 // If we improved every element of TFOps, then we've lost the dependence on
30096 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30097 // the beginning to keep relative order consistent with FindBetterChains.
30098 auto hasImprovedChain = [&](SDValue ST) -> bool {
30099 return ST->getOperand(0) != NewChain;
30100 };
30101 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30102 if (AddNewChain)
30103 TFOps.insert(TFOps.begin(), NewChain);
30104
30105 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30106 CombineTo(St, TF);
30107
30108 // Add TF and its operands to the worklist.
30109 AddToWorklist(TF.getNode());
30110 for (const SDValue &Op : TF->ops())
30111 AddToWorklist(Op.getNode());
30112 AddToWorklist(STChain);
30113 return true;
30114}
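// Illustration of the rewrite above (hypothetical stores): three chained
// 4-byte stores at offsets 0, 4 and 8 from one base occupy the disjoint
// intervals [0,4), [4,8) and [8,12), so the overlap checks never break out of
// the loop (assuming each link is a simple, unindexed store with a single
// chain use). Each store's chain is then improved independently via
// FindBetterChain and all of them are tied together by a single TokenFactor,
// so they no longer serialize on one another and mergeConsecutiveStores can
// later consider them all at once.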
30115
30116bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30117 if (OptLevel == CodeGenOptLevel::None)
30118 return false;
30119
30120 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30121
30122 // We must have a base and an offset.
30123 if (!BasePtr.getBase().getNode())
30124 return false;
30125
30126 // Do not handle stores to undef base pointers.
30127 if (BasePtr.getBase().isUndef())
30128 return false;
30129
30130 // Directly improve a chain of disjoint stores starting at St.
30131 if (parallelizeChainedStores(St))
30132 return true;
30133
30134 // Improve St's chain.
30135 SDValue BetterChain = FindBetterChain(St, St->getChain());
30136 if (St->getChain() != BetterChain) {
30137 replaceStoreChain(St, BetterChain);
30138 return true;
30139 }
30140 return false;
30141}
30142
30143/// This is the entry point for the file.
30144void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30145                              CodeGenOptLevel OptLevel) {
30146 /// This is the main entry point to this class.
30147 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30148}
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:424
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:417
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
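Taken together, these SDNode accessors are what a combine uses to recognize a pattern before rewriting it. A hedged sketch (the predicate and the pattern are hypothetical; the accessors are the ones listed above):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if N is a single-use (add X, C) whose constant fits in 32 signed bits.
static bool isSingleUseAddWithImm(const SDNode *N) {
  if (N->getOpcode() != ISD::ADD || !N->hasOneUse())
    return false;
  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return false;
  return N->getConstantOperandAPInt(1).isSignedIntN(32);
}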
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
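SDValue mirrors most of the SDNode queries so a pattern check can stay on the value being combined. A small hedged sketch (the check itself is hypothetical):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if Op is a logical right shift by a constant that is smaller than
// the scalar bit width of the shifted value.
static bool isInRangeLogicalShift(SDValue Op) {
  if (Op.getOpcode() != ISD::SRL || !isa<ConstantSDNode>(Op.getOperand(1)))
    return false;
  return Op.getConstantOperandAPInt(1).ult(Op.getScalarValueSizeInBits());
}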
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
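The builder helpers above compose naturally. A hedged sketch that materializes (x < 0 ? 0 - x : x) using getSetCC, getNegative and getSelect with the signatures listed in this index; the wrapper function itself is hypothetical:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue buildSelectAbs(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT VT = X.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue IsNeg = DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETLT);
  SDValue Neg = DAG.getNegative(X, DL, VT); // (sub 0, X)
  return DAG.getSelect(DL, VT, IsNeg, Neg, X);
}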
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
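getConstant plus getNode is the basic recipe for handing a rewritten expression back to the DAG. A minimal hedged sketch (the helper is hypothetical; getNode's two-operand overload is used):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Build (add X, 1) in X's own value type.
static SDValue buildIncrement(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT VT = X.getValueType();
  return DAG.getNode(ISD::ADD, DL, VT, X, DAG.getConstant(1, DL, VT));
}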
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
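computeKnownBits and MaskedValueIsZero are the usual way a combine proves bits are dead before narrowing or folding. A hedged sketch of both queries (helper names hypothetical):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Are the low NumBits of Op provably zero?
static bool lowBitsKnownZero(SelectionDAG &DAG, SDValue Op, unsigned NumBits) {
  APInt LowMask = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), NumBits);
  return DAG.MaskedValueIsZero(Op, LowMask);
}

// Is the sign of Op known either way?
static bool signBitKnown(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  return Known.isNegative() || Known.isNonNegative();
}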
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
value_type pop_back_val()
Definition SetVector.h:296
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
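These small containers back the worklist idiom used throughout the combiner: a SmallVector as the stack and a SmallPtrSet to visit each node only once. A hedged sketch (the traversal is illustrative, not the pass's actual worklist):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static void walkOperandsOnce(SDNode *Root) {
  SmallPtrSet<SDNode *, 16> Visited;
  SmallVector<SDNode *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    SDNode *N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // already seen
    for (SDValue Op : N->op_values())
      Worklist.push_back(Op.getNode());
  }
}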
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
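Legality queries like these gate nearly every rewrite: after legalization, only legal (or custom-lowered) operations may be introduced. A hedged sketch, with ISD::ABS chosen purely for illustration:

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static bool canUseAbs(const TargetLowering &TLI, EVT VT, bool LegalOperations) {
  if (!TLI.isTypeLegal(VT))
    return false;
  return LegalOperations ? TLI.isOperationLegal(ISD::ABS, VT)
                         : TLI.isOperationLegalOrCustom(ISD::ABS, VT);
}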
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are function calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
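A hedged sketch of calling SimplifyMultipleUseDemandedBits with the signature listed above, asking it to look through ops that do not feed the low byte of a scalar value (the wrapper is hypothetical):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue demandLowByte(const TargetLowering &TLI, SelectionDAG &DAG,
                             SDValue Op) {
  unsigned BW = Op.getScalarValueSizeInBits();
  APInt DemandedBits = APInt::getLowBitsSet(BW, 8);
  APInt DemandedElts(1, 1); // a single demanded (scalar) element
  return TLI.SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts,
                                             DAG);
}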
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:855
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:809
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ TargetConstantFP
Definition ISDOpcodes.h:175
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ TargetFrameIndex
Definition ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:690
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
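The opcodes in this ISD::NodeType list form the vocabulary a combine uses when it rebuilds nodes through SelectionDAG::getNode. A minimal sketch of that pattern, with a hypothetical helper (rebuildAdd) that is not code from this file:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Fold (add x, 0) -> x; otherwise rebuild the ISD::ADD with the same type.
static SDValue rebuildAdd(SelectionDAG &DAG, const SDLoc &DL, SDValue N0,
                          SDValue N1) {
  if (isNullConstant(N1))
    return N0;
  return DAG.getNode(ISD::ADD, DL, N0.getValueType(), N0, N1);
}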
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
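The condition-code helpers compose. A short sketch, assuming integer operands and an illustrative helper name (invertAndSwap), that derives the predicate for (Y op' X) after negating (X op Y):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static ISD::CondCode invertAndSwap(ISD::CondCode CC, EVT OpVT) {
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, OpVT); // !(X op Y)
  return ISD::getSetCCSwappedOperands(Inv);           // (Y op' X)
}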
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
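matchUnaryPredicate lets one predicate cover both a scalar constant and every element of a constant build vector. A sketch with a hypothetical wrapper (isNonZeroConstOrSplat), not taken from this file:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if Op is a constant (or constant splat/build vector) with no zero
// elements; undef elements are rejected because AllowUndefs defaults to false.
static bool isNonZeroConstOrSplat(SDValue Op) {
  return ISD::matchUnaryPredicate(
      Op, [](ConstantSDNode *C) { return !C->isZero(); });
}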
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
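The SDPatternMatch combinators above are used through sd_match, mirroring the IR-level PatternMatch style. A sketch, assuming N is a node under combine and using an illustrative helper name (matchMulAdd):

#include "llvm/CodeGen/SDPatternMatch.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// Recognize (add (mul a, b), c) and capture the three operands.
static bool matchMulAdd(SDNode *N, SDValue &A, SDValue &B, SDValue &C) {
  return sd_match(N, m_Add(m_Mul(m_Value(A), m_Value(B)), m_Value(C)));
}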
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
constexpr double e
Definition MathExtras.h:47
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:311
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:355
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:824
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
void stable_sort(R &&Range)
Definition STLExtras.h:2047
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1740
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
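The range-based STLExtras wrappers remove the explicit begin()/end() calls when scanning a node's operands. A small sketch with a hypothetical helper (allOperandsConstant):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if every operand of N is a ConstantSDNode.
static bool allOperandsConstant(const SDNode *N) {
  return all_of(N->op_values(),
                [](SDValue Op) { return isa<ConstantSDNode>(Op); });
}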
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1607
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
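peekThroughBitcasts lets a combine test the underlying value regardless of how many ISD::BITCAST nodes wrap it. A sketch, with an illustrative helper name (isBitcastOfBuildVector):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isBitcastOfBuildVector(SDValue V) {
  SDValue Src = peekThroughBitcasts(V);
  return Src.getOpcode() == ISD::BUILD_VECTOR;
}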
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2461
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2125
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1534
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1721
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
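These bit-math helpers typically show up together when a multiply or divide by a constant is rewritten as a shift. A minimal sketch with a made-up helper name (shiftAmountForScale):

#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;

// For a power-of-two scale, a multiply can become a left shift by log2 of
// the scale; return 0 for non-power-of-two scales in this toy example.
static unsigned shiftAmountForScale(uint32_t Scale) {
  return isPowerOf2_32(Scale) ? Log2_32(Scale) : 0;
}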
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1545
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:401
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1633
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1728
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
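narrowShuffleMaskElts is the counterpart of widenShuffleMaskElts: it rewrites a mask over wide elements as the equivalent mask over narrower ones. A sketch, assuming a scale factor of 2 and an illustrative helper name (splitMaskElements):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Rewrite Mask so each wide element index becomes two sequential indices
// over elements half as wide.
static SmallVector<int, 16> splitMaskElements(ArrayRef<int> Mask) {
  SmallVector<int, 16> Narrowed;
  narrowShuffleMaskElts(/*Scale=*/2, Mask, Narrowed);
  return Narrowed;
}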
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
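isConstOrConstSplat covers both a scalar constant and a splat build vector, so one check serves scalar and vector combines alike. A sketch with a hypothetical helper (isPowerOfTwoConstOrSplat):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isPowerOfTwoConstOrSplat(SDValue V) {
  if (ConstantSDNode *C = isConstOrConstSplat(V))
    return C->getAPIntValue().isPowerOf2();
  return false;
}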
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1950
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1886
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2097
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:332
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:328
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:365
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:338
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:279
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:187
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:256
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:268
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
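The EVT queries above are usually chained to gate a combine on a particular shape of value type. A small sketch with an illustrative predicate name (isFixedByteVector):

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// True for fixed-width vectors whose elements are 8-bit integers.
static bool isFixedByteVector(EVT VT) {
  return VT.isFixedLengthVector() && VT.getVectorElementType().isInteger() &&
         VT.getScalarSizeInBits() == 8;
}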
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:289
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
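KnownBits results, typically obtained from SelectionDAG::computeKnownBits, answer the narrowing questions a combine asks. A sketch with a made-up helper name (fitsInLowByte):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// True when every possible unsigned value of the analyzed operand needs at
// most 8 bits.
static bool fitsInLowByte(const KnownBits &Known) {
  return Known.countMaxActiveBits() <= 8;
}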
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
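DAGCombinerInfo is the interface a target's PerformDAGCombine hook uses to feed results back to this combiner. A hypothetical sketch (the helper name and the "no-op" condition are illustrative, not from this file): CombineTo replaces all results of N and, by default, queues the replacement for further combining.

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Replace all uses of N with its first operand, e.g. after proving the node
// is a no-op for this target.
static SDValue replaceWithOperand(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
  return DCI.CombineTo(N, N->getOperand(0));
}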